├── report.pdf
├── test_data_of_each_station.xls
├── rain_shuffle
    ├── MODEL
    │   ├── train_GBRT_371.m
    │   ├── mlp_371_64_2_0.005.pth
    │   └── lstm_371_128_2_0.001.pth
    ├── MODEL_NO
    │   ├── train_GBRT_371.m
    │   ├── mlp_371_64_4_0.001.pth
    │   ├── lstm_371_128_2_0.001.pth
    │   └── seq2seq_371_128_0.1_0.001.pth
    ├── models
    │   ├── seq2seq_313_best.pth
    │   ├── seq2seq_314_best.pth
    │   ├── seq2seq_371_best.pth
    │   ├── seq2seq_372_best.pth
    │   └── seq2seq_393_best.pth
    ├── image_example
    │   ├── seq2seq_313.png
    │   ├── seq2seq_314.png
    │   ├── seq2seq_371.png
    │   ├── seq2seq_372.png
    │   └── seq2seq_393.png
    ├── __pycache__
    │   ├── eval.cpython-36.pyc
    │   ├── eval.cpython-37.pyc
    │   ├── seq2seq_module.cpython-36.pyc
    │   └── seq2seq_module.cpython-37.pyc
    ├── make_test.py
    ├── hour_cat.py
    ├── hour2day.py
    ├── csv2txt.py
    ├── hour2txt.py
    ├── hour2txt_ifshuffle.py
    ├── test.py
    ├── ARIMA.py
    ├── mlp.py
    ├── run_hyp.sh
    ├── seq2seq.py
    ├── lstm_.py
    ├── att_seq2seq.py
    ├── SVR_sigmoid.py
    ├── SVR_rbf.py
    ├── SVR_poly.py
    ├── GBRT.py
    ├── XGB.py
    ├── MLP_module.py
    ├── run_no.sh
    ├── run_no_time2.sh
    ├── run_no_time1.sh
    ├── LSTM_module.py
    ├── run_time1.sh
    ├── run_time2.sh
    ├── run.sh
    ├── ensemble_learn_np.py
    ├── ensemble_learn.py
    ├── ensemble_learn_np 1.py
    ├── seq2seq_module.py
    ├── att_seq2seq_module.py
    └── eval.py
├── test_data_of_each_station_1_hour.xls
├── test_data_of_each_station_2_hours.xls
├── test_data_of_each_station_shuffle.xls
└── README.md


/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/report.pdf


--------------------------------------------------------------------------------
/test_data_of_each_station.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/test_data_of_each_station.xls


--------------------------------------------------------------------------------
/rain_shuffle/MODEL/train_GBRT_371.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL/train_GBRT_371.m


--------------------------------------------------------------------------------
/test_data_of_each_station_1_hour.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/test_data_of_each_station_1_hour.xls


--------------------------------------------------------------------------------
/test_data_of_each_station_2_hours.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/test_data_of_each_station_2_hours.xls


--------------------------------------------------------------------------------
/test_data_of_each_station_shuffle.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/test_data_of_each_station_shuffle.xls


--------------------------------------------------------------------------------
/rain_shuffle/MODEL_NO/train_GBRT_371.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL_NO/train_GBRT_371.m


--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_313_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_313_best.pth


--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_314_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_314_best.pth


--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_371_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_371_best.pth


--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_372_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_372_best.pth


--------------------------------------------------------------------------------
/rain_shuffle/models/seq2seq_393_best.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/models/seq2seq_393_best.pth


--------------------------------------------------------------------------------
/rain_shuffle/MODEL/mlp_371_64_2_0.005.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL/mlp_371_64_2_0.005.pth


--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_313.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_313.png


--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_314.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_314.png


--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_371.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_371.png


--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_372.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_372.png


--------------------------------------------------------------------------------
/rain_shuffle/image_example/seq2seq_393.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/image_example/seq2seq_393.png


--------------------------------------------------------------------------------
/rain_shuffle/MODEL/lstm_371_128_2_0.001.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL/lstm_371_128_2_0.001.pth


--------------------------------------------------------------------------------
/rain_shuffle/MODEL_NO/mlp_371_64_4_0.001.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL_NO/mlp_371_64_4_0.001.pth


--------------------------------------------------------------------------------
/rain_shuffle/__pycache__/eval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/__pycache__/eval.cpython-36.pyc


--------------------------------------------------------------------------------
/rain_shuffle/__pycache__/eval.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/__pycache__/eval.cpython-37.pyc


--------------------------------------------------------------------------------
/rain_shuffle/MODEL_NO/lstm_371_128_2_0.001.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL_NO/lstm_371_128_2_0.001.pth


--------------------------------------------------------------------------------
/rain_shuffle/MODEL_NO/seq2seq_371_128_0.1_0.001.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/MODEL_NO/seq2seq_371_128_0.1_0.001.pth


--------------------------------------------------------------------------------
/rain_shuffle/__pycache__/seq2seq_module.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/__pycache__/seq2seq_module.cpython-36.pyc


--------------------------------------------------------------------------------
/rain_shuffle/__pycache__/seq2seq_module.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shengjie-bob/Rainfall_Prediction/HEAD/rain_shuffle/__pycache__/seq2seq_module.cpython-37.pyc


--------------------------------------------------------------------------------
/rain_shuffle/make_test.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import os
 4 | 
 5 | path = os.getcwd()
 6 | 
 7 | #选择数据集
 8 | num_array=np.array([313,314,371,372,393])
 9 | 
10 | #测试集的比例
11 | ratio =0.1
12 | 
13 | for num in num_array:
14 |     df = pd.read_csv('{}/sample/hour{}.csv'.format(path,num))
15 | 
16 |     data_num = len(df)
17 | 
18 |     test_num =int(ratio*data_num)
19 | 
20 |     test_df =df.iloc[(data_num-test_num):,1:]
21 | 
22 |     test_df.to_csv('{}/sample/station{}.csv'.format(path, num))
23 | 
24 |     print('station{}ok'.format(num))


--------------------------------------------------------------------------------
/rain_shuffle/hour_cat.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import os
 4 | 
 5 | record=pd.read_csv('sudeste.csv')
 6 | 
 7 | path ='./hour_data'
 8 | if os.path.exists(path):
 9 |     pass
10 | else:
11 |     os.mkdir(path)
12 | 
13 | print(record.columns)
14 | print(record.index)
15 | 
16 | print(record.head())
17 | 
18 | print(record[record.wsid==178].head())
19 | 
20 | print(record.wsid.value_counts())
21 | 
22 | #获得数据的气象点标签
23 | idx = record.wsid.values
24 | idx= np.unique(idx)
25 | idx_num=record.wsid.value_counts()
26 | 
27 | #保存数据较多的气象点 并按天得到数据的整理
28 | for i in idx:
29 |     if idx_num[i]>=120000:
30 |         df=record[record.wsid==i]
31 |     else:
32 |         continue
33 |     df = df.dropna(axis=0, how='all')
34 | 
35 |     df = df.fillna(value=0)
36 | 
37 |     df.to_csv('{}/hour{}.csv'.format(path,i))
38 | 
39 | print('yy')
40 | 


--------------------------------------------------------------------------------
/rain_shuffle/hour2day.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import os
 4 | 
 5 | record=pd.read_csv('sudeste.csv')
 6 | 
 7 | path ='./day_data'
 8 | if os.path.exists(path):
 9 |     pass
10 | else:
11 |     os.mkdir(path)
12 | 
13 | print(record.columns)
14 | print(record.index)
15 | 
16 | print(record.head())
17 | 
18 | print(record[record.wsid==178].head())
19 | 
20 | print(record.wsid.value_counts())
21 | 
22 | #获得数据的气象点标签
23 | idx = record.wsid.values
24 | idx= np.unique(idx)
25 | idx_num=record.wsid.value_counts()
26 | 
27 | #保存数据较多的气象点 并按天得到数据的整理
28 | for i in idx:
29 |     if idx_num[i]>=100000:
30 |         df=record[record.wsid==i]
31 |     else:
32 |         continue
33 |     df = df.dropna(axis=0, how='all')
34 | 
35 |     df = df.fillna(value=0)
36 | 
37 |     tim = df.date.values
38 | 
39 |     tim = np.unique(tim)
40 | 
41 |     k = 0
42 |     # 得到每天的降水数据
43 |     for j in tim:
44 |         k = k + 1
45 |         df1 = df[df.date == j]
46 |         djsk = df1.values
47 |         data = np.mean(djsk[:, 14:30], axis=0)
48 |         rain = np.sum(djsk[:, 14])
49 |         djsk = djsk[-1, :][:, np.newaxis].T
50 |         djsk[:, 14:30] = data
51 |         djsk[:, 14] = rain
52 | 
53 |         if k == 1:
54 |             df_all = djsk
55 |         else:
56 |             df_all = np.vstack([df_all, djsk])
57 | 
58 |     df = pd.DataFrame(df_all)
59 | 
60 |     df.to_csv('{}/tian{}.csv'.format(path,i))
61 | 
62 | 
63 | print('yy')
64 | 


--------------------------------------------------------------------------------
/rain_shuffle/csv2txt.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import os
 4 | 
 5 | path = os.getcwd()
 6 | 
 7 | #选择数据集
 8 | num=373
 9 | 
10 | df = pd.read_csv('{}/day_data/tian{}.csv'.format(path,num))
11 | 
12 | data_num = len(df)
13 | #特征的index
14 | # feature_idx=[15,16,19,20,26,28,29]
15 | feature_idx=[15,19,20,21,26,29,30]
16 | rain_idx =[15]
17 | #天数
18 | time_ser=3
19 | 
20 | #数据集划分
21 | ratio = 0.9
22 | 
23 | train_list=[]
24 | label_list=[]
25 | 
26 | #该部分实现对全零数据的筛选
27 | #k为计数有效数据
28 | k=0
29 | for i in range(data_num):
30 |     j=i+1
31 |     t = df.iloc[i, feature_idx].values
32 |     if not np.any(t):
33 |         k = 0
34 |         continue
35 |     else:
36 |         k =k+1
37 |     if j - time_ser >=0 and k == time_ser :
38 |         train = df.iloc[j-time_ser:j, feature_idx].values
39 |         label = df.iloc[j,rain_idx].values
40 |         train=np.reshape(train,(1,-1))
41 |         train_list.append(train)
42 |         label_list.append(label)
43 |         k =0
44 |     if i >= data_num-2:
45 |         break
46 | 
47 | train_arr =np.array(train_list).reshape((len(train_list),-1))
48 | label_arr =np.array(label_list).reshape((len(label_list),-1))
49 | 
50 | path ='./train_test'
51 | if os.path.exists(path):
52 |     pass
53 | else:
54 |     os.mkdir(path)
55 | 
56 | np.savetxt('{}/train{}.txt'.format(path,num),train_arr)
57 | np.savetxt('{}/label{}.txt'.format(path,num),label_arr)
58 | 
59 | print('ss')
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/rain_shuffle/hour2txt.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import os
 4 | 
 5 | path = os.getcwd()
 6 | 
 7 | #选择数据集
 8 | num=396
 9 | 
10 | df = pd.read_csv('{}/hour_data/hour{}.csv'.format(path,num))
11 | 
12 | data_num = len(df)
13 | #特征的index
14 | # feature_idx=[15,16,19,20,26,28,29]
15 | feature_idx=[15,19,20,21,26,29,30]
16 | rain_idx =[15]
17 | #天数
18 | time_ser=3
19 | 
20 | #数据集划分
21 | ratio = 0.9
22 | 
23 | train_list=[]
24 | label_list=[]
25 | 
26 | #该部分实现对全零数据的筛选
27 | #k为计数有效数据
28 | k=0
29 | for i in range(data_num):
30 |     j=i+1
31 |     t = df.iloc[i, feature_idx].values
32 |     if not np.any(t):
33 |         k = 0
34 |         continue
35 |     else:
36 |         k =k+1
37 |     if j - time_ser >=0 and k == time_ser :
38 |         train = df.iloc[j-time_ser:j, feature_idx].values
39 |         label = df.iloc[j,rain_idx].values
40 |         train=np.reshape(train,(1,-1))
41 |         train_list.append(train)
42 |         label_list.append(label)
43 |         k =0
44 |     if i >= data_num-2:
45 |         break
46 | 
47 | train_arr =np.array(train_list).reshape((len(train_list),-1))
48 | label_arr =np.array(label_list).reshape((len(label_list),-1))
49 | 
50 | path ='./train_test_hour'
51 | if os.path.exists(path):
52 |     pass
53 | else:
54 |     os.mkdir(path)
55 | 
56 | np.savetxt('{}/train{}.txt'.format(path,num),train_arr)
57 | np.savetxt('{}/label{}.txt'.format(path,num),label_arr)
58 | 
59 | print('ss')
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/rain_shuffle/hour2txt_ifshuffle.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import os
 4 | from random import shuffle
 5 | import sys
 6 | 
 7 | # 定义打乱函数
 8 | def myShuffle(X, y):
 9 |     y = y.reshape(-1,1)
10 |     X_temp = np.concatenate((X, y),axis=1)
11 |     shuffle(X_temp)
12 |     return X_temp[:,:len(X_temp[0])-1], X_temp[:,-1].reshape(-1,1)
13 | 
14 | path = os.getcwd()
15 | #特征的index
16 | # feature_idx=[15,16,19,20,26,28,29]
17 | feature_idx = [15,19,20,21,26,29,30]
18 | rain_idx = [15]
19 | 
20 | time_ser = 3 #小时数
21 | ratio = 0.9
22 | ifshuffle = False
23 | 
24 | #该部分实现对全零数据的筛选
25 | #k为计数有效数据
26 | for num in [312,313,314,315,316,371,372,373,374,393,394,396]:
27 |     k=0
28 |     df = pd.read_csv('{}/hour_data/hour{}.csv'.format(path,num),engine='python')
29 |     data_num = len(df)
30 |     train_list=[]
31 |     label_list=[]
32 |     for i in range(data_num):
33 |         j=i+1
34 |         t = df.iloc[i, feature_idx].values
35 |         if not np.any(t):
36 |             k = 0
37 |             continue
38 |         else:
39 |             k =k+1
40 |         if j - time_ser >=0 and k == time_ser :
41 |             train = df.iloc[j-time_ser:j, feature_idx].values
42 |             label = df.iloc[j,rain_idx].values
43 |             train=np.reshape(train,(1,-1))
44 |             train_list.append(train)
45 |             label_list.append(label)
46 |             k =0
47 |         if i >= data_num-2:
48 |             break
49 |     
50 |     train_arr_old =np.array(train_list).reshape((len(train_list),-1))
51 |     label_arr_old =np.array(label_list).reshape((len(label_list),-1))
52 |     if ifshuffle:
53 |         train_arr, label_arr = myShuffle(train_arr_old, label_arr_old)
54 |         path ='./train_test_hour_shuffle'
55 |     else:
56 |         train_arr, label_arr = train_arr_old, label_arr_old
57 |         path ='./train_test_hour'
58 |     
59 |     len_feature = len(train_arr)
60 |     train_feature = train_arr[:int(ratio*len_feature),:]
61 |     train_rain = label_arr[:int(ratio*len_feature),:]
62 |     test_feature = train_arr[int(ratio*len_feature):,:]
63 |     test_rain = label_arr[int(ratio*len_feature):,:]
64 |     
65 |     if os.path.exists(path):
66 |         pass
67 |     else:
68 |         os.mkdir(path)
69 |     
70 |     np.savetxt('{}/train{}.txt'.format(path,num),train_feature)
71 |     np.savetxt('{}/train_label{}.txt'.format(path,num),train_rain)
72 |     np.savetxt('{}/test{}.txt'.format(path,num),test_feature)
73 |     np.savetxt('{}/test_label{}.txt'.format(path,num),test_rain)
74 |     
75 |     path = os.getcwd()
76 |     print('num%d结束'%num)
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/rain_shuffle/test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from sklearn.preprocessing import *
 4 | import os
 5 | from eval import  evaluation
 6 | from sklearn.metrics import *
 7 | import matplotlib.pyplot as plt
 8 | import argparse
 9 | from torch import Tensor
10 | from torch.autograd import Variable
11 | 
12 | # os.environ["CUDA_VISIBLE_DEVICES"]='0'
13 | # torch.cuda.set_device(0)
14 | 
15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
16 | 
17 | #设计传参数
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--station', type=int, default=393, help='id of station')
20 | parser.add_argument('--model_type',type=str,default='seq2seq')
21 | parser.add_argument('--model',type=str,default='models/seq2seq_393_best.pth',help='model name')
22 | parser.add_argument('--input-dim', type=int, default=3, help='num of hour')
23 | parser.add_argument('--seq-len', type=int, default=7, help='num of parameter each hour')
24 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
25 | opt = parser.parse_args()
26 | 
27 | path =os.getcwd()
28 | #station的id
29 | num = opt.station
30 | 
31 | path_2 = 'sample'
32 | 
33 | if opt.ifshuffle:
34 |       path_2 = 'sample_shuffle'
35 | 
36 | ratio=0.9
37 | #和数据集设计有关
38 | input_dim = opt.input_dim
39 | seq_len= opt.seq_len
40 | 
41 | #测试集
42 | test_feature = np.loadtxt("{}/{}/station{}/test{}.txt".format(path,path_2,num, num)).astype(np.float32)
43 | test_rain = np.loadtxt("{}/{}/station{}/test_label{}.txt".format(path,path_2, num,num)).astype(np.float32)
44 | test_feature = scale(test_feature, axis=0)
45 | test_rain = np.reshape(test_rain, (-1, 1))
46 | 
47 | 
48 | model = torch.load('{}/{}'.format(path,opt.model), map_location=torch.device('cpu'))
49 | x_tensor = Tensor(test_feature)
50 | if opt.model_type =='mlp':
51 |       pass
52 | else:
53 |       x_tensor = Variable(x_tensor).view(-1, 7, 3)
54 | y_pre_temp = model(x_tensor).detach().numpy()
55 | 
56 | error = mean_squared_error(test_rain, y_pre_temp)
57 | print('Model Test MSE: %.3f' % error)
58 | 
59 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_pre_temp)
60 | 
61 | print('SEQ2SEQ_rmse: %r' % rmse,
62 |       'SEQ2SEQ_mae: %r' % mae,
63 |       'SEQ2SEQ_mdae: %r' % mdae,
64 |       'SEQ2SEQ_r2: %r' % r2,
65 |       'SEQ2SEQ_var: %r' % var)
66 | 
67 | path ='./models'
68 | fig = plt.figure()
69 | plt.plot(test_rain, 'b', label='real')
70 | plt.plot(y_pre_temp, 'r', label='prediction',alpha=0.3)
71 | plt.legend(loc='best')
72 | plt.show()
73 | fig.savefig('{}/{}_{}.png'.format(path,opt.model_type,num), dpi=300)
74 | 
75 | f = open('{}/test_SEQ2SEQ_{}.txt'.format(path,num), 'w+')
76 | f.write('SEQ2SEQ_rmse: %r ' % rmse +
77 |       'SEQ2SEQ_mae: %r ' % mae +
78 |       'SEQ2SEQ_mdae: %r ' % mdae +
79 |       'SEQ2SEQ_r2: %r ' % r2 +
80 |       'SEQ2SEQ_var: %r ' % var)
81 | f.close()
82 | 
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/rain_shuffle/ARIMA.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from sklearn.preprocessing import *
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | from sklearn.svm import SVR
  6 | from pandas.plotting import *
  7 | from statsmodels.graphics.tsaplots import *
  8 | from statsmodels.stats.diagnostic import acorr_ljungbox
  9 | from statsmodels.tsa.arima_model import ARIMA
 10 | from sklearn.metrics import mean_squared_error
 11 | import os
 12 | from eval import evaluation
 13 | import argparse
 14 | 
 15 | #设计传参数
 16 | parser = argparse.ArgumentParser()
 17 | parser.add_argument('--station', type=int, default=371, help='id of station')
 18 | opt = parser.parse_args()
 19 | 
 20 | path =os.getcwd()
 21 | #station的id
 22 | num = opt.station
 23 | path_2 ='train_test_hour'
 24 | ratio=0.9
 25 | 
 26 | 
 27 | rain=np.loadtxt("{}/{}/label{}.txt".format(path,path_2,num))
 28 | # min_max_scaler = MinMaxScaler()
 29 | # rain = min_max_scaler.fit_transform(rain)
 30 | 
 31 | # df = pd.read_csv('tian.csv')
 32 | #
 33 | # rain_series=df.iloc[:4000,16]
 34 | 
 35 | autocorrelation_plot(rain)
 36 | plt.show()
 37 | #
 38 | # lag_plot(rain_series)
 39 | # plt.show()
 40 | 
 41 | # plot_acf(rain_series)
 42 | # plt.show()
 43 | #
 44 | # diff1 = rain_series.diff(1).dropna()
 45 | # diff1.plot()
 46 | # plt.show()
 47 | 
 48 | 
 49 | # plot_acf(diff1)
 50 | # plt.show()
 51 | #
 52 | # plot_pacf(rain_series)
 53 | # plt.show()
 54 | 
 55 | # print(u'差分序列的白噪声检验结果为：', acorr_ljungbox( diff1, lags=1))
 56 | #
 57 | # model = ARIMA(diff1[:1000], (0,1,2)).fit()
 58 | # model.summary2()
 59 | # re=model.fittedvalues
 60 | # results=model.predict()
 61 | #
 62 | #
 63 | # output = model.forecast()
 64 | 
 65 | 
 66 | X = rain
 67 | size = int(len(X) * ratio)
 68 | train, test = X[0:size], X[size:len(X)]
 69 | history = [x for x in train]
 70 | predictions = list()
 71 | for t in range(len(test)):
 72 |     model = ARIMA(history[:5000], order=(5,1,0))
 73 |     model_fit = model.fit(disp=0)
 74 |     output = model_fit.forecast()
 75 |     yhat = output[0]
 76 |     predictions.append(yhat)
 77 |     obs = test[t]
 78 |     history.append(obs)
 79 |     print('predicted=%f, expected=%f' % (yhat, obs))
 80 | error = mean_squared_error(test, predictions)
 81 | print('Test MSE: %.3f' % error)
 82 | 
 83 | 
 84 | 
 85 | rmse, mae, mdae,r2,var = evaluation(test, predictions)
 86 | 
 87 | print('ARIMA_rmse: %r' % rmse,
 88 |       'ARIMA_mae: %r' % mae,
 89 |       'ARIMA_mdae: %r' % mdae,
 90 |       'ARIMA_r2: %r' % r2,
 91 |       'ARIMA_var: %r' % var)
 92 | 
 93 | 
 94 | plt.plot(test, 'b', label='real')
 95 | plt.plot(predictions, 'r', label='prediction',alpha=0.3)
 96 | plt.legend(loc='best')
 97 | plt.show()
 98 | 
 99 | """
100 | 保存数据
101 | """
102 | 
103 | path ='./ARIMA'
104 | """
105 | 建立文件夹
106 | """
107 | if os.path.exists(path):
108 |     pass
109 | else:
110 |     os.mkdir(path)
111 | 
112 | f = open('{}/test_ARIMA_{}.txt'.format(path,num), 'w+')
113 | f.write('ARIMA_rmse: %r ' % rmse +
114 |       'ARIMA_mae: %r ' % mae +
115 |       'ARIMA_mdae: %r ' % mdae +
116 |       'ARIMA_r2: %r ' % r2 +
117 |       'ARIMA_var: %r ' % var)
118 | f.close()
119 | 


--------------------------------------------------------------------------------
/rain_shuffle/mlp.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from sklearn.preprocessing import *
 4 | import os
 5 | from sklearn.model_selection import train_test_split
 6 | from eval import  evaluation
 7 | from MLP_module import MLP
 8 | from sklearn.metrics import *
 9 | import matplotlib.pyplot as plt
10 | import argparse
11 | 
12 | # os.environ["CUDA_VISIBLE_DEVICES"]='0'
13 | # torch.cuda.set_device(0)
14 | 
15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
16 | 
17 | #设计传参数
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('--station', type=int, default=371, help='id of station')
20 | parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
21 | parser.add_argument('--batch-size', type=int, default=100, help='batch size')
22 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
23 | parser.add_argument('--input-dim', type=int, default=3, help='num of hour')
24 | parser.add_argument('--seq-len', type=int, default=7, help='num of parameter each hour')
25 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
26 | parser.add_argument('--hidden_dim', type=int, default=64, help='hidden_dim')
27 | parser.add_argument('--n_layer', type=int, default=2, help='n_layer')
28 | opt = parser.parse_args()
29 | 
30 | path =os.getcwd()
31 | #station的id
32 | num = opt.station
33 | 
34 | path_2 = 'train_test_hour'
35 | 
36 | if opt.ifshuffle:
37 |       path_2 = 'train_test_hour_shuffle'
38 | 
39 | ratio=0.9
40 | #和数据集设计有关
41 | input_dim = opt.input_dim
42 | seq_len= opt.seq_len
43 | # 定义超参数
44 | batch_size = opt.batch_size
45 | learning_rate = opt.lr
46 | num_epoches = opt.epochs
47 | hidden_dim = opt.hidden_dim
48 | n_layer = opt.n_layer
49 | 
50 | 
51 | # 雨水信息
52 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
53 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
54 | train_feature = scale(train_feature, axis=0)
55 | train_rain = np.reshape(train_rain, (-1, 1))
56 | 
57 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
58 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
59 | test_feature = scale(test_feature, axis=0)
60 | test_rain = np.reshape(test_rain, (-1, 1))
61 | 
62 | model = MLP(num,seq_len*input_dim,hidden_dim=hidden_dim,n_layer=n_layer,batch_size=batch_size,learning_rate=learning_rate,shuffle=False,device_pu=device)
63 | 
64 | eval_best=model.fit(train_feature,train_rain,num_epoches=num_epoches)
65 | print("best val:"+str(eval_best)+'\n')
66 | 
67 | y_mlp=model.predict(test_feature,test_rain)
68 | 
69 | error = mean_squared_error(test_rain, y_mlp)
70 | print('Model Test MSE: %.3f' % error)
71 | 
72 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_mlp)
73 | 
74 | print('MLP_rmse: %r' % rmse,
75 |       'MLP_mae: %r' % mae,
76 |       'MLP_mdae: %r' % mdae,
77 |       'MLP_r2: %r' % r2,
78 |       'MLP_var: %r' % var)
79 | 
80 | 
81 | plt.plot(test_rain, 'b', label='real')
82 | plt.plot(y_mlp, 'r', label='prediction',alpha=0.3)
83 | plt.legend(loc='best')
84 | plt.show()
85 | 
86 | #保存数据
87 | path ='./MLP'
88 | f = open('{}/test_MLP_{}.txt'.format(path,num), 'w+')
89 | f.write('MLP_rmse: %r ' % rmse +
90 |       'MLP_mae: %r ' % mae +
91 |       'MLP_mdae: %r ' % mdae +
92 |       'MLP_r2: %r ' % r2 +
93 |       'MLP_var: %r ' % var)
94 | f.close()
95 | 
96 | 
97 | 
98 | 


--------------------------------------------------------------------------------
/rain_shuffle/run_hyp.sh:
--------------------------------------------------------------------------------
 1 | #！/bin/bash
 2 | # 第一个bash文件
 3 | clear;
 4 | echo 'Hello';
 5 | # source activate py3 
 6 | 
 7 | # #运行lstm_
 8 | # #学习率
 9 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --lr 1e-4 
10 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --lr 5e-4 
11 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --lr 1e-3 
12 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --lr 5e-3 
13 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --lr 1e-2
14 | # #隐藏单元个数
15 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 16 
16 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 32 
17 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 64 
18 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 128 
19 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 256  
20 | # #层数
21 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --n_layer 1
22 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --n_layer 2
23 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --n_layer 3 
24 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --n_layer 4 
25 | # python lstm_.py --station 371  --epochs 100 --batch-size 100  --n_layer 5  
26 | 
27 | 
28 | # #运行mlp
29 | # #学习率
30 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --lr 1e-4 
31 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --lr 5e-4 
32 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --lr 1e-3 
33 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --lr 5e-3 
34 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --lr 1e-2
35 | # #隐藏单元个数
36 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 16 
37 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 32 
38 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 64 
39 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 128 
40 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --hidden_dim 256  
41 | #层数
42 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --n_layer 1
43 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --n_layer 2
44 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --n_layer 3 
45 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --n_layer 4 
46 | # python mlp.py --station 371  --epochs 100 --batch-size 100  --n_layer 5   
47 | 
48 | # # # #运行seq2seq
49 | # #学习率
50 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --lr 1e-4 
51 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --lr 5e-4 
52 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --lr 1e-3 
53 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --lr 5e-3 
54 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --lr 1e-2
55 | # #隐藏单元个数
56 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --hidden_dim 16 
57 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --hidden_dim 32 
58 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --hidden_dim 64 
59 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --hidden_dim 128 
60 | # python seq2seq.py --station 371  --epochs 100 --batch-size 30  --hidden_dim 256  
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/rain_shuffle/seq2seq.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | from sklearn.preprocessing import *
  4 | import os
  5 | from sklearn.model_selection import train_test_split
  6 | from eval import  evaluation
  7 | from seq2seq_module import Seq2Seq
  8 | from sklearn.metrics import *
  9 | import matplotlib.pyplot as plt
 10 | import argparse
 11 | 
 12 | # os.environ["CUDA_VISIBLE_DEVICES"]='0'
 13 | # torch.cuda.set_device(0)
 14 | 
 15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 16 | 
 17 | #设计传参数
 18 | parser = argparse.ArgumentParser()
 19 | parser.add_argument('--station', type=int, default=371, help='id of station')
 20 | parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
 21 | parser.add_argument('--batch-size', type=int, default=30, help='batch size')
 22 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
 23 | parser.add_argument('--input-dim', type=int, default=3, help='num of hour')
 24 | parser.add_argument('--seq-len', type=int, default=7, help='num of parameter each hour')
 25 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
 26 | #超参数
 27 | parser.add_argument('--hidden_dim', type=int, default=128, help='hidden unit number')
 28 | 
 29 | opt = parser.parse_args()
 30 | 
 31 | path =os.getcwd()
 32 | #station的id
 33 | num = opt.station
 34 | 
 35 | path_2 = 'train_test_hour'
 36 | 
 37 | if opt.ifshuffle:
 38 |       path_2 = 'train_test_hour_shuffle'
 39 | 
 40 | ratio=0.9
 41 | #和数据集设计有关
 42 | input_dim = opt.input_dim
 43 | seq_len= opt.seq_len
 44 | # 定义超参数
 45 | batch_size = opt.batch_size
 46 | learning_rate = opt.lr
 47 | num_epoches = opt.epochs
 48 | hidden_dim = opt.hidden_dim
 49 | 
 50 | 
 51 | # 雨水信息
 52 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
 53 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
 54 | train_feature = scale(train_feature, axis=0)
 55 | train_rain = np.reshape(train_rain, (-1, 1))
 56 | 
 57 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
 58 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
 59 | test_feature = scale(test_feature, axis=0)
 60 | test_rain = np.reshape(test_rain, (-1, 1))
 61 | 
 62 | 
 63 | 
 64 | model = Seq2Seq(num,input_dim,seq_len,output_dim=1,hidden_size=hidden_dim,dropout=0.1,learning_rate=learning_rate,batch_size=batch_size,device_pu=device)
 65 | 
 66 | eval_best=model.fit(train_feature,train_rain,num_epoches=num_epoches,shuffle=False)
 67 | print("best val:"+str(eval_best)+'\n')
 68 | 
 69 | y_seq2seq=model.predict(test_feature,test_rain)
 70 | 
 71 | error = mean_squared_error(test_rain, y_seq2seq)
 72 | print('Model Test MSE: %.3f' % error)
 73 | 
 74 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_seq2seq)
 75 | 
 76 | print('SEQ2SEQ_rmse: %r' % rmse,
 77 |       'SEQ2SEQ_mae: %r' % mae,
 78 |       'SEQ2SEQ_mdae: %r' % mdae,
 79 |       'SEQ2SEQ_r2: %r' % r2,
 80 |       'SEQ2SEQ_var: %r' % var)
 81 | 
 82 | 
 83 | plt.plot(test_rain, 'b', label='real')
 84 | plt.plot(y_seq2seq, 'r', label='prediction',alpha=0.3)
 85 | plt.legend(loc='best')
 86 | plt.show()
 87 | 
 88 | 
 89 | path ='./SEQ2SEQ'
 90 | f = open('{}/test_SEQ2SEQ_{}.txt'.format(path,num), 'w+')
 91 | f.write('SEQ2SEQ_rmse: %r ' % rmse +
 92 |       'SEQ2SEQ_mae: %r ' % mae +
 93 |       'SEQ2SEQ_mdae: %r ' % mdae +
 94 |       'SEQ2SEQ_r2: %r ' % r2 +
 95 |       'SEQ2SEQ_var: %r ' % var)
 96 | f.close()
 97 | 
 98 | 
 99 | 
100 | 


--------------------------------------------------------------------------------
/rain_shuffle/lstm_.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | from sklearn.preprocessing import *
  4 | import os
  5 | from sklearn.model_selection import train_test_split
  6 | from eval import  evaluation
  7 | from LSTM_module import lstm
  8 | from sklearn.metrics import *
  9 | import matplotlib.pyplot as plt
 10 | import argparse
 11 | 
 12 | # os.environ["CUDA_VISIBLE_DEVICES"]='2'
 13 | # torch.cuda.set_device(2)
 14 | 
 15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 16 | 
 17 | #设计传参数
 18 | parser = argparse.ArgumentParser()
 19 | parser.add_argument('--station', type=int, default=313, help='id of station')
 20 | parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
 21 | parser.add_argument('--batch-size', type=int, default=100, help='batch size')
 22 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
 23 | parser.add_argument('--input-dim', type=int, default=3, help='num of hour')
 24 | parser.add_argument('--seq-len', type=int, default=7, help='num of parameter each hour')
 25 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
 26 | #超参数
 27 | parser.add_argument('--hidden_dim', type=int, default=128, help='hidden unit number')
 28 | parser.add_argument('--n_layer', type=int, default=2, help='model layer number')
 29 | 
 30 | opt = parser.parse_args()
 31 | 
 32 | path =os.getcwd()
 33 | #station的id
 34 | num = opt.station
 35 | 
 36 | path_2 = 'train_test_hour'
 37 | 
 38 | if opt.ifshuffle:
 39 |       path_2 = 'train_test_hour_shuffle'
 40 | 
 41 | ratio=0.9
 42 | #和数据集设计有关
 43 | input_dim = opt.input_dim
 44 | seq_len= opt.seq_len
 45 | # 定义超参数
 46 | batch_size = opt.batch_size
 47 | learning_rate = opt.lr
 48 | num_epoches = opt.epochs
 49 | hidden_dim = opt.hidden_dim
 50 | n_layer = opt.n_layer
 51 | 
 52 | 
 53 | # 雨水信息
 54 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
 55 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
 56 | train_feature = scale(train_feature, axis=0)
 57 | train_rain = np.reshape(train_rain, (-1, 1))
 58 | 
 59 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
 60 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
 61 | test_feature = scale(test_feature, axis=0)
 62 | test_rain = np.reshape(test_rain, (-1, 1))
 63 | 
 64 | 
 65 | 
 66 | model = lstm(num,input_dim,seq_len,hidden_dim=hidden_dim,n_layer=n_layer,batch_size=batch_size,learning_rate=learning_rate,shuffle=False,device_pu=device)
 67 | 
 68 | eval_best=model.fit(train_feature,train_rain,num_epoches=num_epoches)
 69 | print("best val:"+str(eval_best)+'\n')
 70 | 
 71 | y_lstm=model.predict(test_feature,test_rain)
 72 | 
 73 | error = mean_squared_error(test_rain, y_lstm)
 74 | print('Model Test MSE: %.3f' % error)
 75 | 
 76 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_lstm)
 77 | 
 78 | print('LSTM_rmse: %r' % rmse,
 79 |       'LSTM_mae: %r' % mae,
 80 |       'LSTM_mdae: %r' % mdae,
 81 |       'LSTM_r2: %r' % r2,
 82 |       'LSTM_var: %r' % var)
 83 | 
 84 | 
 85 | plt.plot(test_rain, 'b', label='real')
 86 | plt.plot(y_lstm, 'r', label='prediction',alpha=0.3)
 87 | plt.legend(loc='best')
 88 | plt.show()
 89 | 
 90 | #保存数据
 91 | path ='./LSTM'
 92 | f = open('{}/test_LSTM_{}.txt'.format(path,num), 'w+')
 93 | f.write('LSTM_rmse: %r ' % rmse +
 94 |       'LSTM_mae: %r ' % mae +
 95 |       'LSTM_mdae: %r ' % mdae +
 96 |       'LSTM_r2: %r ' % r2 +
 97 |       'LSTM_var: %r ' % var)
 98 | f.close()
 99 | 
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/rain_shuffle/att_seq2seq.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | from sklearn.preprocessing import *
  4 | import os
  5 | from sklearn.model_selection import train_test_split
  6 | from eval import  evaluation
  7 | from att_seq2seq_module import Seq2Seq
  8 | from sklearn.metrics import *
  9 | import matplotlib.pyplot as plt
 10 | import argparse
 11 | 
 12 | # os.environ["CUDA_VISIBLE_DEVICES"]='0'
 13 | # torch.cuda.set_device(0)
 14 | 
 15 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 16 | 
 17 | #设计传参数
 18 | parser = argparse.ArgumentParser()
 19 | parser.add_argument('--station', type=int, default=371, help='id of station')
 20 | parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
 21 | parser.add_argument('--batch-size', type=int, default=30, help='batch size')
 22 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
 23 | parser.add_argument('--input-dim', type=int, default=7, help='num of hour')
 24 | parser.add_argument('--seq-len', type=int, default=3, help='num of parameter each hour')
 25 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
 26 | opt = parser.parse_args()
 27 | 
 28 | path =os.getcwd()
 29 | #station的id
 30 | num = opt.station
 31 | 
 32 | path_2 = 'train_test_hour'
 33 | 
 34 | if opt.ifshuffle:
 35 |       path_2 = 'train_test_hour_shuffle'
 36 | 
 37 | ratio=0.9
 38 | #和数据集设计有关
 39 | input_dim = opt.input_dim
 40 | seq_len= opt.seq_len
 41 | # 定义超参数
 42 | batch_size = opt.batch_size
 43 | learning_rate = opt.lr
 44 | num_epoches = opt.epochs
 45 | 
 46 | 
 47 | # 雨水信息
 48 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
 49 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
 50 | train_feature = scale(train_feature, axis=0)
 51 | train_rain = np.reshape(train_rain, (-1, 1))
 52 | 
 53 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
 54 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
 55 | test_feature = scale(test_feature, axis=0)
 56 | test_rain = np.reshape(test_rain, (-1, 1))
 57 | 
 58 | 
 59 | model = Seq2Seq(num,input_dim,seq_len,output_dim=1,hidden_size=128,dropout=0.1,learning_rate=learning_rate,batch_size=batch_size,device_pu=device)
 60 | 
 61 | eval_best=model.fit(train_feature,train_rain,num_epoches=num_epoches,shuffle=False)
 62 | print("best val:"+str(eval_best)+'\n')
 63 | 
 64 | y_seq2seq,y_attention=model.predict(test_feature,test_rain)
 65 | 
 66 | error = mean_squared_error(test_rain, y_seq2seq)
 67 | print('Model Test MSE: %.3f' % error)
 68 | 
 69 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_seq2seq)
 70 | 
 71 | print('ATTSEQ2SEQ_rmse: %r' % rmse,
 72 |       'ATTSEQ2SEQ_mae: %r' % mae,
 73 |       'ATTSEQ2SEQ_mdae: %r' % mdae,
 74 |       'ATTSEQ2SEQ_r2: %r' % r2,
 75 |       'ATTSEQ2SEQ_var: %r' % var)
 76 | 
 77 | 
 78 | plt.plot(test_rain, 'b', label='real')
 79 | plt.plot(y_seq2seq, 'r', label='prediction',alpha=0.3)
 80 | plt.legend(loc='best')
 81 | plt.show()
 82 | 
 83 | 
 84 | fig =plt.figure()
 85 | ax = fig.add_subplot(111)
 86 | cax = ax.matshow(y_attention[:5,:], cmap='bone')
 87 | fig.colorbar(cax)
 88 | plt.show()
 89 | 
 90 | path ='./ATT_SEQ2SEQ'
 91 | f = open('{}/test_ATT_SEQ2SEQ_{}.txt'.format(path,num), 'w+')
 92 | f.write('ATT_SEQ2SEQ_rmse: %r ' % rmse +
 93 |       'ATT_SEQ2SEQ_mae: %r ' % mae +
 94 |       'ATT_SEQ2SEQ_mdae: %r ' % mdae +
 95 |       'ATT_SEQ2SEQ_r2: %r ' % r2 +
 96 |       'ATT_SEQ2SEQ_var: %r ' % var)
 97 | f.close()
 98 | 
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/rain_shuffle/SVR_sigmoid.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from sklearn.preprocessing import *
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | from sklearn.svm import SVR
  6 | from sklearn.metrics import mean_squared_error
  7 | import os
  8 | from sklearn.model_selection import train_test_split
  9 | import joblib
 10 | from eval import evaluation
 11 | import argparse
 12 | 
 13 | #设计传参数
 14 | parser = argparse.ArgumentParser()
 15 | parser.add_argument('--station', type=int, default=371, help='id of station')
 16 | opt = parser.parse_args()
 17 | 
 18 | path =os.getcwd()
 19 | #station的id
 20 | num = opt.station
 21 | path_2 ='train_test_hour'
 22 | ratio=0.9
 23 | 
 24 | feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num))
 25 | rain = np.loadtxt("{}/{}/label{}.txt".format(path,path_2, num))
 26 | 
 27 | feature = scale(feature, axis=0)
 28 | rain = np.reshape(rain, (-1, 1))
 29 | # min_max_scaler = MinMaxScaler()
 30 | # rain = min_max_scaler.fit_transform(rain)
 31 | 
 32 | # 雨水信息
 33 | train_feature = feature[:int(ratio * len(feature)), :]
 34 | train_rain = rain[:int(ratio * len(feature)), :]
 35 | 
 36 | test_feature = feature[int(ratio * len(feature)):, :]
 37 | test_rain = rain[int(ratio * len(feature)):, :]
 38 | 
 39 | x_train, x_test, y_train, y_test = train_test_split(train_feature, train_rain, test_size=0.22, random_state=42)
 40 | 
 41 | 
 42 | """
 43 | 超参数
 44 | """
 45 | n_gamma = np.arange(0.005,0.05,0.005)
 46 | n_C =np.arange(0.1,1,0.1)
 47 | n_degree =np.arange(1,10,1)
 48 | paramters =['C','gamma','degree']
 49 | kernel ='sigmoid'
 50 | 
 51 | path ='./SVR_{}'.format(kernel)
 52 | 
 53 | 
 54 | gamma_g=0.01
 55 | c_g=50
 56 | degree_g=3
 57 | 
 58 | if os.path.exists(path):
 59 |     pass
 60 | else:
 61 |     os.mkdir(path)
 62 | 
 63 | 
 64 | 
 65 | error = 100
 66 | f = open('{}/train_{}_{}.txt'.format(path,num,paramters[0]), 'w+')
 67 | for c in n_C:
 68 |     # 模型训练，使用GBDT算法
 69 |     regressor = SVR(kernel = kernel,gamma=gamma_g,C=c,degree=degree_g)
 70 |     regressor.fit(x_train, y_train.ravel())
 71 |     y_pre_train = regressor.predict(x_train)
 72 |     y_pre_test = regressor.predict(x_test)
 73 |     error_1 = mean_squared_error(y_train, y_pre_train)
 74 |     print('Train MSE: %.3f' % error_1)
 75 |     error_2 = mean_squared_error(y_test, y_pre_test)
 76 |     print('Val MSE: %.3f' % error_2)
 77 | 
 78 |     f.write('{}: '.format(paramters[0]) + str(c) +
 79 |             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
 80 | 
 81 |     if error_2 < error:
 82 |         c_g = c
 83 |         error =error_2
 84 | f.close()
 85 | 
 86 | 
 87 | f = open('{}/train_{}_{}.txt'.format(path,num,paramters[1]), 'w+')
 88 | for gamma in n_gamma:
 89 |     # 模型训练，使用GBDT算法
 90 |     regressor = SVR(kernel = kernel,gamma=gamma,C=c_g,degree=degree_g)
 91 |     regressor.fit(x_train, y_train.ravel())
 92 |     y_pre_train = regressor.predict(x_train)
 93 |     y_pre_test = regressor.predict(x_test)
 94 |     error_1 = mean_squared_error(y_train, y_pre_train)
 95 |     print('Train MSE: %.3f' % error_1)
 96 |     error_2 = mean_squared_error(y_test, y_pre_test)
 97 |     print('Val MSE: %.3f' % error_2)
 98 | 
 99 |     f.write('{}: '.format(paramters[1]) + str(gamma) +
100 |             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
101 | 
102 |     if error_2 <error:
103 |         gamma_g = gamma
104 |         error = error_2
105 | f.close()
106 | 
107 | 
108 | """
109 | 保存模型 加载模型 测试算法
110 | """
111 | #保存模型 加载模型 测试算法
112 | regressor = SVR(kernel=kernel, gamma=gamma_g, C=c_g,degree=degree_g)
113 | regressor.fit(x_train, y_train.ravel())
114 | joblib.dump(regressor, '{}/train_{}_{}.m'.format(path,kernel,num))   # 保存模型
115 | clf=joblib.load('{}/train_{}_{}.m'.format(path,kernel,num))
116 | y_svr = clf.predict(test_feature)
117 | error = mean_squared_error(test_rain, y_svr)
118 | print('Model Test MSE: %.3f' % error)
119 | 
120 | plt.plot(test_rain, 'b', label='real')
121 | plt.plot(y_svr, 'r', label='prediction',alpha=0.3)
122 | plt.legend(loc='best')
123 | plt.show()
124 | 
125 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_svr)
126 | 
127 | print('SVR_rmse: %r' % rmse,
128 |       'SVR_mae: %r' % mae,
129 |       'SVR_mdae: %r' % mdae,
130 |       'SVR_r2: %r' % r2,
131 |       'SVR_var: %r' % var)
132 | 
133 | f = open('{}/test_SVR_{}_{}.txt'.format(path,kernel,num), 'w+')
134 | f.write('SVR_rmse: %r ' % rmse +
135 |       'SVR_mae: %r ' % mae +
136 |       'SVR_mdae: %r ' % mdae +
137 |       'SVR_r2: %r ' % r2 +
138 |       'SVR_var: %r ' % var)
139 | f.close()
140 | 


--------------------------------------------------------------------------------
/rain_shuffle/SVR_rbf.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from sklearn.preprocessing import *
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | from sklearn.svm import SVR
  6 | from sklearn.metrics import mean_squared_error
  7 | import os
  8 | from sklearn.model_selection import train_test_split
  9 | import joblib
 10 | from eval import evaluation
 11 | import argparse
 12 | 
 13 | 
 14 | #设计传参数
 15 | parser = argparse.ArgumentParser()
 16 | parser.add_argument('--station', type=int, default=371, help='id of station')
 17 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
 18 | opt = parser.parse_args()
 19 | 
 20 | path =os.getcwd()
 21 | #station的id
 22 | num = opt.station
 23 | 
 24 | path_2 = 'train_test_hour'
 25 | 
 26 | if opt.ifshuffle:
 27 |       path_2 = 'train_test_hour_shuffle'
 28 | 
 29 | ratio=0.9
 30 | 
 31 | 
 32 | # 雨水信息
 33 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
 34 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
 35 | train_feature = scale(train_feature, axis=0)
 36 | train_rain = np.reshape(train_rain, (-1, 1))
 37 | 
 38 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
 39 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
 40 | test_feature = scale(test_feature, axis=0)
 41 | test_rain = np.reshape(test_rain, (-1, 1))
 42 | 
 43 | x_train, x_test, y_train, y_test = train_test_split(train_feature, train_rain, test_size=0.22, random_state=42)
 44 | 
 45 | 
 46 | """
 47 | 超参数
 48 | """
 49 | n_gamma = np.arange(0.01,0.1,0.01)
 50 | n_C =np.arange(1,100,10)
 51 | n_degree =np.arange(1,10,1)
 52 | paramters =['C','gamma','degree']
 53 | kernel ='rbf'
 54 | 
 55 | path ='./SVR_{}'.format(kernel)
 56 | 
 57 | 
 58 | 
 59 | gamma_g=0.01
 60 | c_g=50
 61 | degree_g=3
 62 | 
 63 | if os.path.exists(path):
 64 |     pass
 65 | else:
 66 |     os.mkdir(path)
 67 | 
 68 | 
 69 | error = 100
 70 | # f = open('{}/train_{}_{}.txt'.format(path,num,paramters[0]), 'w+')
 71 | # for c in n_C:
 72 | #     # 模型训练，使用GBDT算法
 73 | #     regressor = SVR(kernel = kernel,gamma=gamma_g,C=c)
 74 | #     regressor.fit(x_train, y_train.ravel())
 75 | #     y_pre_train = regressor.predict(x_train)
 76 | #     y_pre_test = regressor.predict(x_test)
 77 | #     error_1 = mean_squared_error(y_train, y_pre_train)
 78 | #     print('Train MSE: %.3f' % error_1)
 79 | #     error_2 = mean_squared_error(y_test, y_pre_test)
 80 | #     print('Val MSE: %.3f' % error_2)
 81 | 
 82 | #     f.write('{}: '.format(paramters[0]) + str(c) +
 83 | #             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
 84 | 
 85 | #     if error_2 < error:
 86 | #         c_g = c
 87 | #         error =error_2
 88 | # f.close()
 89 | 
 90 | 
 91 | # f = open('{}/train_{}_{}.txt'.format(path,num,paramters[1]), 'w+')
 92 | # for gamma in n_gamma:
 93 | #     # 模型训练，使用GBDT算法
 94 | #     regressor = SVR(kernel = kernel,gamma=gamma,C=c_g)
 95 | #     regressor.fit(x_train, y_train.ravel())
 96 | #     y_pre_train = regressor.predict(x_train)
 97 | #     y_pre_test = regressor.predict(x_test)
 98 | #     error_1 = mean_squared_error(y_train, y_pre_train)
 99 | #     print('Train MSE: %.3f' % error_1)
100 | #     error_2 = mean_squared_error(y_test, y_pre_test)
101 | #     print('Val MSE: %.3f' % error_2)
102 | 
103 | #     f.write('{}: '.format(paramters[1]) + str(gamma) +
104 | #             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
105 | 
106 | #     if error_2 <error:
107 | #         gamma_g = gamma
108 | #         error = error_2
109 | # f.close()
110 | 
111 | 
112 | """
113 | 保存模型 加载模型 测试算法
114 | """
115 | #保存模型 加载模型 测试算法
116 | regressor = SVR(kernel=kernel, gamma=gamma_g, C=c_g)
117 | regressor.fit(x_train, y_train.ravel())
118 | joblib.dump(regressor, '{}/train_{}_{}.m'.format(path,kernel,num))   # 保存模型
119 | clf=joblib.load('{}/train_{}_{}.m'.format(path,kernel,num))
120 | y_svr = clf.predict(test_feature)
121 | error = mean_squared_error(test_rain, y_svr)
122 | print('Model Test MSE: %.3f' % error)
123 | 
124 | plt.plot(test_rain, 'b', label='real')
125 | plt.plot(y_svr, 'r', label='prediction',alpha=0.3)
126 | plt.legend(loc='best')
127 | plt.show()
128 | 
129 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_svr)
130 | 
131 | print('SVR_rmse: %r' % rmse,
132 |       'SVR_mae: %r' % mae,
133 |       'SVR_made: %r' % mdae,
134 |       'SVR_r2: %r' % r2,
135 |       'SVR_var: %r' % var)
136 | 
137 | f = open('{}/test_SVR_{}_{}.txt'.format(path,kernel,num), 'w+')
138 | f.write('SVR_rmse: %r ' % rmse +
139 |       'SVR_mae: %r ' % mae +
140 |       'SVR_mdae: %r ' % mdae +
141 |       'SVR_r2: %r ' % r2 +
142 |       'SVR_var: %r ' % var)
143 | f.close()


--------------------------------------------------------------------------------
/rain_shuffle/SVR_poly.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from sklearn.preprocessing import *
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | from sklearn.svm import SVR
  6 | from sklearn.metrics import mean_squared_error
  7 | import os
  8 | from sklearn.model_selection import train_test_split
  9 | import joblib
 10 | from eval import evaluation
 11 | import argparse
 12 | 
 13 | 
 14 | #设计传参数
 15 | parser = argparse.ArgumentParser()
 16 | parser.add_argument('--station', type=int, default=371, help='id of station')
 17 | opt = parser.parse_args()
 18 | 
 19 | path =os.getcwd()
 20 | #station的id
 21 | num = opt.station
 22 | path_2 ='train_test_hour'
 23 | ratio=0.9
 24 | 
 25 | 
 26 | feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num))
 27 | rain = np.loadtxt("{}/{}/label{}.txt".format(path, path_2,num))
 28 | 
 29 | feature = scale(feature, axis=0)
 30 | rain = np.reshape(rain, (-1, 1))
 31 | # min_max_scaler = MinMaxScaler()
 32 | # rain = min_max_scaler.fit_transform(rain)
 33 | 
 34 | # 雨水信息
 35 | train_feature = feature[:int(ratio * len(feature)), :]
 36 | train_rain = rain[:int(ratio * len(feature)), :]
 37 | 
 38 | test_feature = feature[int(ratio * len(feature)):, :]
 39 | test_rain = rain[int(ratio * len(feature)):, :]
 40 | 
 41 | x_train, x_test, y_train, y_test = train_test_split(train_feature, train_rain, test_size=0.22, random_state=42)
 42 | 
 43 | 
 44 | """
 45 | 超参数
 46 | """
 47 | n_gamma = np.arange(0.01,0.1,0.01)
 48 | n_C =np.arange(1,100,10)
 49 | n_degree =np.arange(1,10,1)
 50 | paramters =['C','gamma','degree']
 51 | kernel ='poly'
 52 | 
 53 | path ='./SVR_{}'.format(kernel)
 54 | 
 55 | 
 56 | gamma_g=0.01
 57 | c_g=50
 58 | degree_g=3
 59 | 
 60 | if os.path.exists(path):
 61 |     pass
 62 | else:
 63 |     os.mkdir(path)
 64 | 
 65 | 
 66 | error = 100
 67 | f = open('{}/train_{}_{}.txt'.format(path,num,paramters[0]), 'w+')
 68 | for c in n_C:
 69 |     # 模型训练
 70 |     regressor = SVR(kernel = kernel,gamma=gamma_g,C=c,degree=degree_g)
 71 |     regressor.fit(x_train, y_train.ravel())
 72 |     y_pre_train = regressor.predict(x_train)
 73 |     y_pre_test = regressor.predict(x_test)
 74 |     error_1 = mean_squared_error(y_train, y_pre_train)
 75 |     print('Train MSE: %.3f' % error_1)
 76 |     error_2 = mean_squared_error(y_test, y_pre_test)
 77 |     print('Val MSE: %.3f' % error_2)
 78 | 
 79 |     f.write('{}: '.format(paramters[0]) + str(c) +
 80 |             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
 81 | 
 82 |     if error_2 < error:
 83 |         c_g = c
 84 |         error =error_2
 85 | f.close()
 86 | 
 87 | 
 88 | f = open('{}/train_{}_{}.txt'.format(path,num,paramters[1]), 'w+')
 89 | for gamma in n_gamma:
 90 |     # 模型训练，使用GBDT算法
 91 |     regressor = SVR(kernel = kernel,gamma=gamma,C=c_g,degree=degree_g)
 92 |     regressor.fit(x_train, y_train.ravel())
 93 |     y_pre_train = regressor.predict(x_train)
 94 |     y_pre_test = regressor.predict(x_test)
 95 |     error_1 = mean_squared_error(y_train, y_pre_train)
 96 |     print('Train MSE: %.3f' % error_1)
 97 |     error_2 = mean_squared_error(y_test, y_pre_test)
 98 |     print('Val MSE: %.3f' % error_2)
 99 | 
100 |     f.write('{}: '.format(paramters[1]) + str(gamma) +
101 |             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
102 |     if error_2 <error:
103 |         gamma_g = gamma
104 |         error = error_2
105 | f.close()
106 | 
107 | f = open('{}/train_{}_{}.txt'.format(path,num,paramters[2]), 'w+')
108 | for degree in n_degree:
109 |     # 模型训练，使用GBDT算法
110 |     regressor = SVR(kernel = kernel,gamma=gamma_g,C=c_g,degree=degree)
111 |     regressor.fit(x_train, y_train.ravel())
112 |     y_pre_train = regressor.predict(x_train)
113 |     y_pre_test = regressor.predict(x_test)
114 |     error_1 = mean_squared_error(y_train, y_pre_train)
115 |     print('Train MSE: %.3f' % error_1)
116 |     error_2 = mean_squared_error(y_test, y_pre_test)
117 |     print('Val MSE: %.3f' % error_2)
118 | 
119 |     f.write('{}: '.format(paramters[2]) + str(degree) +
120 |             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
121 | 
122 |     if error_2 <error:
123 |         degree_g = degree
124 |         error = error_2
125 | f.close()
126 | 
127 | 
128 | """
129 | 保存模型 加载模型 测试算法
130 | """
131 | #保存模型 加载模型 测试算法
132 | regressor = SVR(kernel=kernel, gamma=gamma_g, C=c_g,degree=degree_g)
133 | regressor.fit(x_train, y_train.ravel())
134 | joblib.dump(regressor, '{}/train_{}_{}.m'.format(path,kernel,num))   # 保存模型
135 | clf=joblib.load('{}/train_{}_{}.m'.format(path,kernel,num))
136 | y_svr = clf.predict(test_feature)
137 | error = mean_squared_error(test_rain, y_svr)
138 | print('Model Test MSE: %.3f' % error)
139 | 
140 | plt.plot(test_rain, 'b', label='real')
141 | plt.plot(y_svr, 'r', label='prediction',alpha=0.3)
142 | plt.legend(loc='best')
143 | plt.show()
144 | 
145 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_svr)
146 | 
147 | print('SVR_rmse: %r' % rmse,
148 |       'SVR_mae: %r' % mae,
149 |       'SVR_mdae: %r' % mdae,
150 |       'SVR_r2: %r' % r2,
151 |       'SVR_var: %r' % var)
152 | 
153 | f = open('{}/test_SVR_{}_{}.txt'.format(path,kernel,num), 'w+')
154 | f.write('SVR_rmse: %r ' % rmse +
155 |       'SVR_mae: %r ' % mae +
156 |       'SVR_mdae: %r ' % mdae +
157 |       'SVR_r2: %r ' % r2 +
158 |       'SVR_var: %r ' % var)
159 | f.close()


--------------------------------------------------------------------------------
/rain_shuffle/GBRT.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from sklearn.model_selection import train_test_split
  3 | from sklearn.ensemble import GradientBoostingRegressor
  4 | from sklearn.externals import *
  5 | import joblib
  6 | import os
  7 | import numpy as np
  8 | from sklearn.preprocessing import *
  9 | from sklearn.metrics import mean_squared_error
 10 | import matplotlib.pyplot as plt
 11 | 
 12 | from eval import  evaluation
 13 | import argparse
 14 | 
 15 | #设计传参数
 16 | parser = argparse.ArgumentParser()
 17 | parser.add_argument('--station', type=int, default=371, help='id of station')
 18 | parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
 19 | opt = parser.parse_args()
 20 | 
 21 | path =os.getcwd()
 22 | #station的id
 23 | num = opt.station
 24 | 
 25 | path_2 = 'train_test_hour'
 26 | 
 27 | if opt.ifshuffle:
 28 |       path_2 = 'train_test_hour_shuffle'
 29 | 
 30 | ratio=0.9
 31 | 
 32 | 
 33 | # 雨水信息
 34 | train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
 35 | train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
 36 | train_feature = scale(train_feature, axis=0)
 37 | train_rain = np.reshape(train_rain, (-1, 1))
 38 | 
 39 | test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
 40 | test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
 41 | test_feature = scale(test_feature, axis=0)
 42 | test_rain = np.reshape(test_rain, (-1, 1))
 43 | 
 44 | 
 45 | """
 46 | 训练集和验证集分离
 47 | """
 48 | x_train, x_test, y_train, y_test = train_test_split(train_feature, train_rain,test_size=0.22, random_state=42)
 49 | 
 50 | 
 51 | """
 52 | 超参数
 53 | """
 54 | n_est = np.arange(10,100,10)
 55 | n_dep =np.arange(1,10,1)
 56 | n_lr =np.arange(0.01,0.1,0.01)
 57 | paramters =['lr','est','depth']
 58 | 
 59 | path ='./GBRT'
 60 | 
 61 | """
 62 | 初始超参数
 63 | """
 64 | lr_g=0.1
 65 | est_g=50
 66 | dep_g=5
 67 | 
 68 | """
 69 | 建立文件夹
 70 | """
 71 | if os.path.exists(path):
 72 |     pass
 73 | else:
 74 |     os.mkdir(path)
 75 | 
 76 | 
 77 | # error = 100
 78 | # f = open('{}/train_{}_{}.txt'.format(path,num,paramters[0]), 'w+')
 79 | # for lr in n_lr:
 80 | #     # 模型训练，使用GBDT算法
 81 | #     gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep_g, min_samples_split=3, learning_rate=lr)
 82 | #     gbr.fit(x_train, y_train.ravel())
 83 | #     y_pre_train = gbr.predict(x_train)
 84 | #     y_pre_test = gbr.predict(x_test)
 85 | #     error_1 = mean_squared_error(y_train, y_pre_train)
 86 | #     print('Train MSE: %.3f' % error_1)
 87 | #     error_2 = mean_squared_error(y_test, y_pre_test)
 88 | #     print('Val MSE: %.3f' % error_2)
 89 | 
 90 | #     f.write('{}: '.format(paramters[0]) + str(lr) +
 91 | #             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')    
 92 | #     if error_2 < error:
 93 | #         lr_g = lr
 94 | #         error =error_2
 95 | # f.close()
 96 | 
 97 | 
 98 | # f = open('{}/train_{}_{}.txt'.format(path,num,paramters[1]), 'w+')
 99 | # for est in n_est:
100 | #     # 模型训练，使用GBDT算法
101 | #     gbr = GradientBoostingRegressor(n_estimators=est, max_depth=dep_g, min_samples_split=3, learning_rate=lr_g)
102 | #     gbr.fit(x_train, y_train.ravel())
103 | #     y_pre_train = gbr.predict(x_train)
104 | #     y_pre_test = gbr.predict(x_test)
105 | #     error_1 = mean_squared_error(y_train, y_pre_train)
106 | #     print('Train MSE: %.3f' % error_1)
107 | #     error_2 = mean_squared_error(y_test, y_pre_test)
108 | #     print('Val MSE: %.3f' % error_2)
109 | 
110 | #     f.write('{}: '.format(paramters[1])+ str(est) +
111 | #             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
112 | #     if error_2 <error:
113 | #         est_g = est
114 | #         error = error_2
115 | 
116 | # f.close()
117 | 
118 | 
119 | # f = open('{}/train_{}_{}.txt'.format(path,num,paramters[2]), 'w+')
120 | # for dep in n_dep:
121 | #     # 模型训练，使用GBDT算法
122 | #     gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep, min_samples_split=3, learning_rate=lr_g)
123 | #     gbr.fit(x_train, y_train.ravel())
124 | #     y_pre_train = gbr.predict(x_train)
125 | #     y_pre_test = gbr.predict(x_test)
126 | #     error_1 = mean_squared_error(y_train, y_pre_train)
127 | #     print('Train MSE: %.3f' % error_1)
128 | #     error_2 = mean_squared_error(y_test, y_pre_test)
129 | #     print('Val MSE: %.3f' % error_2)
130 | 
131 | #     f.write('{}: '.format(paramters[2]) + str(dep) +
132 | #             " Train_MSE: " + str(error_1) +' Val_MSE: '+ str(error_2)+'\n')
133 | 
134 | #     if error_2 < error:
135 | #         dep_g = dep
136 | #         error = error_2
137 | 
138 | # f.close()
139 | 
140 | """
141 | 保存模型 加载模型 测试算法
142 | """
143 | #保存模型 加载模型 测试算法
144 | gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep_g, min_samples_split=3, learning_rate=lr_g)
145 | gbr.fit(x_train, y_train.ravel())
146 | joblib.dump(gbr, '{}/train_GBRT_{}.m'.format(path,num))   # 保存模型
147 | clf=joblib.load('{}/train_GBRT_{}.m'.format(path,num))
148 | y_gbrt = clf.predict(test_feature)
149 | error = mean_squared_error(test_rain, y_gbrt)
150 | print('Model Test MSE: %.3f' % error)
151 | 
152 | plt.plot(test_rain, 'b', label='real')
153 | plt.plot(y_gbrt, 'r', label='prediction',alpha=0.3)
154 | plt.legend(loc='best')
155 | plt.show()
156 | 
157 | rmse, mae, mdae,r2,var = evaluation(test_rain, y_gbrt)
158 | 
159 | print('GBRT_rmse: %r' % rmse,
160 |       'GBRT_mae: %r' % mae,
161 |       'GBRT_mdae: %r' % mdae,
162 |       'GBRT_r2: %r' % r2,
163 |       'GBRT_var: %r' % var)
164 | 
165 | f = open('{}/test_GBRT_{}.txt'.format(path,num), 'w+')
166 | f.write('GBRT_rmse: %r ' % rmse +
167 |       'GBRT_mae: %r ' % mae +
168 |       'GBRT_mdae: %r ' % mdae +
169 |       'GBRT_r2: %r ' % r2 +
170 |       'GBRT_var: %r ' % var)
171 | f.close()
172 | 


--------------------------------------------------------------------------------
/rain_shuffle/XGB.py:
--------------------------------------------------------------------------------
  1 | # 实现XGBoost回归, 以MSE损失函数为例
  2 | import numpy as np
  3 | from sklearn.model_selection import train_test_split
  4 | from sklearn.metrics import mean_absolute_error,mean_squared_error
  5 | import matplotlib.pyplot as plt
  6 | import os
  7 | from sklearn.preprocessing import *
  8 | import xgboost as xgb
  9 | from xgboost import plot_importance
 10 | import joblib
 11 | from eval import evaluation
 12 | import argparse
 13 | 
 14 | if __name__ == '__main__':
 15 | 
 16 |     #设计传参数
 17 |     parser = argparse.ArgumentParser()
 18 |     parser.add_argument('--station', type=int, default=371, help='id of station')
 19 |     parser.add_argument('--ifshuffle',action='store_true',help='shuffle data or not')
 20 |     opt = parser.parse_args()
 21 | 
 22 |     path =os.getcwd()
 23 |     #station的id
 24 |     num = opt.station
 25 | 
 26 |     path_2 = 'train_test_hour'
 27 | 
 28 |     if opt.ifshuffle:
 29 |         path_2 = 'train_test_hour_shuffle'
 30 | 
 31 |     ratio=0.9
 32 | 
 33 | 
 34 |     # 雨水信息
 35 |     train_feature = np.loadtxt("{}/{}/train{}.txt".format(path,path_2, num)).astype(np.float32)
 36 |     train_rain = np.loadtxt("{}/{}/train_label{}.txt".format(path,path_2, num)).astype(np.float32)
 37 |     train_feature = scale(train_feature, axis=0)
 38 |     train_rain = np.reshape(train_rain, (-1, 1))
 39 | 
 40 |     test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
 41 |     test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
 42 |     test_feature = scale(test_feature, axis=0)
 43 |     test_rain = np.reshape(test_rain, (-1, 1))
 44 | 
 45 | 
 46 |     x_train, x_test, y_train, y_test = train_test_split(train_feature, train_rain, test_size=0.22, random_state=42)
 47 | 
 48 |     """
 49 |     超参数
 50 |     """
 51 |     n_est = np.arange(10, 100, 10)
 52 |     n_dep = np.arange(1, 10, 1)
 53 |     n_lr = np.arange(0.01, 0.1, 0.01)
 54 |     paramters = ['lr', 'est', 'depth']
 55 | 
 56 |     path = './XGB'
 57 | 
 58 |     lr_g = 0.05
 59 |     est_g = 30
 60 |     dep_g = 3
 61 | 
 62 | 
 63 |     if os.path.exists(path):
 64 |         pass
 65 |     else:
 66 |         os.mkdir(path)
 67 | 
 68 |     error = 100
 69 |     # f = open('{}/train_{}_{}.txt'.format(path,num, paramters[0]), 'w+')
 70 |     # for lr in n_lr:
 71 |     #     # 模型训练，使用XGB算法
 72 |     #     XGB = xgb.XGBRegressor(max_depth=dep_g, learning_rate=lr, n_estimators=est_g, silent=True, objective='reg:gamma')
 73 |     #     XGB.fit(x_train, y_train.ravel())
 74 |     #     y_pre_train = XGB.predict(x_train)
 75 |     #     y_pre_test = XGB.predict(x_test)
 76 |     #     error_1 = mean_squared_error(y_train, y_pre_train)
 77 |     #     print('Train MSE: %.3f' % error_1)
 78 |     #     error_2 = mean_squared_error(y_test, y_pre_test)
 79 |     #     print('Val MSE: %.3f' % error_2)
 80 | 
 81 |     #     f.write('{}: '.format(paramters[0]) + str(lr) +
 82 |     #             " Train_MSE: " + str(error_1) + ' Val_MSE: ' + str(error_2) + '\n')
 83 | 
 84 |     #     if error_2 < error:
 85 |     #         lr_g = lr
 86 |     #         error = error_2
 87 |     # f.close()
 88 | 
 89 | 
 90 |     # f = open('{}/train_{}_{}.txt'.format(path,num, paramters[1]), 'w+')
 91 |     # for est in n_est:
 92 |     #     # 模型训练，使用GBDT算法
 93 |     #     XGB = xgb.XGBRegressor(max_depth=dep_g, learning_rate=lr_g, n_estimators=est, silent=True, objective='reg:gamma')
 94 |     #     XGB.fit(x_train, y_train.ravel())
 95 |     #     y_pre_train = XGB.predict(x_train)
 96 |     #     y_pre_test = XGB.predict(x_test)
 97 |     #     error_1 = mean_squared_error(y_train, y_pre_train)
 98 |     #     print('Train MSE: %.3f' % error_1)
 99 |     #     error_2 = mean_squared_error(y_test, y_pre_test)
100 |     #     print('Val MSE: %.3f' % error_2)
101 | 
102 |     #     f.write('{}: '.format(paramters[1]) + str(est) +
103 |     #             " Train_MSE: " + str(error_1) + ' Val_MSE: ' + str(error_2) + '\n')
104 | 
105 |     #     if error_2 < error:
106 |     #         est_g = est
107 |     #         error = error_2
108 |     # f.close()
109 | 
110 | 
111 |     # f = open('{}/train_{}_{}.txt'.format(path,num, paramters[2]), 'w+')
112 |     # for dep in n_dep:
113 |     #     # 模型训练，使用GBDT算法
114 |     #     XGB = xgb.XGBRegressor(max_depth=dep, learning_rate=lr_g, n_estimators=est_g, silent=True, objective='reg:gamma')
115 |     #     XGB.fit(x_train, y_train.ravel())
116 |     #     y_pre_train = np.nan_to_num(XGB.predict(x_train))
117 |     #     y_pre_test = np.nan_to_num(XGB.predict(x_test))
118 |     #     error_1 = mean_squared_error(y_train, y_pre_train)
119 |     #     print('Train MSE: %.3f' % error_1)
120 |     #     error_2 = mean_squared_error(y_test, y_pre_test)
121 |     #     print('Val MSE: %.3f' % error_2)
122 | 
123 |     #     f.write('{}: '.format(paramters[2]) + str(dep) +
124 |     #             " Train_MSE: " + str(error_1) + ' Val_MSE: ' + str(error_2) + '\n')
125 | 
126 |     #     if error_2 < error:
127 |     #         dep_g = dep
128 |     #         error = error_2
129 |     # f.close()
130 | 
131 |     """
132 |     保存模型 加载模型 测试算法
133 |     """
134 |     # 保存模型 加载模型 测试算法
135 |     XGB = xgb.XGBRegressor(max_depth=3, learning_rate=0.03, n_estimators=80, silent=True, objective='reg:gamma')
136 |     XGB.fit(x_train, y_train.ravel())
137 |     joblib.dump(XGB, '{}/train_XGB_{}.m'.format(path,num))  # 保存模型
138 |     clf = joblib.load('{}/train_XGB_{}.m'.format(path,num))
139 |     y_xgb = clf.predict(test_feature)
140 |     error = mean_squared_error(test_rain, y_xgb)
141 |     print('Model Test MSE: %.3f' % error)
142 | 
143 |     plt.plot(test_rain, 'b', label='real')
144 |     plt.plot(y_xgb, 'r', label='prediction', alpha=0.3)
145 |     plt.legend(loc='best')
146 |     plt.show()
147 | 
148 |     plot_importance(XGB)
149 |     plt.show()
150 | 
151 |     rmse, mae, mdae, r2, var = evaluation(test_rain, y_xgb)
152 | 
153 |     print('XGB_rmse: %r' % rmse,
154 |           'XGB_mae: %r' % mae,
155 |           'XGB_mdae: %r' % mdae,
156 |           'XGB_r2: %r' % r2,
157 |           'XGB_var: %r' % var)
158 | 
159 |     f = open('{}/test_XGB_{}.txt'.format(path,num), 'w+')
160 |     f.write('XGB_rmse: %r ' % rmse +
161 |       'XGB_mae: %r ' % mae +
162 |       'XGB_mdae: %r ' % mdae +
163 |       'XGB_r2: %r ' % r2 +
164 |       'XGB_var: %r ' % var)
165 |     f.close()
166 | 


--------------------------------------------------------------------------------
/rain_shuffle/MLP_module.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn, optim
  3 | from torch.autograd import Variable
  4 | from torch import Tensor
  5 | from torch.utils.data import DataLoader
  6 | import numpy as np
  7 | from sklearn.preprocessing import *
  8 | import os
  9 | from sklearn.model_selection import train_test_split
 10 | from eval import  evaluation
 11 | from sklearn.metrics import *
 12 | 
 13 | 
 14 | path ='./MLP'
 15 | 
 16 | # 定义GetLoader类，继承Dataset方法，并重写__getitem__()和__len__()方法
 17 | class GetLoader(torch.utils.data.Dataset):
 18 | 	# 初始化函数，得到数据
 19 |     def __init__(self, data_root, data_label):
 20 |         self.data = data_root
 21 |         self.label = data_label
 22 |     # index是根据batchsize划分数据后得到的索引，最后将data和对应的labels进行一起返回
 23 |     def __getitem__(self, index):
 24 |         data = self.data[index]
 25 |         labels = self.label[index]
 26 |         return data, labels
 27 |     # 该函数返回数据大小长度，目的是DataLoader方便划分，如果不知道大小，DataLoader会一脸懵逼
 28 |     def __len__(self):
 29 |         return len(self.data)
 30 | 
 31 | 
 32 | # 定义 MLP 模型
 33 | class MLP_module(nn.Module):
 34 |     def __init__(self, in_dim, hidden_dim, n_layer, n_class):
 35 |         super(MLP_module, self).__init__()
 36 |         self.n_layer = n_layer
 37 |         self.hidden_dim = hidden_dim
 38 | 
 39 |         self.classifier0 = nn.Linear(in_dim,n_class)
 40 | 
 41 |         self.classifier1 = nn.Linear(in_dim, hidden_dim)
 42 |         self.classifier2 = nn.Linear(hidden_dim, n_class)
 43 | 
 44 |         self.classifier1_2 = nn.Linear(hidden_dim, hidden_dim)
 45 |         self.classifier2_3 = nn.Linear(hidden_dim, hidden_dim)
 46 |         self.classifier3_4 = nn.Linear(hidden_dim, hidden_dim)
 47 | 
 48 |     def forward(self, x):
 49 |         if self.n_layer ==1:
 50 |             out = self.classifier0(x)
 51 |             out = torch.relu(out)
 52 |         elif self.n_layer >=2:
 53 |             out= self.classifier1(x)
 54 |             out =torch.relu(out)
 55 |             if self.n_layer == 3:
 56 |                 out =self.classifier1_2(out)
 57 |             if self.n_layer == 4:
 58 |                 out =self.classifier2_3(out)
 59 |             if self.n_layer == 5:
 60 |                 out =self.classifier3_4(out)
 61 |             out = self.classifier2(out)
 62 |             out = torch.relu(out)
 63 |         return out
 64 | 
 65 | 
 66 | class MLP():
 67 |     def __init__(self,num,input_dim,hidden_dim,n_layer,batch_size = 100,learning_rate = 2e-3,shuffle=True,device_pu='cpu'):
 68 |         self.station =num
 69 |         self.input_dim =input_dim
 70 |         self.hidden_dim =hidden_dim
 71 |         self.n_layer=n_layer
 72 |         self.bs=batch_size
 73 |         self.lr =learning_rate
 74 |         self.shuffle =shuffle
 75 |         self.device =device_pu
 76 |         self.model = MLP_module(input_dim, hidden_dim, n_layer, 1).to(self.device)
 77 |         self.criterion = nn.MSELoss()
 78 |         self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
 79 | 
 80 |     def fit(self,data,label,num_epoches = 100):
 81 | 
 82 |         x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.22, random_state=42)
 83 | 
 84 |         train = GetLoader(x_train, y_train)
 85 |         data_train=torch.utils.data.DataLoader(train, batch_size=self.bs, shuffle=self.shuffle)
 86 |         test = GetLoader(x_test, y_test)
 87 |         data_test = torch.utils.data.DataLoader(test, batch_size=self.bs, shuffle=self.shuffle)
 88 | 
 89 |         if os.path.exists(path):
 90 |             pass
 91 |         else:
 92 |             os.mkdir(path)
 93 | 
 94 |         eval_loss_best = np.inf
 95 | 
 96 |         f = open('{}/train_{}_{}_{}_{}.txt'.format(path,self.station,self.hidden_dim, self.n_layer,self.lr), 'w+')
 97 |         # 开始训练
 98 |         for epoch in range(num_epoches):
 99 |             self.model.train()
100 |             print('epoch {}'.format(epoch + 1))
101 |             print('**************************************')
102 |             running_loss = 0.0
103 | 
104 |             for i, data in enumerate(data_train, 1):
105 |                 """
106 |                 随机打乱的方式不好 应该是全部打乱之后 固定抽取 否则会出现样本利用不均衡的问题
107 |                 """
108 |                 img, label = data
109 |                 img = Variable(img).to(self.device)
110 |                 label = Variable(label).to(self.device)
111 | 
112 |                 # 向前传播
113 |                 out = self.model(img)
114 |                 loss = self.criterion(out, label)
115 |                 running_loss += loss.data.item() * label.size(0)
116 |                 # 向后传播
117 |                 self.optimizer.zero_grad()
118 |                 loss.backward()
119 |                 self.optimizer.step()
120 | 
121 |             train_loss =running_loss / (len(y_train))
122 |             print('Finish {} epoch, Loss: {:.6f}'.format(
123 |                 epoch + 1, train_loss))
124 | 
125 |             self.model.eval()
126 |             eval_loss = 0.
127 |             for data in data_test:
128 |                 img, label = data
129 | 
130 |                 img = Variable(img).to(self.device)
131 |                 label = Variable(label).to(self.device)
132 |                 out = self.model(img)
133 |                 loss = self.criterion(out, label)
134 |                 eval_loss += loss.data.item() * label.size(0)
135 |             val_loss =eval_loss / (len(y_test))
136 |             print('Val Loss: {:.6f}'.format(val_loss))
137 | 
138 | 
139 |             f.write(" Train_MSE: " + str(train_loss) + ' Val_MSE: ' + str(val_loss) + '\n')
140 |             
141 | 
142 |             if val_loss < eval_loss_best:
143 |                 eval_loss_best =val_loss
144 |                 self.eval = eval_loss_best
145 |                 torch.save(self.model, '{}/mlp_{}_{}_{}_{}.pth'.format(path,self.station,self.hidden_dim, self.n_layer,self.lr))
146 |         f.close()
147 |         
148 |         return self.eval
149 | 
150 |     def predict(self,test_data,test_label,):
151 | 
152 |         test_model = torch.load('{}/mlp_{}_{}_{}_{}.pth'.format(path,self.station,self.hidden_dim, self.n_layer,self.lr)).to(self.device)
153 | 
154 |         test_loss = 0
155 |         criterion = nn.MSELoss()
156 |         test = GetLoader(test_data, test_label)
157 |         data_test = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)
158 | 
159 |         y_mlp = []
160 |         for data in data_test:
161 | 
162 |             img, label = data
163 |             img = Variable(img).to(self.device)
164 |             label = Variable(label).to(self.device)
165 |             out = test_model(img)
166 |             loss = criterion(out, label)
167 |             test_loss += loss.data
168 |             y_mlp.append(out.data)
169 | 
170 | 
171 |         print('Test Loss: {:.6f}'.format(test_loss / (len(
172 |             test_label))))
173 | 
174 |         y_mlp = np.array(y_mlp).squeeze()[:,np.newaxis]
175 | 
176 | 
177 |         return y_mlp
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | 
185 | 
186 | 


--------------------------------------------------------------------------------
/rain_shuffle/run_no.sh:
--------------------------------------------------------------------------------
  1 | #！/bin/bash
  2 | # 第一个bash文件
  3 | clear;
  4 | echo 'Hello';
  5 | # source activate py3 
  6 | 
  7 | #运行ARIMA
  8 | # python ARIMA.py --station 312 
  9 | # python ARIMA.py --station 313 
 10 | # python ARIMA.py --station 314 
 11 | # python ARIMA.py --station 315 
 12 | # python ARIMA.py --station 316 
 13 | # python ARIMA.py --station 371 
 14 | # python ARIMA.py --station 372 
 15 | # python ARIMA.py --station 373 
 16 | # python ARIMA.py --station 374 
 17 | # python ARIMA.py --station 393 
 18 | # python ARIMA.py --station 394 
 19 | # python ARIMA.py --station 396 
 20 | 
 21 | # #运行SVR_rbf
 22 | python SVR_rbf.py --station 312 
 23 | python SVR_rbf.py --station 313 
 24 | python SVR_rbf.py --station 314 
 25 | python SVR_rbf.py --station 315 
 26 | python SVR_rbf.py --station 316 
 27 | python SVR_rbf.py --station 371 
 28 | python SVR_rbf.py --station 372 
 29 | python SVR_rbf.py --station 373 
 30 | python SVR_rbf.py --station 374 
 31 | python SVR_rbf.py --station 393
 32 | python SVR_rbf.py --station 394 
 33 | python SVR_rbf.py --station 396 
 34 | 
 35 | 
 36 | #运行GBRT
 37 | python GBRT.py --station 312 
 38 | python GBRT.py --station 313 
 39 | python GBRT.py --station 314 
 40 | python GBRT.py --station 315 
 41 | python GBRT.py --station 316 
 42 | python GBRT.py --station 371 
 43 | python GBRT.py --station 372 
 44 | python GBRT.py --station 373 
 45 | python GBRT.py --station 374 
 46 | python GBRT.py --station 393 
 47 | python GBRT.py --station 394
 48 | python GBRT.py --station 396 
 49 | 
 50 | 
 51 | # #运行lstm_
 52 | python lstm_.py --station 312  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 53 | python lstm_.py --station 313  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 54 | python lstm_.py --station 314  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 55 | python lstm_.py --station 315  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 56 | python lstm_.py --station 316  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 57 | python lstm_.py --station 371  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 58 | python lstm_.py --station 372  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 59 | python lstm_.py --station 373  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 60 | python lstm_.py --station 374  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 61 | python lstm_.py --station 393  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 62 | python lstm_.py --station 394  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 63 | python lstm_.py --station 396  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 64 | 
 65 | 
 66 | # #运行mlp
 67 | python mlp.py --station 312 --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 68 | python mlp.py --station 313  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 69 | python mlp.py --station 314  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 70 | python mlp.py --station 315  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 71 | python mlp.py --station 316  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 72 | python mlp.py --station 371  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 73 | python mlp.py --station 372  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 74 | python mlp.py --station 373  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 75 | python mlp.py --station 374  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 76 | python mlp.py --station 393  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 77 | python mlp.py --station 394  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 78 | python mlp.py --station 396  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 79 | 
 80 | # # #运行seq2seq
 81 | python seq2seq.py --station 312  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 82 | python seq2seq.py --station 313  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 83 | python seq2seq.py --station 314  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 84 | python seq2seq.py --station 315  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 85 | python seq2seq.py --station 316  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 86 | python seq2seq.py --station 371  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 87 | python seq2seq.py --station 372  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 88 | python seq2seq.py --station 373  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 89 | python seq2seq.py --station 374  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 90 | python seq2seq.py --station 393  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 91 | python seq2seq.py --station 394  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 92 | python seq2seq.py --station 396  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 93 | 
 94 | 
 95 | # # #运行att_seq2seq
 96 | python att_seq2seq.py --station 312  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
 97 | python att_seq2seq.py --station 313  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
 98 | python att_seq2seq.py --station 314  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
 99 | python att_seq2seq.py --station 315  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
100 | python att_seq2seq.py --station 316  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
101 | python att_seq2seq.py --station 371  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
102 | python att_seq2seq.py --station 372  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
103 | python att_seq2seq.py --station 373  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
104 | python att_seq2seq.py --station 374  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
105 | python att_seq2seq.py --station 393  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
106 | python att_seq2seq.py --station 394  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
107 | python att_seq2seq.py --station 396  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
108 | 
109 | 
110 | # # #运行XGB
111 | python XGB.py --station 312 
112 | python XGB.py --station 313 
113 | python XGB.py --station 314 
114 | python XGB.py --station 315 
115 | python XGB.py --station 316 
116 | python XGB.py --station 371 
117 | python XGB.py --station 372 
118 | python XGB.py --station 373 
119 | python XGB.py --station 374 
120 | python XGB.py --station 393 
121 | python XGB.py --station 394 
122 | python XGB.py --station 396 


--------------------------------------------------------------------------------
/rain_shuffle/run_no_time2.sh:
--------------------------------------------------------------------------------
  1 | #！/bin/bash
  2 | # 第一个bash文件
  3 | clear;
  4 | echo 'Hello';
  5 | # source activate py3 
  6 | 
  7 | #运行ARIMA
  8 | # python ARIMA.py --station 312 
  9 | # python ARIMA.py --station 313 
 10 | # python ARIMA.py --station 314 
 11 | # python ARIMA.py --station 315 
 12 | # python ARIMA.py --station 316 
 13 | # python ARIMA.py --station 371 
 14 | # python ARIMA.py --station 372 
 15 | # python ARIMA.py --station 373 
 16 | # python ARIMA.py --station 374 
 17 | # python ARIMA.py --station 393 
 18 | # python ARIMA.py --station 394 
 19 | # python ARIMA.py --station 396 
 20 | 
 21 | # #运行SVR_rbf
 22 | python SVR_rbf.py --station 312 
 23 | python SVR_rbf.py --station 313 
 24 | python SVR_rbf.py --station 314 
 25 | python SVR_rbf.py --station 315 
 26 | python SVR_rbf.py --station 316 
 27 | python SVR_rbf.py --station 371 
 28 | python SVR_rbf.py --station 372 
 29 | python SVR_rbf.py --station 373 
 30 | python SVR_rbf.py --station 374 
 31 | python SVR_rbf.py --station 393
 32 | python SVR_rbf.py --station 394 
 33 | python SVR_rbf.py --station 396 
 34 | 
 35 | 
 36 | #运行GBRT
 37 | python GBRT.py --station 312 
 38 | python GBRT.py --station 313 
 39 | python GBRT.py --station 314 
 40 | python GBRT.py --station 315 
 41 | python GBRT.py --station 316 
 42 | python GBRT.py --station 371 
 43 | python GBRT.py --station 372 
 44 | python GBRT.py --station 373 
 45 | python GBRT.py --station 374 
 46 | python GBRT.py --station 393 
 47 | python GBRT.py --station 394
 48 | python GBRT.py --station 396 
 49 | 
 50 | 
 51 | # #运行lstm_
 52 | python lstm_.py --station 312  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 53 | python lstm_.py --station 313  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 54 | python lstm_.py --station 314  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 55 | python lstm_.py --station 315  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 56 | python lstm_.py --station 316  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 57 | python lstm_.py --station 371  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 58 | python lstm_.py --station 372  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 59 | python lstm_.py --station 373  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 60 | python lstm_.py --station 374  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 61 | python lstm_.py --station 393  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 62 | python lstm_.py --station 394  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 63 | python lstm_.py --station 396  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 64 | 
 65 | 
 66 | # #运行mlp
 67 | python mlp.py --station 312  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 68 | python mlp.py --station 313  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 69 | python mlp.py --station 314  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 70 | python mlp.py --station 315  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 71 | python mlp.py --station 316  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 72 | python mlp.py --station 371  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 73 | python mlp.py --station 372  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 74 | python mlp.py --station 373  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 75 | python mlp.py --station 374  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 76 | python mlp.py --station 393  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 77 | python mlp.py --station 394  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 78 | python mlp.py --station 396  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 79 | 
 80 | # # #运行seq2seq
 81 | python seq2seq.py --station 312  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 82 | python seq2seq.py --station 313  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 83 | python seq2seq.py --station 314  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 84 | python seq2seq.py --station 315  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 85 | python seq2seq.py --station 316  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 86 | python seq2seq.py --station 371  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 87 | python seq2seq.py --station 372  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 88 | python seq2seq.py --station 373  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 89 | python seq2seq.py --station 374  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 90 | python seq2seq.py --station 393  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 91 | python seq2seq.py --station 394  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 92 | python seq2seq.py --station 396  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 93 | 
 94 | 
 95 | # # #运行att_seq2seq
 96 | python att_seq2seq.py --station 312  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
 97 | python att_seq2seq.py --station 313  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
 98 | python att_seq2seq.py --station 314  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
 99 | python att_seq2seq.py --station 315  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
100 | python att_seq2seq.py --station 316  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
101 | python att_seq2seq.py --station 371  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
102 | python att_seq2seq.py --station 372  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
103 | python att_seq2seq.py --station 373  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
104 | python att_seq2seq.py --station 374  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
105 | python att_seq2seq.py --station 393  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
106 | python att_seq2seq.py --station 394  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
107 | python att_seq2seq.py --station 396  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
108 | 
109 | 
110 | # # #运行XGB
111 | # python XGB.py --station 312 
112 | # python XGB.py --station 313 
113 | # python XGB.py --station 314 
114 | # python XGB.py --station 315 
115 | # python XGB.py --station 316 
116 | # python XGB.py --station 371 
117 | # python XGB.py --station 372 
118 | # python XGB.py --station 373 
119 | # python XGB.py --station 374 
120 | # python XGB.py --station 393 
121 | # python XGB.py --station 394 
122 | # python XGB.py --station 396 


--------------------------------------------------------------------------------
/rain_shuffle/run_no_time1.sh:
--------------------------------------------------------------------------------
  1 | #！/bin/bash
  2 | # 第一个bash文件
  3 | clear;
  4 | echo 'Hello';
  5 | # source activate py3 
  6 | 
  7 | #运行ARIMA
  8 | # python ARIMA.py --station 312 
  9 | # python ARIMA.py --station 313 
 10 | # python ARIMA.py --station 314 
 11 | # python ARIMA.py --station 315 
 12 | # python ARIMA.py --station 316 
 13 | # python ARIMA.py --station 371 
 14 | # python ARIMA.py --station 372 
 15 | # python ARIMA.py --station 373 
 16 | # python ARIMA.py --station 374 
 17 | # python ARIMA.py --station 393 
 18 | # python ARIMA.py --station 394 
 19 | # python ARIMA.py --station 396 
 20 | 
 21 | # # #运行SVR_rbf
 22 | # python SVR_rbf.py --station 312 
 23 | # python SVR_rbf.py --station 313 
 24 | # python SVR_rbf.py --station 314 
 25 | # python SVR_rbf.py --station 315 
 26 | # python SVR_rbf.py --station 316 
 27 | # python SVR_rbf.py --station 371 
 28 | # python SVR_rbf.py --station 372 
 29 | # python SVR_rbf.py --station 373 
 30 | # python SVR_rbf.py --station 374 
 31 | # python SVR_rbf.py --station 393
 32 | # python SVR_rbf.py --station 394 
 33 | # python SVR_rbf.py --station 396 
 34 | 
 35 | 
 36 | # #运行GBRT
 37 | # python GBRT.py --station 312 
 38 | # python GBRT.py --station 313 
 39 | # python GBRT.py --station 314 
 40 | # python GBRT.py --station 315 
 41 | # python GBRT.py --station 316 
 42 | # python GBRT.py --station 371 
 43 | # python GBRT.py --station 372 
 44 | # python GBRT.py --station 373 
 45 | # python GBRT.py --station 374 
 46 | # python GBRT.py --station 393 
 47 | # python GBRT.py --station 394
 48 | # python GBRT.py --station 396 
 49 | 
 50 | 
 51 | # #运行mlp
 52 | python mlp.py --station 312  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 53 | python mlp.py --station 313  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 54 | python mlp.py --station 314  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 55 | python mlp.py --station 315  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 56 | python mlp.py --station 316  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 57 | python mlp.py --station 371  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 58 | python mlp.py --station 372  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 59 | python mlp.py --station 373  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 60 | python mlp.py --station 374  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 61 | python mlp.py --station 393  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 62 | python mlp.py --station 394  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 63 | python mlp.py --station 396  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 64 | 
 65 | # #运行lstm_
 66 | python lstm_.py --station 312  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 67 | python lstm_.py --station 313  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 68 | python lstm_.py --station 314  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 69 | python lstm_.py --station 315  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 70 | python lstm_.py --station 316  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 71 | python lstm_.py --station 371  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 72 | python lstm_.py --station 372  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 73 | python lstm_.py --station 373  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 74 | python lstm_.py --station 374  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 75 | python lstm_.py --station 393  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 76 | python lstm_.py --station 394  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 77 | python lstm_.py --station 396  --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 78 | 
 79 | 
 80 | # # #运行seq2seq
 81 | python seq2seq.py --station 312  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 82 | python seq2seq.py --station 313  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 83 | python seq2seq.py --station 314  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 84 | python seq2seq.py --station 315  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 85 | python seq2seq.py --station 316  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 86 | python seq2seq.py --station 371  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 87 | python seq2seq.py --station 372  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 88 | python seq2seq.py --station 373  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 89 | python seq2seq.py --station 374  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 90 | python seq2seq.py --station 393  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 91 | python seq2seq.py --station 394  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 92 | python seq2seq.py --station 396  --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 93 | 
 94 | 
 95 | # # # #运行att_seq2seq
 96 | # python att_seq2seq.py --station 312  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
 97 | # python att_seq2seq.py --station 313  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
 98 | # python att_seq2seq.py --station 314  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
 99 | # python att_seq2seq.py --station 315  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
100 | # python att_seq2seq.py --station 316  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
101 | # python att_seq2seq.py --station 371  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
102 | # python att_seq2seq.py --station 372  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
103 | # python att_seq2seq.py --station 373  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
104 | # python att_seq2seq.py --station 374  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
105 | # python att_seq2seq.py --station 393  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
106 | # python att_seq2seq.py --station 394  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
107 | # python att_seq2seq.py --station 396  --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
108 | 
109 | 
110 | # # #运行XGB
111 | # python XGB.py --station 312 
112 | # python XGB.py --station 313 
113 | # python XGB.py --station 314 
114 | # python XGB.py --station 315 
115 | # python XGB.py --station 316 
116 | # python XGB.py --station 371 
117 | # python XGB.py --station 372 
118 | # python XGB.py --station 373 
119 | # python XGB.py --station 374 
120 | # python XGB.py --station 393 
121 | # python XGB.py --station 394 
122 | # python XGB.py --station 396 


--------------------------------------------------------------------------------
/rain_shuffle/LSTM_module.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn, optim
  3 | from torch.autograd import Variable
  4 | from torch import Tensor
  5 | from torch.utils.data import DataLoader
  6 | import numpy as np
  7 | from sklearn.preprocessing import *
  8 | import os
  9 | from sklearn.model_selection import train_test_split
 10 | from eval import evaluation
 11 | from sklearn.metrics import *
 12 | 
 13 | path = './LSTM'
 14 | 
 15 | 
 16 | # 定义GetLoader类，继承Dataset方法，并重写__getitem__()和__len__()方法
 17 | class GetLoader(torch.utils.data.Dataset):
 18 |     # 初始化函数，得到数据
 19 |     def __init__(self, data_root, data_label):
 20 |         self.data = data_root
 21 |         self.label = data_label
 22 | 
 23 |     # index是根据batchsize划分数据后得到的索引，最后将data和对应的labels进行一起返回
 24 |     def __getitem__(self, index):
 25 |         data = self.data[index]
 26 |         labels = self.label[index]
 27 |         return data, labels
 28 | 
 29 |     # 该函数返回数据大小长度，目的是DataLoader方便划分，如果不知道大小，DataLoader会一脸懵逼
 30 |     def __len__(self):
 31 |         return len(self.data)
 32 | 
 33 | 
 34 | # 定义 Recurrent Network 模型
 35 | class LSTM_module(nn.Module):
 36 |     def __init__(self, in_dim, hidden_dim, n_layer, n_class):
 37 |         super(LSTM_module, self).__init__()
 38 |         self.n_layer = n_layer
 39 |         self.hidden_dim = hidden_dim
 40 |         self.lstm = nn.LSTM(in_dim, hidden_dim, n_layer, batch_first=True)
 41 |         self.classifier = nn.Linear(hidden_dim, n_class)
 42 | 
 43 |     def forward(self, x):
 44 |         out, _ = self.lstm(x, None)
 45 | 
 46 |         out = self.classifier(out[:, -1, :])
 47 |         out = torch.relu(out)
 48 |         return out
 49 | 
 50 | 
 51 | class lstm():
 52 |     def __init__(self, num, input_dim, seq_len, hidden_dim, n_layer, batch_size=100, learning_rate=1e-3, shuffle=True,
 53 |                  device_pu='cpu'):
 54 |         self.station = num
 55 |         self.input_dim = input_dim
 56 |         self.hidden_dim = hidden_dim
 57 |         self.n_layer = n_layer
 58 |         self.seq_len = seq_len
 59 |         self.bs = batch_size
 60 |         self.lr = learning_rate
 61 |         self.shuffle = shuffle
 62 |         self.device = device_pu
 63 |         self.model = LSTM_module(input_dim, hidden_dim, n_layer, 1).to(self.device)
 64 |         self.criterion = nn.MSELoss()
 65 |         self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
 66 | 
 67 |     def fit(self, data, label, num_epoches=100):
 68 | 
 69 |         x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.22, random_state=42)
 70 | 
 71 |         train = GetLoader(x_train, y_train)
 72 |         data_train = torch.utils.data.DataLoader(train, batch_size=self.bs, shuffle=self.shuffle)
 73 |         test = GetLoader(x_test, y_test)
 74 |         data_test = torch.utils.data.DataLoader(test, batch_size=self.bs, shuffle=self.shuffle)
 75 | 
 76 |         if os.path.exists(path):
 77 |             pass
 78 |         else:
 79 |             os.mkdir(path)
 80 | 
 81 |         eval_loss_best = np.inf
 82 | 
 83 |         uncorrect =True
 84 |         while uncorrect:
 85 |             self.model = LSTM_module(self.input_dim, self.hidden_dim, self.n_layer, 1).to(self.device)
 86 |             self.criterion = nn.MSELoss()
 87 |             self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
 88 |             f = open('{}/train_{}_{}_{}_{}.txt'.format(path, self.station, self.hidden_dim, self.n_layer, self.lr), 'w+')
 89 |             train_loss_last = np.inf
 90 |             # 开始训练
 91 |             for epoch in range(num_epoches):
 92 |                 self.model.train()
 93 |                 print('epoch {}'.format(epoch + 1))
 94 |                 print('**************************************')
 95 |                 running_loss = 0.0
 96 | 
 97 |                 for i, data in enumerate(data_train, 1):
 98 |                     """
 99 |                     随机打乱的方式不好 应该是全部打乱之后 固定抽取 否则会出现样本利用不均衡的问题
100 |                     """
101 |                     img, label = data
102 |                     img = Variable(img).to(self.device)
103 |                     label = Variable(label).to(self.device)
104 |                     # 向前传播
105 |                     out = self.model(img.view(-1, self.seq_len, self.input_dim))
106 |                     loss = self.criterion(out, label)
107 |                     running_loss += loss.data.item() * label.size(0)
108 |                     # 向后传播
109 |                     self.optimizer.zero_grad()
110 |                     loss.backward()
111 |                     self.optimizer.step()
112 | 
113 |                 train_loss = running_loss / (len(y_train))
114 |                 print('Finish {} epoch, Loss: {:.6f}'.format(
115 |                     epoch + 1, train_loss))
116 | 
117 |                 if train_loss_last == train_loss  and epoch < 3:
118 |                     break
119 |                 if train_loss_last > train_loss and epoch >=3:
120 |                     uncorrect = False
121 |                 
122 |                 train_loss_last = train_loss
123 | 
124 |                 self.model.eval()
125 |                 eval_loss = 0.
126 |                 for data in data_test:
127 |                     img, label = data
128 | 
129 |                     img = Variable(img).to(self.device)
130 |                     label = Variable(label).to(self.device)
131 |                     out = self.model(img.view(-1, self.seq_len, self.input_dim))
132 |                     loss = self.criterion(out, label)
133 |                     eval_loss += loss.data.item() * label.size(0)
134 |                 val_loss = eval_loss / (len(y_test))
135 |                 print('Val Loss: {:.6f}'.format(val_loss))
136 | 
137 |                 f.write(" Train_MSE: " + str(train_loss) + ' Val_MSE: ' + str(val_loss) + '\n')
138 | 
139 |                 if val_loss < eval_loss_best:
140 |                     eval_loss_best = val_loss
141 |                     self.eval = eval_loss_best
142 |                     torch.save(self.model,
143 |                             '{}/lstm_{}_{}_{}_{}.pth'.format(path, self.station, self.hidden_dim, self.n_layer, self.lr))
144 |             f.close()
145 | 
146 |         return self.eval
147 | 
148 |     def predict(self, test_data, test_label):
149 | 
150 |         test_model = torch.load(
151 |             '{}/lstm_{}_{}_{}_{}.pth'.format(path, self.station, self.hidden_dim, self.n_layer, self.lr)).to(
152 |             self.device)
153 | 
154 |         test_loss = 0
155 |         criterion = nn.MSELoss()
156 |         test = GetLoader(test_data, test_label)
157 |         data_test = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)
158 | 
159 |         y_mlp = []
160 |         for data in data_test:
161 |             img, label = data
162 |             img = Variable(img).to(self.device)
163 |             label = Variable(label).to(self.device)
164 |             out = test_model(img.view(-1, self.seq_len, self.input_dim))
165 |             loss = criterion(out, label)
166 |             test_loss += loss.data
167 |             y_mlp.append(out.data)
168 | 
169 |         print('Test Loss: {:.6f}'.format(test_loss / (len(
170 |             test_label))))
171 | 
172 |         y_mlp = np.array(y_mlp).squeeze()[:, np.newaxis]
173 | 
174 |         return y_mlp
175 | 
176 | 
177 | 
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 


--------------------------------------------------------------------------------
/rain_shuffle/run_time1.sh:
--------------------------------------------------------------------------------
  1 | #！/bin/bash
  2 | # 第一个bash文件
  3 | clear;
  4 | echo 'Hello';
  5 | # source activate py3 
  6 | 
  7 | #运行ARIMA
  8 | # python ARIMA.py --station 312 
  9 | # python ARIMA.py --station 313 
 10 | # python ARIMA.py --station 314 
 11 | # python ARIMA.py --station 315 
 12 | # python ARIMA.py --station 316 
 13 | # python ARIMA.py --station 371 
 14 | # python ARIMA.py --station 372 
 15 | # python ARIMA.py --station 373 
 16 | # python ARIMA.py --station 374 
 17 | # python ARIMA.py --station 393 
 18 | # python ARIMA.py --station 394 
 19 | # python ARIMA.py --station 396 
 20 | 
 21 | # #运行SVR_rbf
 22 | python SVR_rbf.py --station 312 --ifshuffle
 23 | python SVR_rbf.py --station 313 --ifshuffle
 24 | python SVR_rbf.py --station 314 --ifshuffle
 25 | python SVR_rbf.py --station 315 --ifshuffle
 26 | python SVR_rbf.py --station 316 --ifshuffle
 27 | python SVR_rbf.py --station 371 --ifshuffle
 28 | python SVR_rbf.py --station 372 --ifshuffle
 29 | python SVR_rbf.py --station 373 --ifshuffle
 30 | python SVR_rbf.py --station 374 --ifshuffle
 31 | python SVR_rbf.py --station 393 --ifshuffle
 32 | python SVR_rbf.py --station 394 --ifshuffle
 33 | python SVR_rbf.py --station 396 --ifshuffle
 34 | 
 35 | 
 36 | #运行GBRT
 37 | python GBRT.py --station 312 --ifshuffle
 38 | python GBRT.py --station 313 --ifshuffle
 39 | python GBRT.py --station 314 --ifshuffle
 40 | python GBRT.py --station 315 --ifshuffle
 41 | python GBRT.py --station 316 --ifshuffle
 42 | python GBRT.py --station 371 --ifshuffle
 43 | python GBRT.py --station 372 --ifshuffle
 44 | python GBRT.py --station 373 --ifshuffle
 45 | python GBRT.py --station 374 --ifshuffle
 46 | python GBRT.py --station 393 --ifshuffle
 47 | python GBRT.py --station 394 --ifshuffle
 48 | python GBRT.py --station 396 --ifshuffle
 49 | 
 50 | 
 51 | # #运行lstm_
 52 | python lstm_.py --station 312 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 53 | python lstm_.py --station 313 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 54 | python lstm_.py --station 314 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 55 | python lstm_.py --station 315 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 56 | python lstm_.py --station 316 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 57 | python lstm_.py --station 371 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 58 | python lstm_.py --station 372 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 59 | python lstm_.py --station 373 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 60 | python lstm_.py --station 374 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 61 | python lstm_.py --station 393 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 62 | python lstm_.py --station 394 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 63 | python lstm_.py --station 396 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 1  --seq-len 7
 64 | 
 65 | 
 66 | # #运行mlp
 67 | python mlp.py --station 312 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 68 | python mlp.py --station 313 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 69 | python mlp.py --station 314 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 70 | python mlp.py --station 315 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 71 | python mlp.py --station 316 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 72 | python mlp.py --station 371 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 73 | python mlp.py --station 372 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 74 | python mlp.py --station 373 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 75 | python mlp.py --station 374 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 76 | python mlp.py --station 393 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 77 | python mlp.py --station 394 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 78 | python mlp.py --station 396 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 1  --seq-len 7
 79 | 
 80 | # # #运行seq2seq
 81 | python seq2seq.py --station 312 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 82 | python seq2seq.py --station 313 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 83 | python seq2seq.py --station 314 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 84 | python seq2seq.py --station 315 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 85 | python seq2seq.py --station 316 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 86 | python seq2seq.py --station 371 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 87 | python seq2seq.py --station 372 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 88 | python seq2seq.py --station 373 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 89 | python seq2seq.py --station 374 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 90 | python seq2seq.py --station 393 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 91 | python seq2seq.py --station 394 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 92 | python seq2seq.py --station 396 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 1  --seq-len 7
 93 | 
 94 | 
 95 | # # #运行att_seq2seq
 96 | python att_seq2seq.py --station 312 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
 97 | python att_seq2seq.py --station 313 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
 98 | python att_seq2seq.py --station 314 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
 99 | python att_seq2seq.py --station 315 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
100 | python att_seq2seq.py --station 316 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
101 | python att_seq2seq.py --station 371 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
102 | python att_seq2seq.py --station 372 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
103 | python att_seq2seq.py --station 373 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
104 | python att_seq2seq.py --station 374 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
105 | python att_seq2seq.py --station 393 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
106 | python att_seq2seq.py --station 394 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
107 | python att_seq2seq.py --station 396 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 1  --seq-len 7
108 | 
109 | 
110 | # # #运行XGB
111 | # python XGB.py --station 312 --ifshuffle
112 | # python XGB.py --station 313 --ifshuffle
113 | # python XGB.py --station 314 --ifshuffle
114 | # python XGB.py --station 315 --ifshuffle
115 | # python XGB.py --station 316 --ifshuffle
116 | # python XGB.py --station 371 --ifshuffle
117 | # python XGB.py --station 372 --ifshuffle
118 | # python XGB.py --station 373 --ifshuffle
119 | # python XGB.py --station 374 --ifshuffle
120 | # python XGB.py --station 393 --ifshuffle
121 | # python XGB.py --station 394 --ifshuffle
122 | # python XGB.py --station 396 --ifshuffle


--------------------------------------------------------------------------------
/rain_shuffle/run_time2.sh:
--------------------------------------------------------------------------------
  1 | #！/bin/bash
  2 | # 第一个bash文件
  3 | clear;
  4 | echo 'Hello';
  5 | # source activate py3 
  6 | 
  7 | #运行ARIMA
  8 | # python ARIMA.py --station 312 
  9 | # python ARIMA.py --station 313 
 10 | # python ARIMA.py --station 314 
 11 | # python ARIMA.py --station 315 
 12 | # python ARIMA.py --station 316 
 13 | # python ARIMA.py --station 371 
 14 | # python ARIMA.py --station 372 
 15 | # python ARIMA.py --station 373 
 16 | # python ARIMA.py --station 374 
 17 | # python ARIMA.py --station 393 
 18 | # python ARIMA.py --station 394 
 19 | # python ARIMA.py --station 396 
 20 | 
 21 | # #运行SVR_rbf
 22 | python SVR_rbf.py --station 312 --ifshuffle
 23 | python SVR_rbf.py --station 313 --ifshuffle
 24 | python SVR_rbf.py --station 314 --ifshuffle
 25 | python SVR_rbf.py --station 315 --ifshuffle
 26 | python SVR_rbf.py --station 316 --ifshuffle
 27 | python SVR_rbf.py --station 371 --ifshuffle
 28 | python SVR_rbf.py --station 372 --ifshuffle
 29 | python SVR_rbf.py --station 373 --ifshuffle
 30 | python SVR_rbf.py --station 374 --ifshuffle
 31 | python SVR_rbf.py --station 393 --ifshuffle
 32 | python SVR_rbf.py --station 394 --ifshuffle
 33 | python SVR_rbf.py --station 396 --ifshuffle
 34 | 
 35 | 
 36 | #运行GBRT
 37 | python GBRT.py --station 312 --ifshuffle
 38 | python GBRT.py --station 313 --ifshuffle
 39 | python GBRT.py --station 314 --ifshuffle
 40 | python GBRT.py --station 315 --ifshuffle
 41 | python GBRT.py --station 316 --ifshuffle
 42 | python GBRT.py --station 371 --ifshuffle
 43 | python GBRT.py --station 372 --ifshuffle
 44 | python GBRT.py --station 373 --ifshuffle
 45 | python GBRT.py --station 374 --ifshuffle
 46 | python GBRT.py --station 393 --ifshuffle
 47 | python GBRT.py --station 394 --ifshuffle
 48 | python GBRT.py --station 396 --ifshuffle
 49 | 
 50 | 
 51 | # #运行lstm_
 52 | python lstm_.py --station 312 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 53 | python lstm_.py --station 313 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 54 | python lstm_.py --station 314 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 55 | python lstm_.py --station 315 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 56 | python lstm_.py --station 316 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 57 | python lstm_.py --station 371 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 58 | python lstm_.py --station 372 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 59 | python lstm_.py --station 373 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 60 | python lstm_.py --station 374 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 61 | python lstm_.py --station 393 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 62 | python lstm_.py --station 394 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 63 | python lstm_.py --station 396 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 2  --seq-len 7
 64 | 
 65 | 
 66 | # #运行mlp
 67 | python mlp.py --station 312 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 68 | python mlp.py --station 313 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 69 | python mlp.py --station 314 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 70 | python mlp.py --station 315 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 71 | python mlp.py --station 316 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 72 | python mlp.py --station 371 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 73 | python mlp.py --station 372 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 74 | python mlp.py --station 373 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 75 | python mlp.py --station 374 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 76 | python mlp.py --station 393 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 77 | python mlp.py --station 394 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 78 | python mlp.py --station 396 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 2  --seq-len 7
 79 | 
 80 | # # #运行seq2seq
 81 | python seq2seq.py --station 312 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 82 | python seq2seq.py --station 313 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 83 | python seq2seq.py --station 314 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 84 | python seq2seq.py --station 315 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 85 | python seq2seq.py --station 316 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 86 | python seq2seq.py --station 371 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 87 | python seq2seq.py --station 372 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 88 | python seq2seq.py --station 373 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 89 | python seq2seq.py --station 374 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 90 | python seq2seq.py --station 393 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 91 | python seq2seq.py --station 394 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 92 | python seq2seq.py --station 396 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 2  --seq-len 7
 93 | 
 94 | 
 95 | # # #运行att_seq2seq
 96 | python att_seq2seq.py --station 312 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
 97 | python att_seq2seq.py --station 313 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
 98 | python att_seq2seq.py --station 314 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
 99 | python att_seq2seq.py --station 315 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
100 | python att_seq2seq.py --station 316 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
101 | python att_seq2seq.py --station 371 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
102 | python att_seq2seq.py --station 372 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
103 | python att_seq2seq.py --station 373 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
104 | python att_seq2seq.py --station 374 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
105 | python att_seq2seq.py --station 393 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
106 | python att_seq2seq.py --station 394 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
107 | python att_seq2seq.py --station 396 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 2  --seq-len 7
108 | 
109 | 
110 | # # #运行XGB
111 | # python XGB.py --station 312 --ifshuffle
112 | # python XGB.py --station 313 --ifshuffle
113 | # python XGB.py --station 314 --ifshuffle
114 | # python XGB.py --station 315 --ifshuffle
115 | # python XGB.py --station 316 --ifshuffle
116 | # python XGB.py --station 371 --ifshuffle
117 | # python XGB.py --station 372 --ifshuffle
118 | # python XGB.py --station 373 --ifshuffle
119 | # python XGB.py --station 374 --ifshuffle
120 | # python XGB.py --station 393 --ifshuffle
121 | # python XGB.py --station 394 --ifshuffle
122 | # python XGB.py --station 396 --ifshuffle


--------------------------------------------------------------------------------
/rain_shuffle/run.sh:
--------------------------------------------------------------------------------
  1 | #！/bin/bash
  2 | # 第一个bash文件
  3 | clear;
  4 | echo 'Hello';
  5 | # source activate py3 
  6 | 
  7 | #运行ARIMA
  8 | # python ARIMA.py --station 312 
  9 | # python ARIMA.py --station 313 
 10 | # python ARIMA.py --station 314 
 11 | # python ARIMA.py --station 315 
 12 | # python ARIMA.py --station 316 
 13 | # python ARIMA.py --station 371 
 14 | # python ARIMA.py --station 372 
 15 | # python ARIMA.py --station 373 
 16 | # python ARIMA.py --station 374 
 17 | # python ARIMA.py --station 393 
 18 | # python ARIMA.py --station 394 
 19 | # python ARIMA.py --station 396 
 20 | 
 21 | # #运行SVR_rbf
 22 | # python SVR_rbf.py --station 312 --ifshuffle
 23 | # python SVR_rbf.py --station 313 --ifshuffle
 24 | # python SVR_rbf.py --station 314 --ifshuffle
 25 | # python SVR_rbf.py --station 315 --ifshuffle
 26 | # python SVR_rbf.py --station 316 --ifshuffle
 27 | # python SVR_rbf.py --station 371 --ifshuffle
 28 | # python SVR_rbf.py --station 372 --ifshuffle
 29 | # python SVR_rbf.py --station 373 --ifshuffle
 30 | # python SVR_rbf.py --station 374 --ifshuffle
 31 | # python SVR_rbf.py --station 393 --ifshuffle
 32 | # python SVR_rbf.py --station 394 --ifshuffle
 33 | # python SVR_rbf.py --station 396 --ifshuffle
 34 | 
 35 | 
 36 | #运行GBRT
 37 | # python GBRT.py --station 312 --ifshuffle
 38 | # python GBRT.py --station 313 --ifshuffle
 39 | # python GBRT.py --station 314 --ifshuffle
 40 | # python GBRT.py --station 315 --ifshuffle
 41 | # python GBRT.py --station 316 --ifshuffle
 42 | # python GBRT.py --station 371 --ifshuffle
 43 | # python GBRT.py --station 372 --ifshuffle
 44 | # python GBRT.py --station 373 --ifshuffle
 45 | # python GBRT.py --station 374 --ifshuffle
 46 | # python GBRT.py --station 393 --ifshuffle
 47 | # python GBRT.py --station 394 --ifshuffle
 48 | # python GBRT.py --station 396 --ifshuffle
 49 | 
 50 | 
 51 | # #运行lstm_
 52 | # python lstm_.py --station 312 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 53 | # python lstm_.py --station 313 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 54 | # python lstm_.py --station 314 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 55 | # python lstm_.py --station 315 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 56 | # python lstm_.py --station 316 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 57 | # python lstm_.py --station 371 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 58 | # python lstm_.py --station 372 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 59 | # python lstm_.py --station 373 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 60 | # python lstm_.py --station 374 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 61 | # python lstm_.py --station 393 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 62 | # python lstm_.py --station 394 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 63 | # python lstm_.py --station 396 --ifshuffle --epochs 100 --batch-size 100  --lr 1e-3 --input-dim 3  --seq-len 7
 64 | 
 65 | 
 66 | # #运行mlp
 67 | python mlp.py --station 312 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 68 | # python mlp.py --station 313 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 69 | # python mlp.py --station 314 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 70 | # python mlp.py --station 315 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 71 | # python mlp.py --station 316 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 72 | # python mlp.py --station 371 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 73 | # python mlp.py --station 372 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 74 | # python mlp.py --station 373 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 75 | # python mlp.py --station 374 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 76 | # python mlp.py --station 393 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 77 | # python mlp.py --station 394 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 78 | # python mlp.py --station 396 --ifshuffle --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
 79 | 
 80 | # # #运行seq2seq
 81 | # python seq2seq.py --station 312 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 82 | # python seq2seq.py --station 313 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 83 | # python seq2seq.py --station 314 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 84 | # python seq2seq.py --station 315 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 85 | # python seq2seq.py --station 316 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 86 | # python seq2seq.py --station 371 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 87 | # python seq2seq.py --station 372 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 88 | # python seq2seq.py --station 373 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 89 | # python seq2seq.py --station 374 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 90 | # python seq2seq.py --station 393 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 91 | # python seq2seq.py --station 394 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 92 | # python seq2seq.py --station 396 --ifshuffle --epochs 100 --batch-size 30  --lr 5e-3 --input-dim 3  --seq-len 7
 93 | 
 94 | 
 95 | # # #运行att_seq2seq
 96 | # python att_seq2seq.py --station 312 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
 97 | # python att_seq2seq.py --station 313 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
 98 | # python att_seq2seq.py --station 314 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
 99 | # python att_seq2seq.py --station 315 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
100 | # python att_seq2seq.py --station 316 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
101 | # python att_seq2seq.py --station 371 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
102 | # python att_seq2seq.py --station 372 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
103 | # python att_seq2seq.py --station 373 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
104 | # python att_seq2seq.py --station 374 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
105 | # python att_seq2seq.py --station 393 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
106 | # python att_seq2seq.py --station 394 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
107 | # python att_seq2seq.py --station 396 --ifshuffle --epochs 120 --batch-size 32  --lr 5e-3 --input-dim 3  --seq-len 7
108 | 
109 | 
110 | # # #运行XGB
111 | # python XGB.py --station 312 --ifshuffle
112 | # python XGB.py --station 313 --ifshuffle
113 | # python XGB.py --station 314 --ifshuffle
114 | # python XGB.py --station 315 --ifshuffle
115 | # python XGB.py --station 316 --ifshuffle
116 | # python XGB.py --station 371 --ifshuffle
117 | # python XGB.py --station 372 --ifshuffle
118 | # python XGB.py --station 373 --ifshuffle
119 | # python XGB.py --station 374 --ifshuffle
120 | # python XGB.py --station 393 --ifshuffle
121 | # python XGB.py --station 394 --ifshuffle
122 | # python XGB.py --station 396 --ifshuffle


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 环境搭建
  2 | 
  3 | 从纯裸的windows上跑通，步骤是：
  4 | 
  5 | ### 安装anaconda3最新版2020.02
  6 | 
  7 | ​    网址  <https://repo.anaconda.com/archive/Anaconda3-2020.02-Windows-x86_64.exe>
  8 | 
  9 | ### 从清华源安装pytorch1.5cpu版：
 10 | 
 11 | `conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/`
 12 | 
 13 | `conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/`
 14 | 
 15 | `conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/`
 16 | 
 17 | ` conda config --set show_channel_urls yes `
 18 | 
 19 | `conda install pytorch torchvision cpuonly`
 20 | 
 21 | ### 从清华源安装xgboost 
 22 | 
 23 | `pip install xgboost -i https://pypi.tuna.tsinghua.edu.cn/simple`
 24 | 
 25 | 
 26 | 
 27 | # rain_shuffle代码部分
 28 | 
 29 | 该部分为主要实现降水量预测的代码
 30 | 
 31 | 其主要由以下几个部分构成
 32 | 
 33 | ## hour_data文件夹
 34 | 
 35 | 该文件夹主要保存经过预处理的对应每个气象站id的数据，csv格式
 36 | 
 37 | ## train_test_hour文件夹
 38 | 
 39 | 该文件夹主要保存每个气象站预处理后的数据的特征和标签文件，以9:1分为训练集和测试集，其中训练和测试集未打乱，按时间先后顺序划分。
 40 | 
 41 | ## train_test_hour_shuffle文件夹
 42 | 
 43 | 该文件夹主要保存每个气象站预处理后的数据的特征和标签文件，以9:1分为训练集和测试集，其中训练和测试集打乱划分。
 44 | 
 45 | ## MODEL文件夹
 46 | 
 47 | 主要用来存放需要进行模型集成的训练好的单独模型，属于打乱数据集模型
 48 | 
 49 | ## MODEL_NO文件夹
 50 | 
 51 | 主要用来存放需要进行模型集成的训练好的单独模型，属于未打乱数据集模型
 52 | 
 53 | ## hour_cat.py
 54 | 
 55 | 将原始数据文件按单个气象站拆分，并搜索其中数据量较大的数据集，再对其中的缺失值进行填补，最后保存在`hour_data`文件夹
 56 | 
 57 | ## corr_data.py
 58 | 
 59 | 测试不同气象参数的相关性 采用皮尔逊系数^[1]^，并绘制相关曲线
 60 | 
 61 | > [[1]参考链接](https://baike.baidu.com/item/皮尔逊相关系数/12712835?fromtitle=皮尔逊系数&fromid=16955304&fr=aladdin)
 62 | 
 63 | ## hour2txt_ifshuffle.py
 64 | 
 65 | 通过选择其中的参数，可以修改处理数据的方式，得到是否打乱的数据或者特征代表的时间长度，还有测试集的比例
 66 | 
 67 | ```python
 68 | time_ser = 3 #小时数
 69 | ratio = 0.9
 70 | ifshuffle = False
 71 | ```
 72 | 
 73 | 上述为可以调整的对应参数。
 74 | 
 75 | ## eval.py
 76 | 主要包括：
 77 | 1. 评估模型的各个指标
 78 | 
 79 |     ```python
 80 |     def evaluation(a,b):
 81 |         rmse = np.sqrt(mean_squared_error(a,b))
 82 |         mae = mean_absolute_error(a, b)
 83 |         var = explained_variance_score(a,b)
 84 |         mdae =median_absolute_error(a,b)
 85 |         r2 =r2_score(a,b)
 86 |         return rmse, mae, mdae, r2, var
 87 |     ```
 88 |     具体参考sklearn.metrics的库函数
 89 | 2. 模型结果的后处理
 90 | 
 91 | ## ARIMA.py
 92 | 
 93 | arima的方法训练模型 采用一步预测 所有数据预测下一时刻
 94 | 
 95 | 使用数据的10%做测试集 画出预测和测试集的效果
 96 | 
 97 | ## SVR_rbf.py
 98 | 
 99 | 核函数为rbf的svr模型 
100 | 
101 | 训练集：验证集：测试集：7:2:1
102 | 
103 | 归一化特征参数 sklearn.preprocessing.scale函数
104 | 
105 | 超参数：gamma  C 
106 | 
107 | 训练不同超参数，保存训练集mse和验证集mse，取验证集最好的mse为最优模型并保存
108 | 
109 | 加载保存模型， 在测试集测试，画图，使用eval.py评估
110 | 
111 | ## SVR_sigmoid.py
112 | 
113 | 核函数为sigmoid
114 | 
115 | 同上 但超参数范围不同
116 | 
117 | ## SVR_poly.py
118 | 
119 | 核函数为多项式
120 | 
121 | 同上 但超参数多一个 degree （具体见svr的参数）
122 | 
123 | ## MLP_module.py
124 | 
125 | mlp神经网络的class模块 
126 | 
127 | 实现功能：
128 | 
129 |     1. 初始化函数： 
130 |        输入维度  隐藏单元个数 层数 学习率 batch大小
131 |     2. fit函数：分割训练集 验证集
132 |     ​   训练模型返回最优验证集mse 并保存模型和中间结果
133 |     3. predict函数：加载模型
134 |     ​   打印测试集上的mse 和 返回预测值
135 | 
136 | ## mlp.py
137 | 
138 | mlp实现的主文件
139 | 
140 | 读取文件
141 | 
142 | 训练模型
143 | 
144 | 评估模型
145 | 
146 | 也可以通过人工修改超参数 保存的模型的数据均是带有参数的文件名。
147 | 
148 | ## LSTM_module.py
149 | 
150 | Last神经网络的class模块 
151 | 
152 | 实现功能：
153 | 
154 | 1.初始化函数： 输入维度 序列的长度  隐藏单元个数 层数 学习率 batch大小
155 | 
156 | 2.fit函数：分割训练集 验证集
157 | 
158 | ​		训练模型返回最优验证集mse 并保存模型和中间结果
159 | 
160 | 3.predict函数：加载模型
161 | 
162 | ​			打印测试集上的mse 和 返回预测值
163 | 
164 | ## lstm_.py
165 | 
166 | mlp实现的主文件
167 | 
168 | 读取文件
169 | 
170 | 训练模型
171 | 
172 | 评估模型
173 | 
174 | 也可以通过人工修改超参数 保存的模型的数据均是带有参数的文件名。
175 | 
176 | ## GBRT.py
177 | 
178 | GBRT模型 
179 | 
180 | 训练集：验证集：测试集：7:2:1
181 | 
182 | 归一化特征参数 sklearn.preprocessing.scale函数
183 | 
184 | 超参数：学习率（learning_rate） 树的个数（n_estimators） 树的深度（max_depth）
185 | 
186 | 训练不同超参数，保存训练集mse和验证集mse，取验证集最好的mse为最优模型并保存
187 | 
188 | 加载保存模型， 在测试集测试，画图，使用eval.py评估
189 | 
190 | ## XGB.py
191 | 
192 | XGBoost模型 
193 | 
194 | 训练集：验证集：测试集：7:2:1
195 | 
196 | 归一化特征参数 sklearn.preprocessing.scale函数
197 | 
198 | 超参数：学习率（learning_rate） 树的个数（n_estimators） 树的深度（max_depth）
199 | 
200 | 训练不同超参数，保存训练集mse和验证集mse，取验证集最好的mse为最优模型并保存
201 | 
202 | 加载保存模型， 在测试集测试，画图，使用eval.py评估
203 | 
204 | ## seq2seq_module.py
205 | 
206 | ​	Seq2seq神经网络的class模块 
207 | 
208 | 实现功能：
209 | 
210 | 1.初始化函数： 输入维度 序列的长度  隐藏单元个数  学习率 batch大小
211 | 
212 | 2.fit函数：分割训练集 验证集
213 | 
214 | ​		训练模型返回最优验证集mse 并保存模型和中间结果
215 | 
216 | 3.predict函数：加载模型
217 | 
218 | ​			打印测试集上的mse 和 返回预测值	
219 | 
220 | ## seq2seq.py
221 | 
222 | seq2seq实现的主文件
223 | 
224 | 读取文件
225 | 
226 | 训练模型
227 | 
228 | 评估模型
229 | 
230 | 也可以通过人工修改超参数 保存的模型的数据均是带有参数的文件名。
231 | 
232 | ## att_seq2seq_module.py		
233 | 
234 | ​		att_Seq2seq神经网络的class模块 (带注意力机制)
235 | 
236 | 实现功能：
237 | 
238 | 1.初始化函数： 输入维度 序列的长度  隐藏单元个数  学习率 batch大小
239 | 
240 | 2.fit函数：分割训练集 验证集
241 | 
242 | ​		训练模型返回最优验证集mse 并保存模型和中间结果
243 | 
244 | 3.predict函数：加载模型
245 | 
246 | ​			打印测试集上的mse 和 返回预测值	
247 | 
248 | ​	
249 | 
250 | ## att_seq2seq.py
251 | 
252 | att_seq2seq实现的主文件
253 | 
254 | 读取文件
255 | 
256 | 训练模型
257 | 
258 | 评估模型
259 | 
260 | 也可以通过人工修改超参数 保存的模型的数据均是带有参数的文件名。
261 | 
262 | ## ensemble_learn.py
263 | 
264 | 基础模型训练代码 打乱数据
265 | 
266 | 包含bagging方法和stacking方法
267 | 
268 | ## ensemble_learn_np.py
269 | 
270 | 基础模型训练代码 未打乱数据
271 | 
272 | 包含bagging方法和stacking方法
273 | 
274 | ## test.py
275 | 
276 | 加载模型测试`sample`文件夹中的测试集
277 | 
278 | # 训练过程
279 | 
280 | ### 数据集准备
281 | 
282 | 首先在链接处下载数据集[小时天气数据](https://cloud.tsinghua.edu.cn/d/a96c9fb8f56d4fb5be62/)
283 | 
284 | 解压其中的`hourly-weather-surface.zip`文件，可以得到`sudeste.csv`文件，并将该文件放入`rain_shuffle`文件夹中。
285 | 
286 | ### 构建训练和测试集
287 | 
288 | 在terminal终端进入`rain_shuffle`文件夹，然后在文件夹中执行
289 | 
290 | ```
291 | python hour_cat.py
292 | ```
293 | 
294 | 然后再继续执行
295 | 
296 | ```
297 | python hour2txt_ifshuffle.py
298 | ```
299 | 
300 | ### 训练及测试
301 | 
302 | 继续在该文件夹中，运行以下指令即可训练模型
303 | 
304 | ```
305 | python seq2seq.py --station 313  --epochs 100 --batch-size 30  --lr 1e-3 --input-dim 3  --seq-len 7
306 | ```
307 | 
308 | 其中`seq2seq.py`是训练模型的代码，可以根据之前的介绍改成任意模型的代码名称，`-—statoin`代表所选的气象站的id，`--epochs`为训练代数。
309 | 
310 | 执行该文件后，你可以训练得到对应的模型以及输出对应的测试集结果，可以得到RMSE，MAE，MDAE，r2-score，可释方差得分var等得分。
311 | 
312 | ### 在测试样例上测试
313 | 
314 | 首先将上一个文件夹中的`testset`文件夹中的`station313`~`station393`文件夹转移到`rain_shuffle`文件夹的` sample`文件夹，然后运行下列代码得到验证结果
315 | 
316 | ```
317 | python test.py --station 313 --model_type seq2seq --model SEQ2SEQ/seq2seq_313_128_0.1_0.001.pth 
318 | ```
319 | 
320 | 其中可以通过更换`—-station`的参数来改变要去求解的测试样例，`—model_type`是模型的类型，需要和后面选择的模型对应。
321 | 
322 | ==注==：在选择模型时必须选择和station匹配的模型，例如上述station为313，则需要找到为`seq2seq_313_128_0.1_0.001.pth`的模型，其中被`_`隔开的第一个数字代表所匹配的station。
323 | 
324 | 执行该文件后，你可以训练得到对应的模型以及输出对应的测试集结果，可以得到RMSE，MAE，MDAE，var，r2-score等得分。
325 | 
326 | ### 下载训练好的模型和测试样例
327 | 
328 | 首先下载训练好的模型[训练模型](https://cloud.tsinghua.edu.cn/d/2fd811c7400748eabd72/)
329 | 
330 | 找到其中的
331 | 
332 | ```
333 | seq2seq_313_best.pth
334 | seq2seq_314_best.pth
335 | seq2seq_371_best.pth
336 | seq2seq_372_best.pth
337 | seq2seq_393_best.pth
338 | ```
339 | 
340 | 文件下载后放入`models`文件夹，后继续在`rain_shuffle`文件夹下使用下述命令：
341 | 
342 | ```
343 | python test.py --station 313 --model_type seq2seq --model models/seq2seq_313_best.pth 
344 | ```
345 | 
346 | 其中可以通过更换`—-station`的参数来改变要去求解的测试样例，`-—model_type`是模型的类型，需要和后面选择的模型对应。
347 | 
348 | ==注==：但在选择模型时必须选择和station匹配的模型，例如上述station为313，则需要找到为`seq2seq_313_best.pth`的模型，其中被`_`隔开的第一个数字代表所匹配的station。
349 | 
350 | **以下为理想的输出结果**：
351 | 
352 | ## station313:
353 | 
354 |     SEQ2SEQ_rmse: 0.7847012 
355 |     SEQ2SEQ_mae: 0.17579529 
356 |     SEQ2SEQ_mdae: 0.0 
357 |     SEQ2SEQ_r2: 0.25680770788051666 
358 |     SEQ2SEQ_var: 0.2615431547164917
359 | 
360 | 
361 | ![seq2seq_313](./rain_shuffle/image_example/seq2seq_313.png)
362 | 
363 | ## station314:
364 | 
365 |     SEQ2SEQ_rmse: 0.7056168 
366 |     SEQ2SEQ_mae: 0.1004589 
367 |     SEQ2SEQ_mdae: 0.0 
368 |     SEQ2SEQ_r2: 0.23950347308864373 
369 |     SEQ2SEQ_var: 0.24046140909194946
370 | 
371 | ![seq2seq_314](./rain_shuffle/image_example/seq2seq_314.png)
372 | 
373 | ## station371:
374 | 
375 |     SEQ2SEQ_rmse: 0.92071176 
376 |     SEQ2SEQ_mae: 0.13802044 
377 |     SEQ2SEQ_mdae: 0.0 
378 |     SEQ2SEQ_r2: 0.18028592369689478 
379 |     SEQ2SEQ_var: 0.180952787399292
380 | 
381 | ![seq2seq_371](./rain_shuffle/image_example/seq2seq_371.png)
382 | 
383 | ## station372:
384 | 
385 |     SEQ2SEQ_rmse: 0.56710863 
386 |     SEQ2SEQ_mae: 0.13634275 
387 |     SEQ2SEQ_mdae: 0.0 
388 |     SEQ2SEQ_r2: 0.253192955761976 
389 |     SEQ2SEQ_var: 0.2590576410293579
390 | 
391 | ![seq2seq_372](./rain_shuffle/image_example/seq2seq_372.png)
392 | 
393 | ## station393:
394 | 
395 |     SEQ2SEQ_rmse: 0.92203474 
396 |     SEQ2SEQ_mae: 0.16005377 
397 |     SEQ2SEQ_mdae: 0.0 
398 |     SEQ2SEQ_r2: 0.18053682665056303 
399 |     SEQ2SEQ_var: 0.18120914697647095
400 | 
401 | ![seq2seq_393](./rain_shuffle/image_example/seq2seq_393.png)
402 | 
403 | 


--------------------------------------------------------------------------------
/rain_shuffle/ensemble_learn_np.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import os
  3 | from eval import evaluation, plot
  4 | import joblib
  5 | import torch
  6 | import matplotlib.pyplot as plt
  7 | from torch import Tensor
  8 | from torch.autograd import Variable
  9 | 
 10 | '''
 11 | 1. 必须把想要ensemble的模型保存在MODEL文件夹里，对于.pth模型必须要引入每个神经网络模型的类
 12 | 2. 我修改了hour2txt.py文件，把数据集按照训练集和测试集分别保存在了./train_test_hour_shuffle文件夹以及./train_test_hour文件夹下
 13 | '''
 14 | from FNN import Net
 15 | 
 16 | 
 17 | # 引入模型的类
 18 | # 。。。。
 19 | # 引入模型的类
 20 | 
 21 | 
 22 | def FakeBagging(path, dirlist, test_x, test_y):
 23 |     num = len(dirlist)
 24 |     y_pre = np.zeros((test_y.shape[0], 1))
 25 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
 26 |     for ele in dirlist:
 27 |         a = ele.split('.')
 28 |         b = a[-1]
 29 |         if b == 'm':
 30 |             clf = joblib.load('{}/{}'.format(path, ele))
 31 |             y_pre_temp = clf.predict(test_x)
 32 |             y_pre_temp = y_pre_temp.reshape(-1, 1)
 33 |             y_pre += y_pre_temp
 34 |         else:
 35 |             if ele[:3] == 'mlp':
 36 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
 37 |                 x_tensor = Tensor(test_x)
 38 |                 x_tensor = Variable(x_tensor)
 39 |                 # y_tensor = Tensor(test_y)
 40 |                 y_pre_temp = clf(x_tensor).detach().numpy()
 41 |             else:
 42 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
 43 |                 x_tensor = Tensor(test_x)
 44 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
 45 |                 # y_tensor = Tensor(test_y)
 46 |                 y_pre_temp = clf(x_tensor).detach().numpy()
 47 |             y_pre += y_pre_temp
 48 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
 49 |         #        print('model:',ele)
 50 |         #        print('test_rmse: %r\n' % rmse,
 51 |         #              'test_mae: %r\n' % mae,
 52 |         #              'test_mdae: %r\n' % mdae,
 53 |         #              'test_r2: %r\n' % r2,
 54 |         #              'test_var: %r\n' % var)
 55 |         rmse_list.append(rmse);
 56 |         mae_list.append(mae);
 57 |         mdae_list.append(mdae);
 58 |         r2_list.append(r2);
 59 |         var_list.append(var)
 60 |     y_pre /= num
 61 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_pre)
 62 |     rmse_list.append(rmse);
 63 |     mae_list.append(mae);
 64 |     mdae_list.append(mdae);
 65 |     r2_list.append(r2);
 66 |     var_list.append(var)
 67 | 
 68 |     # 绘图
 69 |     xmark = [ele.split('.')[0] for ele in dirlist]
 70 |     xmark.append('bagging_model')
 71 |     plt.figure();plt.plot(rmse_list,'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 72 |     plt.figure();plt.plot(mae_list,'*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 73 |     plt.plot(mdae_list, 'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 74 |     plt.plot(r2_list, '^-',c='cyan',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 75 |     plt.plot(var_list,'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 76 |     plt.show()
 77 | 
 78 | 
 79 | #    return rmse_list, mae_list, mdae_list, r2_list, var_list
 80 | 
 81 | 
 82 | def getStackingData(path, dirlist, train_x, train_y):
 83 |     num = len(dirlist)  # 基学习期个数
 84 |     n, m = train_x.shape
 85 | 
 86 |     data_x = np.zeros((n, num))  # 次级学习器的输入特征
 87 |     data_y = train_y  # 次级学习器的输出特征
 88 |     for i, ele in enumerate(dirlist):
 89 |         a = ele.split('.')
 90 |         b = a[-1]
 91 |         if b == 'm':
 92 |             clf = joblib.load('{}/{}'.format(path, ele))
 93 |             y_pre_temp = clf.predict(train_x)
 94 |         else:
 95 |             if ele[:3] == 'mlp':
 96 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
 97 |                 x_tensor = Tensor(train_x)
 98 |                 x_tensor = Variable(x_tensor)
 99 |                 # y_tensor = Tensor(test_y)
100 |                 y_pre_temp = clf(x_tensor).detach().numpy()
101 |             else:
102 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
103 |                 x_tensor = Tensor(train_x)
104 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
105 |                 # y_tensor = Tensor(test_y)
106 |                 y_pre_temp = clf(x_tensor).detach().numpy()
107 |         y_pre_temp = y_pre_temp.squeeze()
108 |         data_x[:, i] = y_pre_temp
109 | 
110 |     return data_x, data_y
111 | 
112 | 
113 | from sklearn.ensemble import GradientBoostingRegressor
114 | 
115 | 
116 | def stackingUseGBRT(path, dirlist, est_g, dep_g, lr_g, train_x, train_y, test_x, test_y):
117 |     num = len(dirlist)  # 基学习期个数
118 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
119 |     for ele in dirlist:
120 |         a = ele.split('.')
121 |         b = a[-1]
122 |         if b == 'm':
123 |             clf = joblib.load('{}/{}'.format(path, ele))
124 |             y_pre_temp = clf.predict(test_x)
125 |         else:
126 |             if ele[:3] == 'mlp':
127 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
128 |                 x_tensor = Tensor(test_x)
129 |                 x_tensor = Variable(x_tensor)
130 |                 # y_tensor = Tensor(test_y)
131 |                 y_pre_temp = clf(x_tensor).detach().numpy()
132 |             else:
133 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
134 |                 x_tensor = Tensor(test_x)
135 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
136 |                 # y_tensor = Tensor(test_y)
137 |                 y_pre_temp = clf(x_tensor).detach().numpy()
138 |         y_pre_temp = y_pre_temp.reshape(-1, 1)
139 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
140 |         rmse_list.append(rmse);
141 |         mae_list.append(mae);
142 |         mdae_list.append(mdae);
143 |         r2_list.append(r2);
144 |         var_list.append(var)
145 | 
146 |     data_x, data_y = getStackingData(path, dirlist, train_x, train_y)
147 |     gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep_g, min_samples_split=3, learning_rate=lr_g)
148 |     gbr.fit(data_x, data_y.ravel())
149 |     test_x, test_y = getStackingData(path, dirlist, test_x, test_y)
150 |     y_stacking = gbr.predict(test_x)
151 |     y_stacking = y_stacking.reshape(-1, 1)
152 | 
153 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_stacking)
154 |     rmse_list.append(rmse);
155 |     mae_list.append(mae);
156 |     mdae_list.append(mdae);
157 |     r2_list.append(r2);
158 |     var_list.append(var)
159 |     # 绘图
160 |     xmark = [ele.split('.')[0] for ele in dirlist]
161 |     xmark.append('stacking_model')
162 |     plt.figure();plt.plot(rmse_list, 'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
163 |     plt.figure();plt.plot(mae_list, '*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
164 |     plt.plot(mdae_list,'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
165 |     plt.plot(r2_list, '^-',c='cyan',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
166 |     plt.plot(var_list, 'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
167 |     plt.show()
168 | 
169 | 
170 | #    return rmse_list, mae_list, mdae_list, r2_list, var_list
171 | 
172 | 
173 | if __name__ == '__main__':
174 |     pathnow = os.getcwd()
175 |     path = '{}/MODEL_NO'.format(pathnow)
176 |     dirlist = os.listdir(path)  # 加载保存在MODEL文件夹里的所有.pth和.m模型
177 |     dirlist.remove('.DS_Store')
178 | 
179 |     # 加载测试数据集
180 |     id_num = 371
181 |     train_x = np.loadtxt("{}/train_test_hour/train{}.txt".format(pathnow, id_num))
182 |     train_y = np.loadtxt("{}/train_test_hour/train_label{}.txt".format(pathnow, id_num))
183 |     test_x = np.loadtxt("{}/train_test_hour/test{}.txt".format(pathnow, id_num))
184 |     test_y = np.loadtxt("{}/train_test_hour/test_label{}.txt".format(pathnow, id_num))
185 |     from sklearn.preprocessing import *
186 | 
187 |     train_x = scale(train_x, axis=0)
188 |     train_y = np.reshape(train_y, (-1, 1))
189 |     test_x = scale(test_x, axis=0)
190 |     test_y = np.reshape(test_y, (-1, 1))
191 | 
192 |     # bagging
193 |     FakeBagging(path,
194 |                 dirlist,
195 |                 test_x,
196 |                 test_y)
197 | 
198 |     # stacking use GBRT
199 |     est_g = 50
200 |     dep_g = 5
201 |     lr_g = 0.1
202 |     stackingUseGBRT(path,
203 |                     dirlist,
204 |                     est_g,
205 |                     dep_g,
206 |                     lr_g,
207 |                     train_x,
208 |                     train_y,
209 |                     test_x,
210 |                     test_y)
211 | 
212 | 
213 | 
214 | 
215 | 
216 | 
217 | 
218 | 
219 | 
220 | 


--------------------------------------------------------------------------------
/rain_shuffle/ensemble_learn.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import os
  4 | from eval import evaluation, plot
  5 | from sklearn.externals import joblib
  6 | import torch
  7 | import matplotlib.pyplot as plt
  8 | from torch import Tensor
  9 | from torch.autograd import Variable
 10 | import seaborn as sns
 11 | 
 12 | sns.set(style="white") #这是seaborn默认的风格
 13 | sns.set_palette("muted") #常用
 14 | 
 15 | '''
 16 | 1. 必须把想要ensemble的模型保存在MODEL文件夹里，对于.pth模型必须要引入每个神经网络模型的类
 17 | 2. 我修改了hour2txt.py文件，把数据集按照训练集和测试集分别保存在了./train_test_hour_shuffle文件夹以及./train_test_hour文件夹下
 18 | '''
 19 | from FNN import Net  
 20 | # 引入模型的类
 21 | # 。。。。
 22 | # 引入模型的类
 23 | 
 24 | 
 25 | def FakeBagging(path, dirlist, test_x, test_y):
 26 |     num = len(dirlist)
 27 |     y_pre = np.zeros((test_y.shape[0],1))
 28 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
 29 |     for ele in dirlist:
 30 |         a = ele.split('.')
 31 |         b = a[-1]
 32 |         if b=='m':
 33 |             clf = joblib.load('{}/{}'.format(path,ele))
 34 |             y_pre_temp = clf.predict(test_x)
 35 |             y_pre_temp = y_pre_temp.reshape(-1,1)
 36 |             y_pre += y_pre_temp
 37 |         else:
 38 |             if ele[:3]=='mlp':
 39 |                 clf = torch.load('{}/{}'.format(path,ele),map_location=torch.device('cpu'))
 40 |                 x_tensor = Tensor(test_x)
 41 |                 x_tensor = Variable(x_tensor)
 42 |                 # y_tensor = Tensor(test_y)
 43 |                 y_pre_temp = clf(x_tensor).detach().numpy()
 44 |             else:
 45 |                 clf = torch.load('{}/{}'.format(path,ele),map_location=torch.device('cpu'))
 46 |                 x_tensor = Tensor(test_x)
 47 |                 x_tensor = Variable(x_tensor).view(-1,7,3)
 48 |                 # y_tensor = Tensor(test_y)
 49 |                 y_pre_temp = clf(x_tensor).detach().numpy()
 50 |             y_pre += y_pre_temp
 51 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
 52 | #        print('model:',ele)
 53 | #        print('test_rmse: %r\n' % rmse,
 54 | #              'test_mae: %r\n' % mae,
 55 | #              'test_mdae: %r\n' % mdae,
 56 | #              'test_r2: %r\n' % r2,
 57 | #              'test_var: %r\n' % var)
 58 |         rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
 59 |     y_pre /= num
 60 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_pre)
 61 |     rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
 62 |     
 63 |     # 绘图
 64 |     xmark = [ele.split('_')[0] for ele in dirlist]
 65 |     xmark.append('bagging_model')
 66 |     plt.figure();plt.plot(rmse_list,'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 67 |     plt.figure();plt.plot(mae_list,'*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 68 |     plt.plot(mdae_list, 'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 69 |     plt.plot(r2_list, '^-',c='cyan',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 70 |     plt.plot(var_list,'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 71 |     plt.show()
 72 | #    return rmse_list, mae_list, mdae_list, r2_list, var_list
 73 | 
 74 | 
 75 | def getStackingData(path, dirlist, train_x, train_y):
 76 |     num = len(dirlist)  # 基学习期个数
 77 |     n,m = train_x.shape
 78 | 
 79 |     data_x = np.zeros((n, num))  # 次级学习器的输入特征
 80 |     data_y = train_y             # 次级学习器的输出特征
 81 |     for i,ele in enumerate(dirlist):
 82 |         a = ele.split('.')
 83 |         b = a[-1]
 84 |         if b=='m':
 85 |             clf = joblib.load('{}/{}'.format(path,ele))
 86 |             y_pre_temp = clf.predict(train_x)
 87 |         else:
 88 |             if ele[:3] == 'mlp':
 89 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
 90 |                 x_tensor = Tensor(train_x)
 91 |                 x_tensor = Variable(x_tensor)
 92 |                 # y_tensor = Tensor(test_y)
 93 |                 y_pre_temp = clf(x_tensor).detach().numpy()
 94 |             else:
 95 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
 96 |                 x_tensor = Tensor(train_x)
 97 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
 98 |                 # y_tensor = Tensor(test_y)
 99 |                 y_pre_temp = clf(x_tensor).detach().numpy()
100 |         y_pre_temp = y_pre_temp.squeeze()
101 |         data_x[:,i] = y_pre_temp
102 |        
103 |     return data_x, data_y
104 | 
105 | 
106 | from sklearn.ensemble import GradientBoostingRegressor
107 | def stackingUseGBRT(path, dirlist, est_g, dep_g, lr_g, train_x, train_y, test_x, test_y):
108 |     num = len(dirlist)  # 基学习期个数
109 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
110 |     for ele in dirlist:
111 |         a = ele.split('.')
112 |         b = a[-1]
113 |         if b=='m':
114 |             clf = joblib.load('{}/{}'.format(path,ele))
115 |             y_pre_temp = clf.predict(test_x)
116 |         else:
117 |             if ele[:3] == 'mlp':
118 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
119 |                 x_tensor = Tensor(test_x)
120 |                 x_tensor = Variable(x_tensor)
121 |                 # y_tensor = Tensor(test_y)
122 |                 y_pre_temp = clf(x_tensor).detach().numpy()
123 |             else:
124 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
125 |                 x_tensor = Tensor(test_x)
126 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
127 |                 # y_tensor = Tensor(test_y)
128 |                 y_pre_temp = clf(x_tensor).detach().numpy()
129 |         y_pre_temp = y_pre_temp.reshape(-1,1)
130 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
131 |         rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
132 |     
133 |     data_x, data_y = getStackingData(path, dirlist, train_x, train_y)
134 |     gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep_g, min_samples_split=3, learning_rate=lr_g)
135 |     gbr.fit(data_x, data_y.ravel())
136 |     test_x, test_y = getStackingData(path, dirlist, test_x, test_y)
137 |     y_stacking = gbr.predict(test_x)
138 |     y_stacking = y_stacking.reshape(-1,1)
139 |     
140 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_stacking)
141 |     rmse_list.append(rmse); mae_list.append(mae); mdae_list.append(mdae); r2_list.append(r2); var_list.append(var)
142 |     # 绘图
143 |     xmark = [ele.split('_')[0] for ele in dirlist]
144 |     xmark.append('stacking_model')
145 |     plt.figure();plt.plot(rmse_list, 'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
146 |     plt.figure();plt.plot(mae_list, '*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
147 |     plt.plot(mdae_list,'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
148 |     plt.plot(r2_list, '^-',c='cyan',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
149 |     plt.plot(var_list, 'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
150 |     plt.show()
151 | #    return rmse_list, mae_list, mdae_list, r2_list, var_list
152 | 
153 | 
154 | 
155 | if __name__ == '__main__':
156 |     pathnow = os.getcwd()
157 |     path = '{}/MODEL'.format(pathnow)  
158 |     dirlist = os.listdir(path)[1:]   #加载保存在MODEL文件夹里的所有.pth和.m模型
159 |     
160 |     # 加载测试数据集
161 |     id_num = 371
162 |     train_x=np.loadtxt("{}/train_test_hour_shuffle/train{}.txt".format(pathnow,id_num))
163 |     train_y=np.loadtxt("{}/train_test_hour_shuffle/train_label{}.txt".format(pathnow,id_num))
164 |     test_x=np.loadtxt("{}/train_test_hour_shuffle/test{}.txt".format(pathnow,id_num))
165 |     test_y=np.loadtxt("{}/train_test_hour_shuffle/test_label{}.txt".format(pathnow,id_num))
166 |     from sklearn.preprocessing import *
167 |     train_x = scale(train_x,axis=0)
168 |     train_y = np.reshape(train_y,(-1,1))
169 |     test_x = scale(test_x,axis=0)
170 |     test_y = np.reshape(test_y,(-1,1))
171 |     
172 |     
173 |     # bagging
174 |     FakeBagging(path, 
175 |                 dirlist, 
176 |                 test_x,
177 |                 test_y)
178 |     
179 |     # stacking use GBRT
180 |     est_g = 90
181 |     dep_g = 8
182 |     lr_g = 0.02
183 |     stackingUseGBRT(path, 
184 |                     dirlist, 
185 |                     est_g, 
186 |                     dep_g, 
187 |                     lr_g, 
188 |                     train_x, 
189 |                     train_y, 
190 |                     test_x, 
191 |                     test_y)
192 | 
193 |     
194 | 
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 
201 | 


--------------------------------------------------------------------------------
/rain_shuffle/ensemble_learn_np 1.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import os
  3 | from eval import evaluation, plot
  4 | import joblib
  5 | import torch
  6 | import matplotlib.pyplot as plt
  7 | from torch import Tensor
  8 | from torch.autograd import Variable
  9 | 
 10 | '''
 11 | 1. 必须把想要ensemble的模型保存在MODEL文件夹里，对于.pth模型必须要引入每个神经网络模型的类
 12 | 2. 我修改了hour2txt.py文件，把数据集按照训练集和测试集分别保存在了./train_test_hour_shuffle文件夹以及./train_test_hour文件夹下
 13 | '''
 14 | # from FNN import Net
 15 | 
 16 | 
 17 | # 引入模型的类
 18 | # 。。。。
 19 | # 引入模型的类
 20 | 
 21 | 
 22 | def FakeBagging(path, dirlist, test_x, test_y, id_num):
 23 |     num = len(dirlist)
 24 |     y_pre = np.zeros((test_y.shape[0], 1))
 25 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
 26 |     for ele in dirlist:
 27 |         print(ele)
 28 |         a = ele.split('.')
 29 |         b = a[-1]
 30 |         if b == 'm':
 31 |             clf = joblib.load('{}/{}'.format(path, ele))
 32 |             y_pre_temp = clf.predict(test_x)
 33 |             y_pre_temp = y_pre_temp.reshape(-1, 1)
 34 |             y_pre += y_pre_temp
 35 |         else:
 36 |             if ele[:3] == 'mlp':
 37 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
 38 |                 x_tensor = Tensor(test_x)
 39 |                 x_tensor = Variable(x_tensor)
 40 |                 # y_tensor = Tensor(test_y)
 41 |                 y_pre_temp = clf(x_tensor).detach().numpy()
 42 |             else:
 43 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
 44 |                 x_tensor = Tensor(test_x)
 45 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
 46 |                 # y_tensor = Tensor(test_y)
 47 |                 y_pre_temp = clf(x_tensor).detach().numpy()
 48 |             y_pre += y_pre_temp
 49 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
 50 |         #        print('model:',ele)
 51 |         #        print('test_rmse: %r\n' % rmse,
 52 |         #              'test_mae: %r\n' % mae,
 53 |         #              'test_mdae: %r\n' % mdae,
 54 |         #              'test_r2: %r\n' % r2,
 55 |         #              'test_var: %r\n' % var)
 56 |         rmse_list.append(rmse);
 57 |         mae_list.append(mae);
 58 |         mdae_list.append(mdae);
 59 |         r2_list.append(r2);
 60 |         var_list.append(var)
 61 |     y_pre /= num
 62 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_pre)
 63 |     rmse_list.append(rmse);
 64 |     mae_list.append(mae);
 65 |     mdae_list.append(mdae);
 66 |     r2_list.append(r2);
 67 |     var_list.append(var)
 68 | 
 69 |     # 绘图
 70 |     xmark = [ele.split('.')[0] for ele in dirlist]
 71 |     xmark = [ele.split('_')[0] for ele in dirlist]
 72 |     for i,ele in enumerate(xmark):
 73 |         if ele == 'train':
 74 |             xmark[i] = dirlist[i].split('_')[1]
 75 |     xmark.append('bagging_model')
 76 |     plt.figure();plt.plot(rmse_list,'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 77 |     plt.grid();plt.savefig('output/bagging_rmse_meric_%d'%id_num,bbox_inches='tight',dpi=500)
 78 |     plt.figure();plt.plot(mae_list,'*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 79 |     plt.plot(mdae_list, 'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 80 |     plt.plot(r2_list, '^-',c='r',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 81 |     plt.plot(var_list,'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
 82 |     plt.grid();plt.savefig('output/bagging_4_merics_%d'%id_num,bbox_inches='tight',dpi=500)
 83 |     plt.show()
 84 | 
 85 | 
 86 | #    return rmse_list, mae_list, mdae_list, r2_list, var_list
 87 | 
 88 | 
 89 | def getStackingData(path, dirlist, train_x, train_y):
 90 |     num = len(dirlist)  # 基学习期个数
 91 |     n, m = train_x.shape
 92 | 
 93 |     data_x = np.zeros((n, num))  # 次级学习器的输入特征
 94 |     data_y = train_y  # 次级学习器的输出特征
 95 |     for i, ele in enumerate(dirlist):
 96 |         a = ele.split('.')
 97 |         b = a[-1]
 98 |         if b == 'm':
 99 |             clf = joblib.load('{}/{}'.format(path, ele))
100 |             y_pre_temp = clf.predict(train_x)
101 |         else:
102 |             if ele[:3] == 'mlp':
103 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
104 |                 x_tensor = Tensor(train_x)
105 |                 x_tensor = Variable(x_tensor)
106 |                 # y_tensor = Tensor(test_y)
107 |                 y_pre_temp = clf(x_tensor).detach().numpy()
108 |             else:
109 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
110 |                 x_tensor = Tensor(train_x)
111 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
112 |                 # y_tensor = Tensor(test_y)
113 |                 y_pre_temp = clf(x_tensor).detach().numpy()
114 |         y_pre_temp = y_pre_temp.squeeze()
115 |         data_x[:, i] = y_pre_temp
116 | 
117 |     return data_x, data_y
118 | 
119 | 
120 | from sklearn.ensemble import GradientBoostingRegressor
121 | 
122 | 
123 | def stackingUseGBRT(path, dirlist, est_g, dep_g, lr_g, train_x, train_y, test_x, test_y, id_num):
124 |     num = len(dirlist)  # 基学习期个数
125 |     rmse_list, mae_list, mdae_list, r2_list, var_list = [], [], [], [], []
126 |     for ele in dirlist:
127 |         a = ele.split('.')
128 |         b = a[-1]
129 |         if b == 'm':
130 |             clf = joblib.load('{}/{}'.format(path, ele))
131 |             y_pre_temp = clf.predict(test_x)
132 |         else:
133 |             if ele[:3] == 'mlp':
134 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
135 |                 x_tensor = Tensor(test_x)
136 |                 x_tensor = Variable(x_tensor)
137 |                 # y_tensor = Tensor(test_y)
138 |                 y_pre_temp = clf(x_tensor).detach().numpy()
139 |             else:
140 |                 clf = torch.load('{}/{}'.format(path, ele), map_location=torch.device('cpu'))
141 |                 x_tensor = Tensor(test_x)
142 |                 x_tensor = Variable(x_tensor).view(-1, 7, 3)
143 |                 # y_tensor = Tensor(test_y)
144 |                 y_pre_temp = clf(x_tensor).detach().numpy()
145 |         y_pre_temp = y_pre_temp.reshape(-1, 1)
146 |         rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)
147 |         rmse_list.append(rmse);
148 |         mae_list.append(mae);
149 |         mdae_list.append(mdae);
150 |         r2_list.append(r2);
151 |         var_list.append(var)
152 | 
153 |     data_x, data_y = getStackingData(path, dirlist, train_x, train_y)
154 |     gbr = GradientBoostingRegressor(n_estimators=est_g, max_depth=dep_g, min_samples_split=3, learning_rate=lr_g)
155 |     gbr.fit(data_x, data_y.ravel())
156 |     test_x, test_y = getStackingData(path, dirlist, test_x, test_y)
157 |     y_stacking = gbr.predict(test_x)
158 |     y_stacking = y_stacking.reshape(-1, 1)
159 | 
160 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_stacking)
161 |     rmse_list.append(rmse);
162 |     mae_list.append(mae);
163 |     mdae_list.append(mdae);
164 |     r2_list.append(r2);
165 |     var_list.append(var)
166 |     # 绘图
167 |     xmark = [ele.split('_')[0] for ele in dirlist]
168 |     for i,ele in enumerate(xmark):
169 |         if ele == 'train':
170 |             xmark[i] = dirlist[i].split('_')[1]
171 |     
172 |     xmark.append('stacking_model')
173 |     plt.figure();plt.plot(rmse_list, 'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
174 |     plt.grid();plt.savefig('output/stacking_rmse_meric_%d'%id_num,bbox_inches='tight',dpi=500)
175 |     plt.figure();plt.plot(mae_list, '*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
176 |     plt.plot(mdae_list,'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
177 |     plt.plot(r2_list, '^-',c='r',label = 'r2_score');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
178 |     plt.plot(var_list, 'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num+1),xmark,rotation=45)
179 |     plt.grid();plt.savefig('output/stacking_4_merics_%d'%id_num,bbox_inches='tight',dpi=500)
180 |     plt.show()
181 | 
182 | 
183 | #    return rmse_list, mae_list, mdae_list, r2_list, var_list
184 | 
185 | 
186 | if __name__ == '__main__':
187 |     pathnow = os.getcwd()
188 |     path = '{}/MODEL_NO'.format(pathnow)
189 |     dirlist = os.listdir(path)  # 加载保存在MODEL文件夹里的所有.pth和.m模型
190 |     # dirlist.remove('.DS_Store')
191 | 
192 |     # 加载测试数据集
193 |     id_num = 312
194 |     train_x = np.loadtxt("{}/train_test_hour/train{}.txt".format(pathnow, id_num))
195 |     train_y = np.loadtxt("{}/train_test_hour/train_label{}.txt".format(pathnow, id_num))
196 |     test_x = np.loadtxt("{}/train_test_hour/test{}.txt".format(pathnow, id_num))
197 |     test_y = np.loadtxt("{}/train_test_hour/test_label{}.txt".format(pathnow, id_num))
198 |     from sklearn.preprocessing import *
199 | 
200 |     train_x = scale(train_x, axis=0)
201 |     train_y = np.reshape(train_y, (-1, 1))
202 |     test_x = scale(test_x, axis=0)
203 |     test_y = np.reshape(test_y, (-1, 1))
204 | 
205 |     # bagging
206 |     FakeBagging(path,
207 |                 dirlist,
208 |                 test_x,
209 |                 test_y,
210 |                 id_num)
211 |     
212 |     # stacking use GBRT
213 |     est_g = 50
214 |     dep_g = 3
215 |     lr_g = 0.1
216 |     stackingUseGBRT(path,
217 |                     dirlist,
218 |                     est_g,
219 |                     dep_g,
220 |                     lr_g,
221 |                     train_x,
222 |                     train_y,
223 |                     test_x,
224 |                     test_y,
225 |                     id_num)
226 | 
227 | 
228 | 
229 | 
230 | 
231 | 
232 | 
233 | 
234 | 
235 | 


--------------------------------------------------------------------------------
/rain_shuffle/seq2seq_module.py:
--------------------------------------------------------------------------------
  1 | from __future__ import unicode_literals, print_function, division
  2 | import os
  3 | import torch
  4 | import torch.nn as nn
  5 | from torch import optim
  6 | import torch.nn.functional as F
  7 | from sklearn.model_selection import train_test_split
  8 | from torch.autograd import Variable
  9 | import numpy as np
 10 | 
 11 | 
 12 | MAX_LENGTH=10
 13 | path ='./SEQ2SEQ'
 14 | 
 15 | 
 16 | # 定义GetLoader类，继承Dataset方法，并重写__getitem__()和__len__()方法
 17 | class GetLoader(torch.utils.data.Dataset):
 18 | 	# 初始化函数，得到数据
 19 |     def __init__(self, data_root, data_label):
 20 |         self.data = data_root
 21 |         self.label = data_label
 22 |     # index是根据batchsize划分数据后得到的索引，最后将data和对应的labels进行一起返回
 23 |     def __getitem__(self, index):
 24 |         data = self.data[index]
 25 |         labels = self.label[index]
 26 |         return data, labels
 27 |     # 该函数返回数据大小长度，目的是DataLoader方便划分，如果不知道大小，DataLoader会一脸懵逼
 28 |     def __len__(self):
 29 |         return len(self.data)
 30 | 
 31 | 
 32 | class EncoderRNN(nn.Module):
 33 |     def __init__(self, input_size, hidden_size):
 34 |         super(EncoderRNN, self).__init__()
 35 |         self.input_size = input_size
 36 |         self.hidden_size = hidden_size
 37 | 
 38 |         self.embedding = nn.Linear(input_size, hidden_size)
 39 |         self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
 40 | 
 41 |     def forward(self, input, hidden):
 42 |         # embedded = self.embedding(input).view(-1, 1 ,self.hidden_size)
 43 |         # output = embedded
 44 |         output = input.view(-1, input.shape[1], self.input_size)
 45 |         output, hidden = self.gru(output, hidden)
 46 |         output = torch.relu(output)
 47 |         return output, hidden
 48 | 
 49 |     def initHidden(self, batch_size):
 50 |         return torch.zeros(1, batch_size, self.hidden_size)
 51 | 
 52 | 
 53 | class DecoderRNN(nn.Module):
 54 |     def __init__(self, hidden_size, output_size, dropout_p):
 55 |         super(DecoderRNN, self).__init__()
 56 |         self.hidden_size = hidden_size
 57 | 
 58 |         self.embedding = nn.Linear(hidden_size, hidden_size)
 59 |         self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
 60 |         self.out = nn.Linear(hidden_size, output_size)
 61 | 
 62 |     def forward(self, input, hidden):
 63 |         # output = self.embedding(input).view(-1, 1 ,self.hidden_size)
 64 | 
 65 |         output = input.view(-1, input.shape[1], self.hidden_size)
 66 |         output, hidden = self.gru(output, hidden)
 67 |         output = self.out(output[:, 0, :])
 68 | 
 69 |         output = torch.relu(output)
 70 | 
 71 |         return output, hidden
 72 | 
 73 |     def initHidden(self, batch_size):
 74 |         return torch.zeros(1, batch_size, self.hidden_size)
 75 | 
 76 | 
 77 | 
 78 | class seq2seq_cell(nn.Module):
 79 |     def __init__(self,input_dim,seq_len,output_dim,hidden_size,dropout,learning_rate,batch_size,device):
 80 |         super().__init__()
 81 |         self.input_dim = input_dim
 82 |         self.seq_len = seq_len
 83 | 
 84 |         self.output_dim = output_dim
 85 | 
 86 |         self.hidden_size = hidden_size
 87 |         self.dropout = dropout
 88 |         self.lr = learning_rate
 89 |         self.bs = batch_size
 90 |         self.device =device
 91 | 
 92 |         self.encoder = EncoderRNN(self.input_dim, self.hidden_size)
 93 |         self.decoder = DecoderRNN(self.hidden_size, self.output_dim, dropout_p=self.dropout)
 94 |     def forward(self,input):
 95 | 
 96 |         encoder_hidden = self.encoder.initHidden(input.shape[0])
 97 |         encoder_outputs = torch.zeros(self.seq_len,input.shape[0], self.hidden_size)
 98 | 
 99 |         # 向前传播
100 |         encoder_output, encoder_hidden = self.encoder(input, encoder_hidden)
101 |         encoder_outputs= encoder_output
102 |         """
103 |         此处可能是对序列不敏感的原因
104 |         """
105 | 
106 |         # encoder_outputs = encoder_output.view(input.shape[1], self.hidden_size)
107 | 
108 |         decoder_hidden = encoder_hidden
109 | 
110 |         decoder_outputs = torch.zeros(input.shape[0], self.seq_len, self.output_dim)
111 | 
112 | 
113 |         decoder_output, decoder_hidden = self.decoder(
114 |             encoder_outputs, decoder_hidden)
115 |         decoder_outputs = decoder_output
116 | 
117 | 
118 |         return decoder_outputs
119 | 
120 | 
121 | 
122 | class Seq2Seq():
123 |     def __init__(self,num,input_dim,seq_len,output_dim,hidden_size,dropout,learning_rate,batch_size,device_pu):
124 |         self.station = num
125 |         self.input_dim =input_dim
126 |         self.output_dim = output_dim
127 |         self.seq_len =seq_len
128 |         self.hidden_size = hidden_size
129 |         self.dropout = dropout
130 |         self.lr =learning_rate
131 |         self.bs =batch_size
132 |         self.device =device_pu
133 |         self.model =seq2seq_cell(self.input_dim,
134 |                                  self.seq_len ,
135 |                                  self.output_dim,
136 |                                  self.hidden_size,
137 |                                  self.dropout,
138 |                                  self.lr,
139 |                                  self.bs,self.device).to(self.device)
140 | 
141 |     def fit(self,data,label,shuffle,num_epoches = 100):
142 | 
143 |         self.shuffle =shuffle
144 | 
145 |         self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
146 |         self.criterion = nn.MSELoss()
147 | 
148 |         x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.22, random_state=42)
149 | 
150 |         train = GetLoader(x_train, y_train)
151 |         data_train = torch.utils.data.DataLoader(train, batch_size=self.bs, shuffle=self.shuffle)
152 |         test = GetLoader(x_test, y_test)
153 |         data_test = torch.utils.data.DataLoader(test, batch_size=self.bs, shuffle=self.shuffle)
154 | 
155 |         if os.path.exists(path):
156 |             pass
157 |         else:
158 |             os.mkdir(path)
159 | 
160 |         eval_loss_best = np.inf
161 |         uncorrect = True
162 |         while uncorrect:
163 |             f = open('{}/train_{}_{}_{}_{}.txt'.format(path,self.station, self.hidden_size, self.dropout, self.lr), 'w+')
164 |             self.model =seq2seq_cell(self.input_dim,
165 |                                 self.seq_len ,
166 |                                 self.output_dim,
167 |                                 self.hidden_size,
168 |                                 self.dropout,
169 |                                 self.lr,
170 |                                 self.bs,self.device).to(self.device)
171 |             self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
172 |             self.criterion = nn.MSELoss()
173 |             train_loss_last = np.inf
174 |             for iter in range(1, num_epoches + 1):
175 |                 self.model.train()
176 |                 print('epoch {}'.format(iter))
177 |                 print('**************************************')
178 |                 running_loss = 0.0
179 | 
180 |                 for i, data in enumerate(data_train, 1):
181 |                     """
182 |                     随机打乱的方式不好 应该是全部打乱之后 固定抽取 否则会出现样本利用不均衡的问题
183 |                     """
184 |                     img, label = data
185 |                     img = Variable(img)
186 |                     label = Variable(label).to(self.device)
187 | 
188 |                     img = img.view(-1,self.seq_len,self.input_dim).to(self.device)
189 |                     decoder_output =self.model(img)
190 | 
191 |                     loss = self.criterion(decoder_output, label)
192 |                     running_loss += loss.data.item() * label.size(0)
193 |                     # 向后传播
194 |                     self.optimizer.zero_grad()
195 |                     loss.backward()
196 |                     self.optimizer.step()
197 | 
198 |                 train_loss =running_loss / (len(y_train))
199 |                 print('Finish {} epoch, Loss: {:.6f}'.format(
200 |                     iter, train_loss))
201 | 
202 |                 if train_loss_last == train_loss and iter < 3:
203 |                     break
204 |                 if train_loss_last > train_loss and iter >= 3:
205 |                     uncorrect = False
206 |                     
207 |                 train_loss_last =train_loss
208 | 
209 |                 self.model.eval()
210 |                 eval_loss = 0.
211 |                 for data in data_test:
212 |                     img, label = data
213 | 
214 |                     img = Variable(img)
215 |                     label = Variable(label).to(self.device)
216 | 
217 |                     img = img.view( -1,self.seq_len, self.input_dim).to(self.device)
218 | 
219 |                     decoder_output =self.model(img)
220 | 
221 |                     loss = self.criterion(decoder_output, label)
222 |                     eval_loss += loss.data.item() * label.size(0)
223 |                 val_loss = eval_loss / (len(y_test))
224 |                 print('Val Loss: {:.6f}'.format(val_loss))
225 | 
226 |                 
227 |                 f.write(" Train_MSE: " + str(train_loss) + ' Val_MSE: ' + str(val_loss) + '\n')
228 | 
229 |                 if val_loss < eval_loss_best:
230 |                     eval_loss_best = val_loss
231 |                     self.eval = eval_loss_best
232 |                     torch.save(self.model, '{}/seq2seq_{}_{}_{}_{}.pth'.format(path, self.station,self.hidden_size, self.dropout, self.lr))
233 |             f.close()
234 | 
235 |         return self.eval
236 | 
237 | 
238 |     def predict(self,test_data,test_label):
239 | 
240 |         test_model = torch.load('{}/seq2seq_{}_{}_{}_{}.pth'.format(path,self.station,self.hidden_size, self.dropout,self.lr)).to(self.device)
241 | 
242 |         test_loss = 0
243 |         test = GetLoader(test_data, test_label)
244 |         criterion = nn.MSELoss()
245 |         data_test = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)
246 | 
247 |         y_mlp = []
248 |         for data in data_test:
249 | 
250 |             img, label = data
251 |             img = Variable(img)
252 |             label = Variable(label).to(self.device)
253 | 
254 |             img = img.view( -1, self.seq_len,self.input_dim).to(self.device)
255 | 
256 |             decoder_output = test_model(img)
257 | 
258 |             loss = criterion(decoder_output, label)
259 |             test_loss += loss.data
260 |             y_mlp.append(decoder_output.data)
261 | 
262 | 
263 |         print('Test Loss: {:.6f}'.format(test_loss / (len(
264 |             test_label))))
265 | 
266 |         y_mlp = np.array(y_mlp).squeeze()[:,np.newaxis]
267 | 
268 | 
269 |         return y_mlp
270 | 
271 | 
272 | 
273 | 
274 | 
275 | 


--------------------------------------------------------------------------------
/rain_shuffle/att_seq2seq_module.py:
--------------------------------------------------------------------------------
  1 | from __future__ import unicode_literals, print_function, division
  2 | import os
  3 | import torch
  4 | import torch.nn as nn
  5 | from torch import optim
  6 | import torch.nn.functional as F
  7 | from sklearn.model_selection import train_test_split
  8 | from torch.autograd import Variable
  9 | import numpy as np
 10 | 
 11 | 
 12 | 
 13 | MAX_LENGTH=10
 14 | path ='./ATT_SEQ2SEQ'
 15 | 
 16 | 
 17 | # 定义GetLoader类，继承Dataset方法，并重写__getitem__()和__len__()方法
 18 | class GetLoader(torch.utils.data.Dataset):
 19 | 	# 初始化函数，得到数据
 20 |     def __init__(self, data_root, data_label):
 21 |         self.data = data_root
 22 |         self.label = data_label
 23 |     # index是根据batchsize划分数据后得到的索引，最后将data和对应的labels进行一起返回
 24 |     def __getitem__(self, index):
 25 |         data = self.data[index]
 26 |         labels = self.label[index]
 27 |         return data, labels
 28 |     # 该函数返回数据大小长度，目的是DataLoader方便划分，如果不知道大小，DataLoader会一脸懵逼
 29 |     def __len__(self):
 30 |         return len(self.data)
 31 | 
 32 | 
 33 | class EncoderRNN(nn.Module):
 34 |     def __init__(self, input_size, hidden_size):
 35 |         super(EncoderRNN, self).__init__()
 36 |         self.input_size = input_size
 37 |         self.hidden_size = hidden_size
 38 | 
 39 |         self.embedding = nn.Linear(input_size, hidden_size)
 40 |         self.gru = nn.GRU(input_size, hidden_size,batch_first=True)
 41 | 
 42 |     def forward(self, input, hidden):
 43 |         # embedded = self.embedding(input).view(-1, 1 ,self.hidden_size)
 44 |         # output = embedded
 45 |         output = input.view(-1, 1, self.input_size)
 46 |         output, hidden = self.gru(output, hidden)
 47 |         output = torch.relu(output)
 48 |         return output, hidden
 49 | 
 50 |     def initHidden(self,batch_size):
 51 |         return torch.zeros(1, batch_size , self.hidden_size)
 52 | 
 53 | 
 54 | class DecoderRNN(nn.Module):
 55 |     def __init__(self, hidden_size, output_size,dropout_p,seq_len):
 56 |         super(DecoderRNN, self).__init__()
 57 |         self.hidden_size = hidden_size
 58 |         self.seq_len = seq_len
 59 |         self.embedding = nn.Linear(hidden_size, hidden_size)
 60 | 
 61 |         self.attn = nn.Linear(self.hidden_size * 2, self.seq_len)
 62 |         self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
 63 | 
 64 |         self.gru = nn.GRU(hidden_size, hidden_size,batch_first=True)
 65 |         self.out = nn.Linear(hidden_size, output_size)
 66 | 
 67 | 
 68 |     def forward(self, input, hidden,encoder_outputs):
 69 |         # output = self.embedding(input).view(-1, 1 ,self.hidden_size)
 70 | 
 71 |         output = input.view(1, -1, self.hidden_size)
 72 | 
 73 |         attn_weights = F.softmax(
 74 |             self.attn(torch.cat((output[0], hidden[0]), 1)), dim=1)
 75 | 
 76 |         attn_weights=attn_weights.unsqueeze(0)
 77 |         attn_weights = attn_weights.transpose(1,0)
 78 | 
 79 |         encoder_outputs = encoder_outputs.transpose(1, 0)
 80 |         attn_applied = torch.bmm(attn_weights,
 81 |                                  encoder_outputs).transpose(1,0)
 82 | 
 83 |         output = torch.cat((output[0], attn_applied[0]), 1)
 84 |         output = self.attn_combine(output).unsqueeze(0)
 85 | 
 86 |         output = F.relu(output).transpose(1,0)
 87 |         output, hidden = self.gru(output, hidden)
 88 | 
 89 |         output = torch.relu(self.out(output[:,0,:]))
 90 | 
 91 |         return output, hidden,attn_weights
 92 | 
 93 |     def initHidden(self,batch_size):
 94 |         return torch.zeros(1,batch_size, self.hidden_size)
 95 | 
 96 | 
 97 | 
 98 | class seq2seq_cell(nn.Module):
 99 |     def __init__(self,input_dim,seq_len,output_dim,hidden_size,dropout,learning_rate,batch_size,device):
100 |         super().__init__()
101 |         self.input_dim = input_dim
102 |         self.output_dim = output_dim
103 |         self.seq_len = seq_len
104 |         self.hidden_size = hidden_size
105 |         self.dropout = dropout
106 |         self.lr = learning_rate
107 |         self.bs = batch_size
108 |         self.device = device
109 |         self.encoder = EncoderRNN(self.input_dim, self.hidden_size)
110 |         self.decoder = DecoderRNN(self.hidden_size, self.output_dim, dropout_p=self.dropout,seq_len=self.seq_len)
111 |     def forward(self,input):
112 | 
113 |         encoder_hidden = self.encoder.initHidden(input.shape[0])
114 |         encoder_outputs = torch.zeros( self.seq_len ,input.shape[0],self.hidden_size)
115 | 
116 |         # 向前传播
117 |         for i in range(self.seq_len):
118 |             encoder_output, encoder_hidden = self.encoder(input[:,i,:], encoder_hidden)
119 |             encoder_outputs[i] = encoder_output[0]
120 |         """
121 |         此处可能是对序列不敏感的原因
122 |         """
123 | 
124 |         # encoder_outputs = encoder_output.view(input.shape[1], self.hidden_size)
125 | 
126 |         decoder_hidden = encoder_hidden
127 | 
128 |         decoder_outputs = torch.zeros(input.shape[0],self.seq_len, self.output_dim)
129 | 
130 |         for i in range(self.seq_len):
131 |             decoder_output, decoder_hidden,decoder_attention = self.decoder(
132 |                 encoder_outputs[i], decoder_hidden,encoder_outputs)
133 |             decoder_outputs[:,i,:] =decoder_output
134 | 
135 | 
136 |         return decoder_outputs[:,0,:], decoder_attention[:,0,:]
137 | 
138 | 
139 | 
140 | class Seq2Seq():
141 |     def __init__(self,num,input_dim,seq_len,output_dim,hidden_size,dropout,learning_rate,batch_size,device_pu):
142 |         self.station = num
143 |         self.input_dim =input_dim
144 |         self.output_dim = output_dim
145 |         self.seq_len =seq_len
146 |         self.hidden_size = hidden_size
147 |         self.dropout = dropout
148 |         self.lr =learning_rate
149 |         self.bs =batch_size
150 |         self.device =device_pu
151 |         self.model =seq2seq_cell(self.input_dim,
152 |                                  self.seq_len,
153 |                                  self.output_dim,
154 |                                  self.hidden_size,
155 |                                  self.dropout,
156 |                                  self.lr,
157 |                                  self.bs,self.device).to(self.device )
158 | 
159 |     def fit(self,data,label,shuffle,num_epoches = 100):
160 | 
161 |         self.shuffle =shuffle
162 | 
163 |         self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
164 |         self.criterion = nn.MSELoss()
165 | 
166 |         x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.22, random_state=42)
167 | 
168 |         train = GetLoader(x_train, y_train)
169 |         data_train = torch.utils.data.DataLoader(train, batch_size=self.bs, shuffle=self.shuffle)
170 |         test = GetLoader(x_test, y_test)
171 |         data_test = torch.utils.data.DataLoader(test, batch_size=self.bs, shuffle=self.shuffle)
172 | 
173 |         if os.path.exists(path):
174 |             pass
175 |         else:
176 |             os.mkdir(path)
177 | 
178 |         eval_loss_best = np.inf
179 |         uncorrect = True
180 |         while uncorrect:
181 |             f = open('{}/train_{}_{}_{}_{}.txt'.format(path,self.station, self.hidden_size, self.dropout, self.lr), 'w+')
182 |             self.model =seq2seq_cell(self.input_dim,
183 |                                 self.seq_len ,
184 |                                 self.output_dim,
185 |                                 self.hidden_size,
186 |                                 self.dropout,
187 |                                 self.lr,
188 |                                 self.bs,self.device).to(self.device)
189 |             self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
190 |             self.criterion = nn.MSELoss()
191 |             train_loss_last = np.inf
192 |             for iter in range(1, num_epoches + 1):
193 |                 self.model.train()
194 |                 print('epoch {}'.format(iter))
195 |                 print('**************************************')
196 |                 running_loss = 0.0
197 | 
198 |                 for i, data in enumerate(data_train, 1):
199 |                     """
200 |                     随机打乱的方式不好 应该是全部打乱之后 固定抽取 否则会出现样本利用不均衡的问题
201 |                     """
202 |                     img, label = data
203 |                     img = Variable(img)
204 |                     label = Variable(label).to(self.device)
205 | 
206 |                     img = img.view(-1,self.seq_len,self.input_dim).to(self.device)
207 |                     decoder_output,decoder_attention =self.model(img)
208 | 
209 |                     loss = self.criterion(decoder_output, label)
210 |                     running_loss += loss.data.item() * label.size(0)
211 |                     # 向后传播
212 |                     self.optimizer.zero_grad()
213 |                     loss.backward()
214 |                     self.optimizer.step()
215 | 
216 |                 train_loss =running_loss / (len(y_train))
217 |                 print('Finish {} epoch, Loss: {:.6f}'.format(
218 |                     iter, train_loss))
219 | 
220 |                 if train_loss_last == train_loss and iter < 3:
221 |                     break
222 |                 if train_loss_last > train_loss and iter >= 3:
223 |                     uncorrect = False
224 |                     
225 |                 train_loss_last =train_loss
226 | 
227 |                 self.model.eval()
228 |                 eval_loss = 0.
229 |                 for data in data_test:
230 |                     img, label = data
231 | 
232 |                     img = Variable(img)
233 |                     label = Variable(label).to(self.device)
234 | 
235 |                     img = img.view( -1, self.seq_len,self.input_dim).to(self.device)
236 | 
237 |                     decoder_output,decoder_attention =self.model(img)
238 | 
239 |                     loss = self.criterion(decoder_output, label)
240 |                     eval_loss += loss.data.item() * label.size(0)
241 |                 val_loss = eval_loss / (len(y_test))
242 |                 print('Val Loss: {:.6f}'.format(val_loss))
243 | 
244 |                 
245 |                 f.write(" Train_MSE: " + str(train_loss) + ' Val_MSE: ' + str(val_loss) + '\n')
246 | 
247 |                 if val_loss < eval_loss_best:
248 |                     eval_loss_best = val_loss
249 |                     self.eval = eval_loss_best
250 |                     torch.save(self.model, '{}/seq2seq_{}_{}_{}_{}.pth'.format(path, self.station,self.hidden_size, self.dropout, self.lr))
251 |             f.close()
252 |         
253 |         return self.eval
254 | 
255 | 
256 |     def predict(self,test_data,test_label):
257 | 
258 |         test_model = torch.load('{}/seq2seq_{}_{}_{}_{}.pth'.format(path,self.station,self.hidden_size, self.dropout,self.lr)).to(self.device )
259 | 
260 |         test_loss = 0
261 |         test = GetLoader(test_data, test_label)
262 |         criterion = nn.MSELoss()
263 |         data_test = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)
264 | 
265 |         y_mlp = []
266 |         y_attention=[]
267 |         for data in data_test:
268 | 
269 |             img, label = data
270 |             img = Variable(img).to(self.device )
271 |             label = Variable(label).to(self.device )
272 | 
273 |             img = img.view( -1, self.seq_len,self.input_dim)
274 | 
275 |             decoder_output,decoder_attention = test_model(img)
276 | 
277 |             loss = criterion(decoder_output, label)
278 |             test_loss += loss.data
279 |             y_mlp.append(decoder_output.data)
280 |             y_attention.append(decoder_attention.data.cpu().squeeze().numpy())
281 | 
282 | 
283 |         print('Test Loss: {:.6f}'.format(test_loss / (len(
284 |             test_label))))
285 | 
286 |         y_mlp = np.array(y_mlp).squeeze()[:,np.newaxis]
287 |         y_attention =np.array(y_attention)
288 | 
289 | 
290 |         return y_mlp ,y_attention
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 


--------------------------------------------------------------------------------
/rain_shuffle/eval.py:
--------------------------------------------------------------------------------
  1 | from sklearn.metrics import *
  2 | import numpy.linalg as la
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | import os
  6 | import warnings
  7 | warnings.filterwarnings("ignore")#忽略警告
  8 | #import seaborn as sns
  9 | 
 10 | def evaluation(a,b):  # a为真实标签
 11 |     rmse = np.sqrt(mean_squared_error(a,b))
 12 |     mae = mean_absolute_error(a, b)
 13 |     var = explained_variance_score(a,b)
 14 |     mdae = median_absolute_error(a,b)
 15 |     r2 =r2_score(a,b)
 16 |     return rmse, mae, mdae, r2, var
 17 | 
 18 | 
 19 | #绘图
 20 | def plot(a, b, label_a, label_b, title=None):
 21 |     plt.figure()
 22 |     if title != None:
 23 |         plt.title(str(title) )
 24 |     plt.plot(a, color = 'b', label = str(label_a))
 25 |     plt.plot(b, color = 'r', label = str(label_b))
 26 |     plt.legend(loc='best')
 27 |     plt.show()
 28 | 
 29 | 
 30 | def readandplot(filepath, save_name, title = None): # 函数用途：模型寻优过程不同超参数下的损失
 31 |     f = open(filepath,"r")
 32 |     line_all = f.readlines()
 33 |     row_num = len(line_all)
 34 |     label_list = []
 35 |     line_1 = line_all[0].strip('\n')
 36 |     a = line_1.split(' ')
 37 |     m = int(len(a) / 2)
 38 |     for i in [2*ele  for ele in range(m)]:
 39 |         label_list.append(a[i].strip(':'))
 40 |     print(label_list)
 41 |     b = np.zeros((row_num,m))
 42 |     for j,line in enumerate(line_all):
 43 |         line = line.strip('\n') 
 44 |         c = line.split(' ')
 45 |         for k in range(m):
 46 |             l = 2 * k + 1
 47 |             b[j,k] = c[l]  
 48 |     data = b
 49 |     ax = plt.figure()
 50 |     if title != None:
 51 |         plt.title(str(title) )
 52 |     colors = ['r','b']
 53 |     mark = ['v-','o-']
 54 |     if save_name == 'SVR_poly_train_371_degree':
 55 |         data = data[:-5,:]
 56 |     for i in range(m):
 57 |         plt.plot(data[:,0],data[:,i+1], mark[i],c = colors[i],alpha = 0.4,label = label_list[i+1])
 58 |         plt.xlabel(label_list[0])
 59 |         plt.legend(loc='best') 
 60 |         y_min = np.min(data[:,i+1])
 61 |         index = np.argmin(data[:,i+1])
 62 |         x_min = data[:,0][index]
 63 |         plt.text(x_min,y_min,"min",fontdict={'size':'8','color':colors[i]})
 64 |         if i==m-2:
 65 |             break
 66 |     plt.grid()
 67 |     plt.show()
 68 |     ax.savefig('output/%s.png'%save_name,bbox_inches='tight',dpi=500)
 69 | 
 70 | 
 71 | 
 72 | # def readTestResults(filepath):  # 函数用途：每个模型最优参数下的5个指标值对比
 73 | #     f = open(filepath,"r")
 74 | #     line_all = f.readlines()
 75 | #     row_num = len(line_all)
 76 | #     data = np.zeros((row_num,5))
 77 | #     label_list = ['ARIMA','ATT_SEQ2SEQ','GBRT','LSTM','MLP','SEQ2SEQ','SVR_poly','SVR_rbf','SVR_sigmoid','XGB']
 78 | #     for j,line in enumerate(line_all):
 79 | #         line = line.strip('\n').split(' ') 
 80 | #         for k in range(5):
 81 | #             data[j,k] = float(line[2 * k + 1])
 82 |     
 83 | #     colors = ['r','b','darkblue','cyan','violet']
 84 |     
 85 |     
 86 | #     ax = plt.figure()
 87 | #     plt.plot(data[:,0], label = 'rmse' ,c=colors[0], linestyle='--',marker='o')
 88 | #     plt.xticks(range(row_num),label_list,rotation=45) #可以是字符
 89 | #     plt.legend()
 90 | #     plt.grid()
 91 | #     plt.show()
 92 | #     ax.savefig('output/rmse.png',bbox_inches='tight',dpi=500)
 93 |     
 94 |     
 95 | #     ax = plt.figure()
 96 | #     for i,y_mark in enumerate(['mae','mdae','r2','var']):
 97 | #         i = i + 1
 98 | #         plt.plot(data[:,i], label = y_mark ,c=colors[i], linestyle='--',marker='o')
 99 | #         plt.xticks(range(row_num),label_list,rotation=45) #可以是字符
100 | #         plt.legend()
101 | #     plt.grid()
102 | #     plt.show()
103 | #     ax.savefig('output/the_other_4_metics.png',bbox_inches='tight',dpi=500)
104 | #     return data
105 | 
106 | 
107 | 
108 | # import glob
109 | # import xlwt
110 | # import xlrd
111 | # from xlutils.copy import copy as xl_copy
112 | # def processTestResultsOfEachModel(filefolder,name): # 读取当前模型在所有气象站上的测试结果并保存到表格
113 | #     dirlist = []
114 | #     for f in glob.glob('{}/test*.txt'.format(filefolder)): # find all test*.txt files and store their paths into a list
115 | #         dirlist.append(f)
116 | #     station_num = len(dirlist)
117 | #     data = np.zeros((5,station_num))
118 | #     numid = [ele[-7:-4] for ele in dirlist]
119 | #     for i,path in enumerate(dirlist):
120 | #         with open(path, 'r') as f:
121 | #             line = f.readlines()
122 | #             line = line[0].strip('\n').split(' ')
123 | #             data[:,i] = [float(line[2  * j + 1]) for j in range(5)]
124 | #             if i==0:
125 | #                 label_list = [line[2 * j][:-1] for j in range(5)]
126 | #
127 | #     try:
128 | #         rb = xlrd.open_workbook('test_data_of_each_station.xls', formatting_info=True)
129 | #         workbook = xl_copy(rb) # make a copy of it
130 | #     except:
131 | #
132 | #         workbook = xlwt.Workbook(encoding='utf-8')
133 | #     try:
134 | #         booksheet = workbook.add_sheet('%s'%name,cell_overwrite_ok=True)
135 | #     except:
136 | #         print('%s already exists'%name)
137 | #         return
138 | #     booksheet.write(0,0,'station id')
139 | #     for j in range(station_num):
140 | #         booksheet.write(0,j+1,int(numid[j]))
141 | #
142 | #     for i in range(5):
143 | #         booksheet.write(i+1,0,label_list[i])
144 | #
145 | #     for i in range(5):
146 | #         for j in range(station_num):
147 | #             booksheet.write(i+1,j+1,data[i,j])
148 | #
149 | #     booksheet.write(0,station_num+1,'mean_value')
150 | #     for i in range(5):
151 | #         booksheet.write(i+1,station_num+1,data[i,:].mean())
152 | #
153 | #     booksheet.write(0,station_num+2,'mean_value_after_eliminating_2_worst')
154 | #     for i in range(5):
155 | #         if i == 3:
156 | #             booksheet.write(i+1,station_num+2,-np.sort(-data[i,:])[:-2].mean())
157 | #         else:
158 | #             booksheet.write(i+1,station_num+2,np.sort(data[i,:])[:-2].mean())
159 | #
160 | #     workbook.save('test_data_of_each_station.xls')
161 | 
162 | 
163 | 
164 | def plotBestResults():  # 读取表格，把每个模型的最好表现对比绘图，并保存到output文件夹
165 |     rb = xlrd.open_workbook('test_data_of_each_station.xls', formatting_info=True)
166 |     sheetNames = rb.sheet_names() #获取所有sheet的名字，sheetNames为list类型
167 |     a,b,c,d,e = [], [], [], [], []
168 |     
169 |     model_name_list = []
170 |     for sheet in sheetNames:
171 |         table = rb.sheet_by_name(sheet)
172 |         a.append(table.row_values(1)[-1])
173 |         b.append(table.row_values(2)[-1])
174 |         c.append(table.row_values(3)[-1])
175 |         d.append(table.row_values(4)[-1])
176 |         e.append(table.row_values(5)[-1])
177 |         model_name_list.append(table.row_values(1)[0].split('_')[0])
178 |     xmark = model_name_list
179 |     num = len(xmark)
180 |     plt.figure();plt.plot(a,'o-',c='salmon', label = 'rmse');plt.legend();plt.xticks(range(num),xmark,rotation=45);plt.grid();
181 |     plt.savefig('output/rmse.png',bbox_inches='tight',dpi = 500);plt.show()
182 |     plt.figure();plt.plot(b,'*-',c='limegreen', label = 'mae');plt.legend();plt.xticks(range(num),xmark,rotation=45);
183 |     plt.plot(c, 'v-',c='blue',label = 'mdae');plt.legend();plt.xticks(range(num),xmark,rotation=45);
184 |     plt.plot(d, '^-',c='r',label = 'r2_score');plt.legend();plt.xticks(range(num),xmark,rotation=45);
185 |     plt.plot(e,'D-',c='darkorchid', label = 'var');plt.legend();plt.xticks(range(num),xmark,rotation=45);plt.grid();
186 |     plt.savefig('output/the_other_4_merics.png',bbox_inches='tight',dpi = 500)
187 |     plt.show()
188 |     
189 | 
190 | 
191 | import joblib
192 | import torch
193 | from torch import Tensor
194 | from torch.autograd import Variable
195 | def plotPredictionAndTruth(path, model_name, model_folder, num,test_x, test_y):  # 模型预测与真实可视化
196 |     if model_name.split('.')[-1] =='m':
197 |         clf = joblib.load(path)
198 |         y_pre_temp = clf.predict(test_x)
199 |     else:
200 |         if model_name[:3]=='mlp':
201 |             clf = torch.load(path,map_location=torch.device('cpu'))
202 |             x_tensor = Tensor(test_x)
203 |             x_tensor = Variable(x_tensor)
204 |             y_pre_temp = clf(x_tensor).detach().numpy()          
205 |         else:
206 |             clf = torch.load(path,map_location=torch.device('cpu'))
207 |             x_tensor = Tensor(test_x)
208 |             x_tensor = Variable(x_tensor).view(-1,7,3)
209 |             y_pre_temp = clf(x_tensor).detach().numpy()
210 |     
211 |     rmse, mae, mdae, r2, var = evaluation(test_y, y_pre_temp)   
212 |     y_pre_temp = y_pre_temp.reshape(1,-1)  
213 |     test_y = test_y.reshape(1,-1)
214 |     # 绘图
215 |     plt.figure()
216 |     plt.plot(test_y.squeeze() ,c='r', label = 'true value')
217 |     plt.plot(y_pre_temp.squeeze() ,c='b', alpha = 0.5, label = 'prediction value')
218 |     y_max = max(np.append(test_y,y_pre_temp))
219 |     dy = 2
220 |     plt.text(0, y_max, "model:%s"%model_folder, size = 10,color = "black", style = "italic", weight = "light")
221 |     plt.text(0, y_max- dy, "station:%d"%num, size = 10,color = "black", style = "italic", weight = "light")
222 |     plt.text(0, y_max-2*dy, "rmse:%.3f"%rmse, size = 10,color = "black", style = "italic", weight = "light")
223 |     plt.text(0, y_max-3 * dy, "mae:%.3f"%mae, size = 10,color = "black", style = "italic", weight = "light")
224 |     plt.text(0, y_max-4 * dy, "mdae:%.3f"%mdae, size = 10,color = "black", style = "italic", weight = "light")
225 |     plt.text(0, y_max-5 * dy, "r2:%.3f"%r2, size = 10,color = "black", style = "italic", weight = "light")
226 |     plt.text(0, y_max-6 * dy, "var:%.3f"%var, size = 10,color = "black", style = "italic", weight = "light")
227 | 
228 |     plt.legend()
229 |     plt.savefig('output/visualization_%s.png'%model_name.split('.')[0],bbox_inches='tight',dpi = 500)
230 |     plt.show()
231 | 
232 | 
233 | def netModelFindBest(filepath):
234 |     name = filepath.split('/')[-1]   
235 |     txt_list = glob.glob('{}/train_371*.txt'.format(filepath))
236 |     num = len(txt_list) 
237 |     min_val_loss_list = []
238 |     hyp_list = []
239 |     for i in range(num):
240 |         # hyp_list = txt_list[i].split('\\')[-1].split('_')[2:]
241 |         # hyp_list[-1] = hyp_list[-1].split('.txt')[0]
242 |         # hyp_list = [float(ele) for ele in hyp_list]
243 |         hyp = txt_list[i].split('\\')[-1].strip('.txt')[9:]
244 |         f = open(txt_list[i],'r')
245 |         line_all = f.readlines()
246 |         min_val_loss = 100
247 |         for line in line_all:
248 |             
249 |             temp_val_loss = float(line_all[0].strip('\n').split()[-1])
250 |             if temp_val_loss < min_val_loss:
251 |                 min_val_loss = temp_val_loss
252 |         min_val_loss_list.append(min_val_loss)
253 |         hyp_list.append(hyp)
254 |     
255 |     plt.figure()
256 |     plt.plot(min_val_loss_list, 'v-',c='blue',alpha = 0.5,label = 'val_loss')
257 |     plt.legend()
258 |     plt.xticks(range(num),hyp_list,rotation=45)
259 |     plt.grid()
260 |     y_min = np.min(min_val_loss_list)
261 |     x_min = np.argmin(min_val_loss_list)
262 |     plt.text(x_min,y_min,"min",fontdict={'size':'8','color':'b'})
263 |     plt.savefig('output/find_best_hyp_%s.png'%name,bbox_inches='tight',dpi = 500)
264 |     plt.show()
265 |     
266 | 
267 | 
268 | 
269 | 
270 | if __name__ == '__main__':
271 |     pathnow = os.getcwd()
272 |     
273 |     # .m模型模型的寻优过程，图保存在output文件夹里
274 |     # filepath = '{}/SVR_poly/train_371_C.txt'.format(pathnow)
275 |     # filepath2 = '{}/SVR_poly/train_371_degree.txt'.format(pathnow)
276 |     # filepath3 = '{}/SVR_poly/train_371_gamma.txt'.format(pathnow)
277 |     # filepath4 = '{}/GBRT/train_371_depth.txt'.format(pathnow)
278 |     # filepath5 = '{}/GBRT/train_371_est.txt'.format(pathnow)
279 |     # filepath6 = '{}/GBRT/train_371_lr.txt'.format(pathnow)
280 |     # filepath7 = '{}/XGB/train_371_depth.txt'.format(pathnow)
281 |     # filepath8 = '{}/XGB/train_371_est.txt'.format(pathnow)
282 |     # filepath9 = '{}/XGB/train_371_lr.txt'.format(pathnow)
283 |     # filepath10 = '{}/SVR_sigmoid/train_371_C.txt'.format(pathnow)
284 |     # filepath11 = '{}/SVR_sigmoid/train_371_gamma.txt'.format(pathnow)
285 |     # filepath12 = '{}/SVR_rbf/train_371_C.txt'.format(pathnow)
286 |     # filepath13 = '{}/SVR_rbf/train_371_gamma.txt'.format(pathnow)
287 |     # readandplot(filepath,save_name = 'SVR_poly_train_371_C',title = None)
288 |     # readandplot(filepath2,save_name = 'SVR_poly_train_371_degree')  
289 |     # readandplot(filepath3,save_name = 'SVR_poly_train_371_gamma') 
290 |     # readandplot(filepath4,save_name = 'GBRT_train_371_depth') 
291 |     # readandplot(filepath5,save_name = 'GBRT_train_371_est') 
292 |     # readandplot(filepath6,save_name = 'GBRT_train_371_lr') 
293 |     # readandplot(filepath7,save_name = 'XGB_train_371_depth') 
294 |     # readandplot(filepath8,save_name = 'XGB_train_371_est') 
295 |     # readandplot(filepath9,save_name = 'XGB_train_371_lr') 
296 |     # readandplot(filepath10,save_name = 'SVR_sigmoid_train_371_C') 
297 |     # readandplot(filepath11,save_name = 'SVR_sigmoid_train_371_gamma') 
298 |     # readandplot(filepath12,save_name = 'SVR_rbf_train_371_C') 
299 |     # readandplot(filepath13,save_name = 'SVR_rbf_train_371_gamma') 
300 |     
301 |     
302 |     
303 |     
304 |     # 读取每个模型在所有气象站上的测试结果并保存到表格，
305 |     # 再读取表格，把每个模型的最好表现对比绘图，并保存到output文件夹
306 |     # name_list = ['ARIMA','ATT_SEQ2SEQ','GBRT','LSTM','MLP','SEQ2SEQ','SVR_rbf','XGB']
307 |     # for name in name_list:
308 |     #     filefolder = '{}/{}'.format(pathnow,name)
309 |     #     processTestResultsOfEachModel(filefolder,name)
310 |     # plotBestResults()
311 |     
312 |     
313 |     
314 |        
315 |     # 目标值与预测值可视化
316 |     # model_folder = 'MLP'
317 |     
318 |     # path_2 = 'train_test_hour'
319 |     
320 |     # from sklearn.preprocessing import *
321 |     # path = pathnow   
322 |     # num_list = [312,313,314,315,316,371,372,373,374,393,394,396]
323 |     # for num in num_list:
324 |     #     try:
325 |     #         path_to_model = glob.glob('{}/{}/*{}*.m'.format(pathnow,model_folder,num)) [0]
326 |     #     except:
327 |     #         path_to_model = glob.glob('{}/{}/*{}*.pth'.format(pathnow,model_folder,num))[0]
328 |     #     model_name = path_to_model.split('\\')[-1]
329 |     #     test_feature = np.loadtxt("{}/{}/test{}.txt".format(path,path_2, num)).astype(np.float32)
330 |     #     test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(path,path_2, num)).astype(np.float32)
331 |     #     test_feature = scale(test_feature, axis=0)
332 |     #     test_rain = np.reshape(test_rain, (-1, 1))
333 |     #     plotPredictionAndTruth(path_to_model,  model_name, model_folder, num, test_feature, test_rain)
334 | 
335 | 
336 | 
337 | 
338 |     # .pth文件的寻优过程
339 |     # filepath = '{}/MLP'.format(pathnow)
340 |     # netModelFindBest(filepath)
341 |     
342 |     
343 |     
344 |     
345 |     # ATT_SEQ2SEQ的可视化
346 |     # num = 312
347 |     # path_2 = 'train_test_hour'
348 |     # test_feature = np.loadtxt("{}/{}/test{}.txt".format(pathnow,path_2, num)).astype(np.float32)
349 |     # test_rain = np.loadtxt("{}/{}/test_label{}.txt".format(pathnow,path_2, num)).astype(np.float32)
350 |     # from sklearn.preprocessing import *
351 |     # test_feature = scale(test_feature, axis=0)
352 |     # test_rain = np.reshape(test_rain, (-1, 1))
353 |     
354 |     # model = torch.load('{}\ATT_SEQ2SEQ\seq2seq_312_128_0.1_0.01.pth'.format(pathnow),map_location=torch.device('cpu'))
355 |     # x_tensor = Tensor(test_feature)
356 |     # # x_tensor = Variable(x_tensor).view(-1,7,3)
357 |     # x_tensor = Variable(x_tensor).view(-1,3,7)
358 |     # y_seq2seq,y_attention = model(x_tensor)
359 |     
360 |     
361 | 
362 |     
363 |     
364 |     # plt.figure()
365 |     # plt.plot(test_rain.squeeze() ,c='r', label = 'true value')
366 |     # plt.plot(y_seq2seq.squeeze() ,c='b', alpha = 0.5, label = 'prediction value')
367 |     # y_max = max(np.append(test_rain,y_seq2seq.detach().numpy()))
368 |     # dy = 2
369 |     # plt.text(0, y_max, "model:%s"%model_folder, size = 10,color = "black", style = "italic", weight = "light")
370 |     # plt.text(0, y_max- dy, "station:%d"%num, size = 10,color = "black", style = "italic", weight = "light")
371 |     # plt.text(0, y_max-2*dy, "rmse:%.3f"%rmse, size = 10,color = "black", style = "italic", weight = "light")
372 |     # plt.text(0, y_max-3 * dy, "mae:%.3f"%mae, size = 10,color = "black", style = "italic", weight = "light")
373 |     # plt.text(0, y_max-4 * dy, "mdae:%.3f"%mdae, size = 10,color = "black", style = "italic", weight = "light")
374 |     # plt.text(0, y_max-5 * dy, "r2:%.3f"%r2, size = 10,color = "black", style = "italic", weight = "light")
375 |     # plt.text(0, y_max-6 * dy, "var:%.3f"%var, size = 10,color = "black", style = "italic", weight = "light")
376 | 
377 |     # plt.legend()
378 |     # plt.savefig('output/visualization_%s.png'%model_name.split('.')[0],bbox_inches='tight',dpi = 500)
379 |     # plt.show()
380 | 
381 | 
382 | 
383 |     # fig =plt.figure()
384 |     # ax = fig.add_subplot(111)
385 |     # cax = ax.matshow(y_attention[:5,:], cmap='bone')
386 |     # fig.colorbar(cax)
387 |     # plt.show()
388 |     
389 |     # path ='./ATT_SEQ2SEQ'
390 |     # f = open('{}/test_ATT_SEQ2SEQ_{}.txt'.format(path,num), 'w+')
391 |     # f.write('ATT_SEQ2SEQ_rmse: %r ' % rmse +
392 |     #       'ATT_SEQ2SEQ_mae: %r ' % mae +
393 |     #       'ATT_SEQ2SEQ_mdae: %r ' % mdae +
394 |     #       'ATT_SEQ2SEQ_r2: %r ' % r2 +
395 |     #       'ATT_SEQ2SEQ_var: %r ' % var)
396 |     # f.close()
397 | 
398 | 
399 | 


--------------------------------------------------------------------------------