├── GCN-LSTM └── code │ ├── __pycache__ │ ├── dataloader.cpython-37.pyc │ ├── dataloader.cpython-38.pyc │ ├── model.cpython-37.pyc │ ├── model.cpython-38.pyc │ ├── test.cpython-37.pyc │ └── test.cpython-38.pyc │ ├── dataloader.py │ ├── dataparse.py │ ├── model.py │ ├── test.py │ └── train.py └── README.md /GCN-LSTM/code/__pycache__/dataloader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qetmes/GCN-LSTM/9b9e3ad5c2223a3489e576849df18b1deef49593/GCN-LSTM/code/__pycache__/dataloader.cpython-37.pyc -------------------------------------------------------------------------------- /GCN-LSTM/code/__pycache__/dataloader.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qetmes/GCN-LSTM/9b9e3ad5c2223a3489e576849df18b1deef49593/GCN-LSTM/code/__pycache__/dataloader.cpython-38.pyc -------------------------------------------------------------------------------- /GCN-LSTM/code/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qetmes/GCN-LSTM/9b9e3ad5c2223a3489e576849df18b1deef49593/GCN-LSTM/code/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /GCN-LSTM/code/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qetmes/GCN-LSTM/9b9e3ad5c2223a3489e576849df18b1deef49593/GCN-LSTM/code/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /GCN-LSTM/code/__pycache__/test.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qetmes/GCN-LSTM/9b9e3ad5c2223a3489e576849df18b1deef49593/GCN-LSTM/code/__pycache__/test.cpython-37.pyc -------------------------------------------------------------------------------- /GCN-LSTM/code/__pycache__/test.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qetmes/GCN-LSTM/9b9e3ad5c2223a3489e576849df18b1deef49593/GCN-LSTM/code/__pycache__/test.cpython-38.pyc -------------------------------------------------------------------------------- /GCN-LSTM/code/dataloader.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset, DataLoader 2 | import torch 3 | 4 | class MyDataset(Dataset): 5 | def __init__(self, n, path): 6 | self.n = n 7 | self.path = path 8 | 9 | def __len__(self): 10 | return self.n 11 | 12 | def __getitem__(self, i): 13 | x, y = torch.load(f'{self.path}/data{i}.pt') 14 | return x, y 15 | 16 | def getDataloader(n, path, batch_size, shuffle=True): 17 | my_dataset = MyDataset(n, path) 18 | return DataLoader(my_dataset, batch_size=batch_size, shuffle=shuffle) 19 | 20 | if __name__ == '__main__': 21 | dataloader = getDataloader(64000, '../data_deal/dataset/train_data', 32) 22 | for sample, label in dataloader: 23 | print(sample.shape) 24 | print(label.shape) -------------------------------------------------------------------------------- /GCN-LSTM/code/dataparse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pandas._libs.tslibs.timestamps import Timestamp 4 | from sklearn.linear_model import LinearRegression 5 | import csv #加载csv包便于读取csv文件 6 | 7 | 8 | df = pd.read_pickle('./data.pkl') 9 | # print(df) 10 | 11 | flt_col = ['AIR_CODE', 'FLIGHT_NO', 'DEP_DATE', 'UP_LOCATION', 'DIS_LOCATION'] # 确认具体航班的五个字段 12 | cls_col = ['CA', 'CB', 'CC', 'CD', 'CE', 'CF', 'CG', 'CH', 'CI', 'CJ', 'CK', 'CL','CM', 13 | 'CN', 'CO', 'CP', 'CQ', 'CR', 'CS', 'CT', 'CU', 'CV', 'CW', 'CX', 'CY', 'CZ'] # 26个舱位字段 14 | 15 | 16 | # 利用LKZK(旅客折扣)和CA到CZ的26个舱位字段来计算平均折扣 17 | def get_discount(d): 18 | d = d[d.LKZK > 0] # LKZK(旅客折扣) 19 | print(d) 20 | X = d[cls_col].loc[:, d[cls_col].sum(axis=0) > 10] 21 | Y = d.LKZK * (X.sum(axis=1)) 22 | 23 | reg = LinearRegression(fit_intercept=False) # 用线性回归求每家航司的系数k 24 | reg.fit(X, Y) 25 | 26 | return pd.DataFrame([reg.coef_.round(2)], columns=X.columns) 27 | 28 | 29 | disc = df.groupby(['AIR_CODE']).apply(lambda x:get_discount(x)) 30 | disc = disc.applymap(lambda x:x if x>0 else 0) 31 | disc.index = [x[0] for x in disc.index] 32 | 33 | print(disc) 34 | 35 | 36 | def get_arv_bks(flt): 37 | flt = flt.sort_values(by='EX_DIF') 38 | bkd = flt[cls_col].diff(-1).applymap(lambda x: x if x > 0 else 0) 39 | bkd.iloc[-1] = flt[cls_col].iloc[-1] # 最后一天用累计值 40 | 41 | bks = bkd.sum(axis=1) 42 | arv = (bkd * disc.loc[flt.AIR_CODE.iloc[0]]).sum(axis=1) / bks 43 | 44 | arv_bks = pd.DataFrame([arv, bks], index=['平均折扣', '订票数']).T 45 | # arv_bks.bks = arv_bks.bks.apply(int) 46 | 47 | return pd.concat([flt, arv_bks], axis=1) 48 | 49 | 50 | 51 | 52 | for k, flt in df.groupby(flt_col): 53 | arv_bks = get_arv_bks(flt) 54 | csv_file = open('hb_name_3_.csv') # 打开csv文件 55 | # csv_reader_lines = csv.reader(csv_file) 56 | # for one_line in csv_reader_lines: 57 | # if (k[0] == one_line[2] and str(k[1]) == one_line[3] and k[3] == one_line[4] and k[4] == one_line[5]): 58 | # arv_bks.to_csv('./new_data/data_'+one_line[2]+'_'+one_line[3]+'.csv', mode='a',header=False) 59 | # print(k[0]+'=='+one_line[2] +'--'+str(k[1])+ '=='+one_line[3]+'--'+k[3]+ '=='+one_line[4]+'--'+k[4]+ '=='+one_line[5]) 60 | 61 | 62 | 63 | 64 | # hb_name = pd.DataFrame({'a': [k[0]], 'b': [k[1]], 'c':[k[3]], 'd':[k[4]]}) 65 | # hb_name.to_csv('hb_name_1.csv',mode='a', header=False) 66 | # print(k) 67 | # 68 | # if ( k[0] == '3U' and k[1] == 2543 and k[3] == 'HPG' and k[4] == 'SHE' ): 69 | # arv_bks.to_csv('data_3U_2543.csv', mode='a', header=False) 70 | # 71 | # if ( k[0] == '3U' and k[1] == 2675 and k[3] == 'HPG' and k[4] == 'BRE' ): 72 | # arv_bks.to_csv('data_3U_2675.csv', mode='a', header=False) 73 | # # 74 | # if ( k[0] == '3U' and k[1] == 2776 and k[3] == 'BRE' and k[4] == 'CTS' ): 75 | # arv_bks.to_csv('data_3U_2776.csv', mode='a', header=False) 76 | # # 77 | # if ( k[0] == '3U' and k[1] == 2833 and k[3] == 'MTY' and k[4] == 'UYN' ): 78 | # arv_bks.to_csv('data_3U_2540.csv', mode='a', header=False) 79 | # 80 | # 81 | # 82 | # 83 | # if ( k[0] == 'BK' and k[1] == 2355 and k[3] == 'MIA' and k[4] == 'WUT' ): 84 | # arv_bks.to_csv('data_BK_2355.csv', mode='a', header=False) 85 | # 86 | # if ( k[0] == 'BK' and k[1] == 4057 and k[3] == 'HAK' and k[4] == 'UME' ): 87 | # arv_bks.to_csv('data_BK_4057.csv', mode='a', header=False) 88 | # 89 | # if ( k[0] == 'BK' and k[1] == 4373 and k[3] == 'HAK' and k[4] == 'PIT' ): 90 | # arv_bks.to_csv('data_BK_4373.csv', mode='a', header=False) 91 | # 92 | # if ( k[0] == 'BK' and k[1] == 4750 and k[3] == 'WDS' and k[4] == 'BPX' ): 93 | # arv_bks.to_csv('data_BK_4750.csv', mode='a', header=False) 94 | # 95 | # if ( k[0] == 'BK' and k[1] == 4797 and k[3] == 'HUY' and k[4] == 'WDS'): 96 | # arv_bks.to_csv('data_BK_4797.csv', mode='a', header=False) 97 | # 98 | # 99 | # 100 | # 101 | # if (k[0] == 'CA' and k[1] == 140 and k[3] == 'WDS' and k[4] == 'WNH'): 102 | # arv_bks.to_csv('data_CA_140.csv', mode='a', header=False) 103 | # 104 | # if (k[0] == 'CA' and k[1] == 180 and k[3] == 'WDS' and k[4] == 'YLW'): 105 | # arv_bks.to_csv('data_CA_180.csv', mode='a', header=False) 106 | # 107 | # if (k[0] == 'CA' and k[1] == 1993 and k[3] == 'WNH' and k[4] == 'NZL'): 108 | # arv_bks.to_csv('data_CA_1993.csv', mode='a', header=False) 109 | # 110 | # if (k[0] == 'CA' and k[1] == 2245 and k[3] == 'HSN' and k[4] == 'DUD'): 111 | # arv_bks.to_csv('data_CA_2245.csv', mode='a', header=False) 112 | # 113 | # if (k[0] == 'CA' and k[1] == 2465 and k[3] == 'HPG' and k[4] == 'HAK'): 114 | # arv_bks.to_csv('data_CA_2465.csv', mode='a', header=False) 115 | # 116 | # 117 | # 118 | # 119 | # 120 | # if (k[0] == 'CZ' and k[1] == 802 and k[3] == 'LIM' and k[4] == 'MCO'): 121 | # arv_bks.to_csv('data_CZ_802.csv', mode='a', header=False) 122 | # 123 | # if (k[0] == 'CZ' and k[1] == 2151 and k[3] == 'JXA' and k[4] == 'CDG'): 124 | # arv_bks.to_csv('data_CZ_2151.csv', mode='a', header=False) 125 | # 126 | # if (k[0] == 'CZ' and k[1] == 6936 and k[3] == 'LIM' and k[4] == 'JXA'): 127 | # arv_bks.to_csv('data_CZ_6936.csv', mode='a', header=False) 128 | # 129 | # if (k[0] == 'CZ' and k[1] == 9799 and k[3] == 'JXA' and k[4] == 'LIM'): 130 | # arv_bks.to_csv('data_CZ_9799.csv', mode='a', header=False) 131 | # 132 | # if (k[0] == 'CZ' and k[1] == 7438 and k[3] == 'CAI' and k[4] == 'CWJ'): 133 | # arv_bks.to_csv('data_CZ_7438.csv', mode='a', header=False) 134 | 135 | 136 | 137 | # if k == ('3U', 2534, Timestamp('2020-10-23 00:00:00'), 'LIM', 'DDG'): 138 | # # arv_bks.to_csv('data_3U_2534.csv', mode='a', header=True) 139 | # arv_bks.to_csv('data_3U_2534.csv', mode='a', header=False) 140 | # break 141 | # print(k) 142 | # print(arv_bks) 143 | # 作为样例,只计算一个 144 | # 全部计算需要10个小时 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /GCN-LSTM/code/model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class Config(): 7 | def __init__(self) -> None: 8 | self.model_name = 'Airline' 9 | self.data_path = '../data/' # 所有数据的存储路径 10 | self.model_save_path = '../save/Airline.pkl' # 模型的存储路径 11 | self.log_path = '../log/SummaryWriter' 12 | 13 | self.dropout = 0.5 14 | # self.batch_size = 1000 # 批量的大小 15 | self.shuffle = True # 加载数据时是否随机加载 16 | self.cuda_is_aviable = False # 是否可以GPU加速 17 | self.cuda_device = 2 # 指定训练的GPU 18 | self.learning_rate = 1e-4 # 学习率的大小 19 | self.epoch = 100 20 | self.node_size = 448 # 节点个数 21 | self.input_dim = 44 # 时间序列的长度 22 | self.num_layers = 1 23 | self.batch_first = True 24 | self.input_size = # 特征的个数 25 | self.hidden = 1024 26 | self.num_classes = self.node_size 27 | self.last_hidden = 512 28 | self.hidden2 = 1024 29 | self.hidden3 = 512 30 | self.hidden4 = 256 31 | 32 | 33 | class GCNLSTM(nn.Module): 34 | def __init__(self, adj, config) -> None: 35 | super(GCNLSTM, self).__init__() 36 | self.GCN = GCN(adj, config) 37 | self.lstm = nn.LSTM(config.input_size * config.node_size, config.hidden, 38 | config.num_layers, config.batch_first) 39 | self.dropout = nn.Dropout(config.dropout) 40 | self.fc = nn.Linear(config.hidden * config.input_dim, config.hidden2) 41 | self.fc2 = nn.Linear(config.hidden2, config.hidden3) 42 | self.fc3 = nn.Linear(config.hidden3, config.hidden4) 43 | self.fc4 = nn.Linear(config.hidden4, config.num_classes) 44 | 45 | 46 | def forward(self, x): 47 | out = self.GCN(x) 48 | out, (hn, cn) = self.lstm(out) 49 | out = self.fc(out.view(out.shape[0], -1)) 50 | out = self.dropout(out) 51 | out = nn.ReLU()(self.fc2(out)) 52 | out = nn.ReLU()(self.fc3(out)) 53 | out = self.fc4(out) 54 | return out 55 | 56 | 57 | class GCN(nn.Module): 58 | def __init__(self, adj, config): 59 | super(GCN, self).__init__() 60 | self.register_buffer( 61 | "laplacian", calculate_laplacian_with_self_loop( 62 | torch.FloatTensor(adj)) 63 | ) # 引入邻接矩阵 64 | self._num_nodes = adj.shape[0] 65 | self.input_dim = config.input_dim # 要预测的句子的长度 66 | self.feature_dim = config.input_size 67 | self.out_dim = config.hidden # 输出的隐层的长度 68 | self.weights = nn.Parameter( 69 | torch.FloatTensor(self.input_dim, self.out_dim) 70 | ) 71 | self.fc = nn.Linear(self._num_nodes, 1) 72 | 73 | self.reset_parameters() 74 | 75 | def reset_parameters(self): 76 | nn.init.xavier_uniform_( 77 | self.weights, gain=nn.init.calculate_gain("tanh")) 78 | 79 | def forward(self, x): 80 | 81 | batch_size = x.shape[0] 82 | x = x.transpose(2, 3) 83 | 84 | x = x.transpose(0, 3).transpose(1, 3).transpose(2, 3) 85 | inputs = x.reshape((self._num_nodes, batch_size * 86 | self.feature_dim * self.input_dim)) 87 | 88 | ax = self.laplacian @ inputs 89 | ax = ax.reshape((self._num_nodes, batch_size, 90 | self.feature_dim, self.input_dim)) 91 | ax = ax.reshape((self._num_nodes * batch_size * 92 | self.feature_dim, self.input_dim)) 93 | outputs = ax.reshape( 94 | (self._num_nodes, batch_size, self.feature_dim, self.input_dim)) 95 | outputs = outputs.transpose(0, 1).transpose(1, 2).transpose(2, 3).transpose(1, 2) 96 | # batch_size, time_step, hidden_status 97 | # outputs = nn.ReLU()(self.fc(outputs).squeeze(-1)) 98 | outputs = outputs.reshape(batch_size, self.input_dim, self.feature_dim * self._num_nodes) 99 | return outputs 100 | 101 | 102 | def calculate_laplacian_with_self_loop(matrix): 103 | matrix = matrix + torch.eye(matrix.size(0)) 104 | row_sum = matrix.sum(1) 105 | d_inv_sqrt = torch.pow(row_sum, -0.5).flatten() 106 | d_inv_sqrt[torch.isinf(d_inv_sqrt)] = 0.0 107 | d_mat_inv_sqrt = torch.diag(d_inv_sqrt) 108 | normalized_laplacian = ( 109 | matrix.matmul(d_mat_inv_sqrt).transpose(0, 1).matmul(d_mat_inv_sqrt) 110 | ) 111 | return normalized_laplacian 112 | -------------------------------------------------------------------------------- /GCN-LSTM/code/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import pandas as pd 4 | from model import GCNLSTM, Config 5 | from dataloader import getDataloader 6 | 7 | 8 | def test_accu(net, data, label): 9 | with torch.no_grad(): 10 | pre = net(data) 11 | loss = torch.sum((pre - label) * (pre - label)) 12 | return loss 13 | 14 | 15 | def test_the_model(airline): 16 | # test_data = np.load('../data/{airline}.npy') 17 | # test_label = np.load('../data/{airline}.npy').reshape(-1, 1) 18 | # test_data = torch.tensor(test_data).type(torch.float32) 19 | # test_label = torch.tensor(test_label).type(torch.float32) 20 | 21 | dataloader = getDataloader(16000, '../data_deal/dataset/test_data', 32) 22 | 23 | adj = pd.read_csv('../data_deal/adj.pt', header=None) 24 | adj = torch.tensor(adj) 25 | 26 | config = Config() 27 | model = GCNLSTM(adj, config) 28 | state_dict = torch.load(f'{config.model_save_path}net_parameters') 29 | model.load_state_dict(state_dict) 30 | predict = model(test_data) 31 | return torch.sum((predict-test_label)*(predict-test_label))/test_data[0] 32 | 33 | 34 | if __name__ == '__main__': 35 | import argparse 36 | parser = argparse.ArgumentParser(description='Process some integers.') 37 | parser.add_argument('-a', type=str, help='airline name', 38 | default='AAT_URC') 39 | 40 | args = parser.parse_args() 41 | test_the_model(args.a) 42 | -------------------------------------------------------------------------------- /GCN-LSTM/code/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # from torch.utils.data import Dataset, DataLoader, TensorDataset 4 | from model import GCNLSTM, Config 5 | from test import test_accu 6 | import numpy as np 7 | import pandas as pd 8 | from torch.utils.tensorboard import SummaryWriter 9 | from dataloader import getDataloader 10 | writer = SummaryWriter('../log/') 11 | 12 | adj = torch.load('../data_deal/adj.pt') 13 | 14 | config = Config() 15 | 16 | 17 | def train(): 18 | net = GCNLSTM(adj, config) 19 | # data = np.load('../data.npy') 20 | # label = np.load('../label.npy').reshape(-1, 1) 21 | # test_data = np.load('../data/testdata.npy') 22 | # test_label = np.load('../data/testlabel.npy').reshape(-1, 1) 23 | # 24 | # test_data = torch.tensor(test_data).type(torch.float32) 25 | # test_label = torch.tensor(test_label).type(torch.float32) 26 | # 27 | # data = torch.tensor(data).type(torch.float32) 28 | # label = torch.tensor(label).type(torch.float32) 29 | # 30 | # data = TensorDataset(data, label) 31 | # data_getloader = DataLoader(data, config.batch_size, config.shuffle) 32 | dataloader = getDataloader(64000, '../data_deal/dataset/train_data', 32) 33 | 34 | 35 | loss_function = nn.MSELoss() 36 | # if config.cuda_is_aviable: 37 | # net = net.cuda(device=config.cuda_device) 38 | optimizer = torch.optim.Adam(net.parameters(), lr=config.learning_rate) 39 | 40 | i = 0 41 | for epoch in range(config.epoch): 42 | total = 0 43 | for data, label in dataloader: 44 | # if data.shape[0] != config.batch_size: 45 | # continue 46 | i += 1 47 | pre = net(data) 48 | 49 | loss = loss_function(pre, label) 50 | loss.backward() 51 | optimizer.step() 52 | optimizer.zero_grad() 53 | total += loss.item() 54 | print("epoch: %s, loss: %s" % (epoch, loss.item()/data.shape[0])) 55 | writer.add_scalar('itear_loss', i, loss.item()/data.shape[0]) 56 | # acc = test_accu(net, test_data, test_label) 57 | # print(acc.item()) 58 | writer.add_scalar('epoch_loss', epoch, total) 59 | stat_dict = {'net': net.state_dict()} 60 | torch.save(stat_dict, config.save_path + 'net_parameters') 61 | 62 | 63 | if __name__ == '__main__': 64 | import argparse 65 | parser = argparse.ArgumentParser(description='param ') 66 | parser.add_argument('-batch', type=int, help='batch size', default=1000) 67 | parser.add_argument('-l', type=float, help='learning rate', default=0.0001) 68 | parser.add_argument('-epoch', type=int, help='epoch', default=100) 69 | parser.add_argument('-GPU', type=int, help='useing GPU ??', default=1) 70 | 71 | args = parser.parse_args() 72 | config.batch_size = args.batch 73 | config.l = args.l 74 | config.epoch = args.epoch 75 | config.cuda_is_aviable = args.GPU 76 | train() 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GCN-LSTM --------------------------------------------------------------------------------