STFGNN
├── config
│   ├── PEMS03.json
│   └── STFGNN.json
├── data
│   ├── dataset
│   │   ├── multi_step_dataset.py
│   │   └── stfgnn_dataset.py
│   └── utils.py
├── evaluator
│   └── evaluator.py
├── executor
│   ├── multi_step_executor.py
│   └── utils.py
├── main.py
├── model
│   ├── STFGNN.py
│   └── loss.py
├── raw_data
│   └── PEMS03
│       ├── PEMS03.npz
│       └── adj_mx.pkl
├── utils
│   ├── GPS_utils.py
│   ├── Optim.py
│   ├── argument_list.py
│   ├── dataset.py
│   ├── normalization.py
│   ├── utils.py
│   └── visualize.py
└── readme.md
/STFGNN/config/PEMS03.json:
--------------------------------------------------------------------------------
1 | {
2 | "filename": "raw_data/PEMS03/PEMS03.npz",
3 |
4 | "adj_type": "connectivity",
5 | "//": "adj_type be distance or connectivity",
6 | "adj_filename": "raw_data/PEMS03/adj_mx.pkl",
7 |
8 | "input_dim": 1,
9 | "output_dim": 1,
10 |
11 | "seq_len": 26208,
12 | "num_nodes": 358,
13 | "train_rate": 0.6,
14 | "eval_rate": 0.2,
15 | "feature_dim": 1,
16 | "mask": false
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/STFGNN/config/STFGNN.json:
--------------------------------------------------------------------------------
1 | {
2 | "device": "cuda:1",
3 | "gpu": true,
4 | "gpu_id": 1,
5 | "save": true,
6 | "cuda": true,
7 |
8 | "normalize": 1,
9 | "window": 12,
10 | "horizon": 12,
11 | "batch_size": 64,
12 | "order": 1,
13 | "lag": 12,
14 | "period": 288,
15 | "sparsity": 0.01,
16 |
17 | "hidden_dims": [[64, 64, 64], [64, 64, 64], [64, 64, 64]],
18 | "first_layer_embedding_size": 64,
19 | "out_layer_dim": 128,
20 | "strides": 4,
21 | "temporal_emb": true,
22 | "spatial_emb": true,
23 | "activation": "GLU",
24 | "module_type": "individual",
25 | "//": ["individual", "sharing"],
26 |
27 | "train_loss": "mae",
28 | "clip": 10,
29 | "epochs": 100,
30 | "seed": 54321,
31 | "log_interval": 2000,
32 | "optim": "adam",
33 | "lr": 0.001,
34 | "patience": 20,
35 | "lr_decay": false,
36 | "lr_scheduler": "multisteplr",
37 | "lr_decay_ratio": 0.1,
38 | "lr_decay_steps": [5, 20, 40, 70],
39 | "step_size": 10
40 | }
41 |
--------------------------------------------------------------------------------
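main.py (listed further down) merges these two JSON files into a single flat config dict, with keys from earlier files taking precedence. A minimal sketch of the same merge, assuming the script is run from the STFGNN/ directory:

import json

config = {}
for filename in ["config/PEMS03.json", "config/STFGNN.json"]:
    with open(filename, "r") as f:
        for key, value in json.load(f).items():
            # keep the first occurrence of every key, mirroring main.py
            config.setdefault(key, value)

print(config["adj_type"], config["window"], config["horizon"])  # connectivity 12 12
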
/STFGNN/data/dataset/multi_step_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | import pickle
5 | import torch
6 | from torch.autograd import Variable
7 |
8 | from data.utils import DataLoader, load_pickle, DataLoaderM_new
9 | from utils.normalization import StandardScaler, NormalScaler, NoneScaler, \
10 | MinMax01Scaler, MinMax11Scaler, LogScaler
11 |
12 |
13 | def get_adjacency_matrix(distance_df, sensor_ids, normalized_k=0.1):
14 | """
15 |
16 | :param distance_df: data frame with three columns: [from, to, distance].
17 | :param sensor_ids: list of sensor ids.
18 | :param normalized_k: entries that become lower than normalized_k after normalization are set to zero for sparsity.
19 | :return:
20 | """
21 | num_sensors = len(sensor_ids)
22 | dist_mx = np.zeros((num_sensors, num_sensors), dtype=np.float32)
23 | dist_mx[:] = np.inf
24 | # Builds sensor id to index map.
25 | sensor_id_to_ind = {}
26 | for i, sensor_id in enumerate(sensor_ids):
27 | sensor_id_to_ind[sensor_id] = i
28 |
29 | # Fills cells in the matrix with distances.
30 | for row in distance_df.values:
31 | if row[0] not in sensor_id_to_ind or row[1] not in sensor_id_to_ind:
32 | continue
33 | dist_mx[sensor_id_to_ind[row[0]], sensor_id_to_ind[row[1]]] = row[2]
34 |
35 | # Calculates the standard deviation as theta.
36 | distances = dist_mx[~np.isinf(dist_mx)].flatten()
37 | std = distances.std()
38 | adj_mx = np.exp(-np.square(dist_mx / std))
39 | # Make the adjacent matrix symmetric by taking the max.
40 | # adj_mx = np.maximum.reduce([adj_mx, adj_mx.T])
41 |
42 | # Sets entries that lower than a threshold, i.e., k, to zero for sparsity.
43 | adj_mx[adj_mx < normalized_k] = 0
44 | return sensor_ids, sensor_id_to_ind, adj_mx
45 |
46 |
47 | class MultiStepDataset(object):
48 |
49 | def __init__(self, config):
50 |
51 | self.config = config
52 | self.file_name = self.config.get("filename", " ")
53 | self.adj_filename = self.config.get("adj_filename", "")
54 | self.graph_sensor_ids = self.config.get("graph_sensor_ids", "")
55 | self.distances_file = self.config.get("distances_file", "")
56 | self.adj_type = self.config.get("adj_type", None)
57 |
58 | self.train_rate = self.config.get("train_rate", 0.6)
59 | self.valid_rate = self.config.get("eval_rate", 0.2)
60 | self.cuda = self.config.get("cuda", True)
61 |
62 | self.horizon = self.config.get("horizon", 12)
63 | self.window = self.config.get("window", 12)
64 |
65 | self.normalize = self.config.get("normalize", 2)
66 | self.batch_size = self.config.get("batch_size", 64)
67 | self.adj_mx = None
68 | self.add_time_in_day = self.config.get("add_time_in_day", False)
69 | self.add_day_in_week = self.config.get("add_day_in_week", False)
70 | self.input_dim = self.config.get("input_dim", 1)
71 | self.output_dim = self.config.get("output_dim", 1)
72 | #self.ensure_adj_mat()
73 | self._load_origin_data(self.file_name, self.adj_filename)
74 |
75 | self.data = self._gene_dataset()
76 |
77 | def ensure_adj_mat(self):
78 | if os.path.exists(self.adj_filename):
79 | return
80 | else:
81 | with open(self.graph_sensor_ids) as f:
82 | sensor_ids = f.read().strip().split(',')
83 | distance_df = pd.read_csv(self.distances_file, dtype={'from': 'str', 'to': 'str'})
84 | _, sensor_id_to_ind, adj_mx = get_adjacency_matrix(distance_df, sensor_ids, normalized_k=0.1)
85 | with open(self.adj_filename, 'wb') as f:
86 | pickle.dump([sensor_ids, sensor_id_to_ind, adj_mx], f, protocol=2)
87 | return
88 |
89 | def _load_origin_data(self, file_name, adj_name):
90 | if file_name[-3:] == "txt":
91 | fin = open(file_name)
92 | self.rawdat = np.loadtxt(fin, delimiter=',')
93 | elif file_name[-3:] == "csv":
94 | self.rawdat = pd.read_csv(file_name).values
95 | elif file_name[-2:] == "h5":
96 | self.rawdat = pd.read_hdf(file_name)
97 | elif file_name[-3:] == "npz":
98 | mid_dat = np.load(file_name)
99 | self.rawdat = mid_dat[mid_dat.files[0]]
100 | else:
101 | raise ValueError('file_name type error!')
102 |
103 | if adj_name == "":
104 | self.adj_mx = None
105 | elif adj_name[-3:] == "pkl":
106 | sensor_ids, sensor_id_to_ind, adj = load_pickle(adj_name)
107 | if self.adj_type == "distance":
108 | self.adj_mx = adj
109 | else:
110 | row, col = adj.shape
111 | for i in range(row):
112 | for j in range(i, col):
113 | if adj[i][j] > 0:
114 | adj[i][j] = 1
115 | adj[j][i] = 1
116 | else:
117 | adj[i][j] = 0
118 | adj[j][i] = 0
119 | self.adj_mx = adj
120 | else:
121 | raise ValueError('adj_name error!')
122 |
123 | def _get_scalar(self, x_train, y_train):
124 | """
125 |         Select the normalization method according to the global parameter `normalize`.
126 |
127 |         Args:
128 |             x_train: training data X
129 |             y_train: training data y
130 |
131 |         Returns:
132 |             Scaler: the normalization scaler object
133 | """
134 | if self.normalize == 2:
135 | scaler = NormalScaler(maxx=max(x_train.max(), y_train.max()))
136 | print('NormalScaler max: ' + str(scaler.max))
137 | elif self.normalize == 1:
138 | scaler = StandardScaler(mean=x_train.mean(), std=x_train.std())
139 | print('StandardScaler mean: ' + str(scaler.mean) + ', std: ' + str(scaler.std))
140 | elif self.normalize == 3:
141 | scaler = MinMax01Scaler(
142 | maxx=max(x_train.max(), y_train.max()), minn=min(x_train.min(), y_train.min()))
143 | print('MinMax01Scaler max: ' + str(scaler.max) + ', min: ' + str(scaler.min))
144 | elif self.normalize == 4:
145 | scaler = MinMax11Scaler(
146 | maxx=max(x_train.max(), y_train.max()), minn=min(x_train.min(), y_train.min()))
147 | print('MinMax11Scaler max: ' + str(scaler.max) + ', min: ' + str(scaler.min))
148 | elif self.normalize == 5:
149 | scaler = LogScaler()
150 | print('LogScaler')
151 | elif self.normalize == 0:
152 | scaler = NoneScaler()
153 | print('NoneScaler')
154 | else:
155 | raise ValueError('Scaler type error!')
156 | return scaler
157 |
158 | def _generate_graph_seq2seq_io_data(
159 | self, df, x_offsets, y_offsets, add_time_in_day=False, add_day_in_week=False, scaler=None
160 | ):
161 | """
162 |         Generate seq2seq samples.
163 |         :param df: np.ndarray [T, N, D] or pd.DataFrame
164 | :param x_offsets:
165 | :param y_offsets:
166 | :return:
167 | """
168 | num_samples, num_nodes = df.shape[0], df.shape[1]
169 | if not isinstance(df, np.ndarray):
170 | data = np.expand_dims(df.values, axis=-1)
171 | data_list = [data]
172 | else:
173 | data_list = [df]
174 | if add_time_in_day:
175 | time_ind = (df.index.values - df.index.values.astype("datetime64[D]")) / np.timedelta64(1, "D")
176 | time_in_day = np.tile(time_ind, [1, num_nodes, 1]).transpose((2, 1, 0))
177 | data_list.append(time_in_day)
178 | if add_day_in_week:
179 | day_in_week = np.zeros(shape=(num_samples, num_nodes, 7))
180 | day_in_week[np.arange(num_samples), :, df.index.dayofweek] = 1
181 | data_list.append(day_in_week)
182 |
183 | data = np.concatenate(data_list, axis=-1)
184 |
185 | x, y = [], []
186 | # t is the index of the last observation.
187 | min_t = abs(min(x_offsets))
188 | max_t = abs(num_samples - abs(max(y_offsets))) # Exclusive
189 | for t in range(min_t, max_t):
190 | x_t = data[t + x_offsets, ...]
191 | y_t = data[t + y_offsets, ...]
192 | x.append(x_t)
193 | y.append(y_t)
194 | x = np.stack(x, axis=0)
195 | y = np.stack(y, axis=0)
196 |
197 | return x, y
198 |
199 | def _generate_train_val_test(self):
200 | seq_length_x, seq_length_y = self.window, self.horizon
201 | x_offsets = np.arange(-(seq_length_x - 1), 1, 1)
202 | y_offsets = np.arange(1, (seq_length_y + 1), 1)
203 | x, y = self._generate_graph_seq2seq_io_data(self.rawdat, x_offsets,
204 | y_offsets, self.add_time_in_day, self.add_day_in_week)
205 | print("x shape: ", x.shape, ", y shape: ", y.shape)
206 | num_samples = x.shape[0]
207 | num_val = round(num_samples * self.valid_rate)
208 | num_train = round(num_samples * self.train_rate)
209 | num_test = num_samples - num_train - num_val
210 | return [x[:num_train], y[:num_train]], \
211 | [x[num_train:num_train + num_val], y[num_train:num_train + num_val]], \
212 | [x[num_train + num_val:], y[num_train + num_val:]]
213 |
214 | def _gene_dataset(self):
215 | data = {}
216 | self.train, self.valid, self.test = self._generate_train_val_test()
217 | x_train, y_train = self.train[0], self.train[1]
218 | x_valid, y_valid = self.valid[0], self.valid[1]
219 | x_test, y_test = self.test[0], self.test[1]
220 | self.scaler = self._get_scalar(x_train[..., :self.output_dim], y_train[..., :self.output_dim])
221 | x_train[..., :self.output_dim] = self.scaler.transform(x_train[..., :self.output_dim])
222 | y_train[..., :self.output_dim] = self.scaler.transform(y_train[..., :self.output_dim])
223 | x_valid[..., :self.output_dim] = self.scaler.transform(x_valid[..., :self.output_dim])
224 | y_valid[..., :self.output_dim] = self.scaler.transform(y_valid[..., :self.output_dim])
225 | x_test[..., :self.output_dim] = self.scaler.transform(x_test[..., :self.output_dim])
226 | y_test[..., :self.output_dim] = self.scaler.transform(y_test[..., :self.output_dim])
227 |
228 | data['train_loader'] = DataLoader(x_train[..., :self.input_dim], y_train[..., :self.output_dim],
229 | self.batch_size)
230 | data['valid_loader'] = DataLoader(x_valid[..., :self.input_dim], y_valid[..., :self.output_dim],
231 | self.batch_size)
232 | data['test_loader'] = DataLoader(x_test[..., :self.input_dim], y_test[..., :self.output_dim], self.batch_size)
233 | data['scaler'] = self.scaler
234 | data['num_batches'] = x_train.shape[0] / self.batch_size
235 | return data
236 |
237 | def get_data(self):
238 | """
239 |         Return the DataLoaders for the dataset: training, validation and test data.
240 |
241 | Returns:
242 | tuple: tuple contains:
243 | train_dataloader:
244 | eval_dataloader:
245 | test_dataloader:
246 | """
247 |         # load the dataset
248 |
249 | return self.data["train_loader"], self.data["valid_loader"], self.data["test_loader"]
250 |
251 | def get_data_feature(self):
252 | """
253 |         Return the dataset features; subclasses must implement this and return the required features.
254 |
255 | Returns:
256 |             dict: a dict containing the relevant features of the dataset
257 | """
258 | feature = {
259 | "scaler": self.data["scaler"],
260 | "adj_mx": self.adj_mx,
261 | "num_batches": self.data['num_batches']
262 | }
263 |
264 | return feature
265 |
--------------------------------------------------------------------------------
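utils/normalization.py is not reproduced in this listing. Based on how _get_scalar constructs the scalers and how the executor later calls inverse_transform, the StandardScaler interface assumed by this dataset is roughly the following sketch (an assumption about the missing file, not its actual contents):

class StandardScaler:
    # z-score normalization; works element-wise on numpy arrays or torch tensors
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return data * self.std + self.mean
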
/STFGNN/data/dataset/stfgnn_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | import pickle
5 | import torch
6 | import time
7 | from torch.autograd import Variable
8 |
9 | from data.dataset.multi_step_dataset import MultiStepDataset
10 |
11 | def gen_data(data, ntr, N):
12 | '''
13 | if flag:
14 | data=pd.read_csv(fname)
15 | else:
16 | data=pd.read_csv(fname,header=None)
17 | '''
18 | #data=data.as_matrix()
19 | data=np.reshape(data,[-1,288,N])
20 | return data[0:ntr]
21 |
22 | def normalize(a):
23 | mu=np.mean(a,axis=1,keepdims=True)
24 | std=np.std(a,axis=1,keepdims=True)
25 | return (a-mu)/std
26 |
27 | def compute_dtw(a,b,order=1,Ts=12,normal=True):
28 | if normal:
29 | a=normalize(a)
30 | b=normalize(b)
31 | T0=a.shape[1]
32 | d=np.reshape(a,[-1,1,T0])-np.reshape(b,[-1,T0,1])
33 | d=np.linalg.norm(d,axis=0,ord=order)
34 | D=np.zeros([T0,T0])
35 | for i in range(T0):
36 | for j in range(max(0,i-Ts),min(T0,i+Ts+1)):
37 | if (i==0) and (j==0):
38 | D[i,j]=d[i,j]**order
39 | continue
40 | if (i==0):
41 | D[i,j]=d[i,j]**order+D[i,j-1]
42 | continue
43 | if (j==0):
44 | D[i,j]=d[i,j]**order+D[i-1,j]
45 | continue
46 | if (j==i-Ts):
47 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i-1,j])
48 | continue
49 | if (j==i+Ts):
50 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i,j-1])
51 | continue
52 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i-1,j],D[i,j-1])
53 | return D[-1,-1]**(1.0/order)
54 |
55 | def construct_adj_fusion(A, A_dtw, steps):
56 | '''
57 | construct a bigger adjacency matrix using the given matrix
58 |
59 | Parameters
60 | ----------
61 | A: np.ndarray, adjacency matrix, shape is (N, N)
62 |
63 |     steps: how many times larger the new adjacency matrix is than A
64 |
65 | Returns
66 | ----------
67 |     new adjacency matrix: np.ndarray, shape is (N * steps, N * steps)
68 |
69 | ----------
70 | This is 4N_1 mode:
71 |
72 | [T, 1, 1, T
73 | 1, S, 1, 1
74 | 1, 1, S, 1
75 | T, 1, 1, T]
76 |
77 | '''
78 |
79 | N = len(A)
80 | adj = np.zeros([N * steps] * 2) # "steps" = 4 !!!
81 |
82 | for i in range(steps):
83 | if (i == 1) or (i == 2):
84 | adj[i * N: (i + 1) * N, i * N: (i + 1) * N] = A
85 | else:
86 | adj[i * N: (i + 1) * N, i * N: (i + 1) * N] = A_dtw
87 | #'''
88 | for i in range(N):
89 | for k in range(steps - 1):
90 | adj[k * N + i, (k + 1) * N + i] = 1
91 | adj[(k + 1) * N + i, k * N + i] = 1
92 | #'''
93 | adj[3 * N: 4 * N, 0: N] = A_dtw #adj[0 * N : 1 * N, 1 * N : 2 * N]
94 | adj[0 : N, 3 * N: 4 * N] = A_dtw #adj[0 * N : 1 * N, 1 * N : 2 * N]
95 |
96 | adj[2 * N: 3 * N, 0 : N] = adj[0 * N : 1 * N, 1 * N : 2 * N]
97 | adj[0 : N, 2 * N: 3 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N]
98 | adj[1 * N: 2 * N, 3 * N: 4 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N]
99 | adj[3 * N: 4 * N, 1 * N: 2 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N]
100 |
101 |
102 | for i in range(len(adj)):
103 | adj[i, i] = 1
104 |
105 | return adj
106 |
107 |
108 | class STFGNNDataset(MultiStepDataset):
109 |
110 | def __init__(self, config):
111 | super().__init__(config)
112 | self.strides = self.config.get("strides", 4)
113 | self.order = self.config.get("order", 1)
114 | self.lag = self.config.get("lag", 12)
115 | self.period = self.config.get("period", 288)
116 | self.sparsity = self.config.get("sparsity", 0.01)
117 | self.train_rate = self.config.get("train_rate", 0.6)
118 | self.adj_mx = torch.FloatTensor(self._construct_adj())
119 | # self.adj_mx = torch.randn((1432, 1432))
120 |
121 |
122 | def _construct_dtw(self):
123 | data = self.rawdat[:, :, 0]
124 | total_day = data.shape[0] / 288
125 | tr_day = int(total_day * 0.6)
126 | n_route = data.shape[1]
127 | xtr = gen_data(data, tr_day, n_route)
128 | print(np.shape(xtr))
129 | T0 = 288
130 | T = 12
131 | N = n_route
132 | d = np.zeros([N, N])
133 | for i in range(N):
134 | for j in range(i+1,N):
135 | d[i,j]=compute_dtw(xtr[:,:,i],xtr[:,:,j])
136 |
137 | print("The calculation of time series is done!")
138 | dtw = d+ d.T
139 | n = dtw.shape[0]
140 | w_adj = np.zeros([n,n])
141 | adj_percent = 0.01
142 | top = int(n * adj_percent)
143 | for i in range(dtw.shape[0]):
144 | a = dtw[i,:].argsort()[0:top]
145 | for j in range(top):
146 | w_adj[i, a[j]] = 1
147 |
148 | for i in range(n):
149 | for j in range(n):
150 | if (w_adj[i][j] != w_adj[j][i] and w_adj[i][j] ==0):
151 | w_adj[i][j] = 1
152 | if( i==j):
153 | w_adj[i][j] = 1
154 |
155 | print("Total route number: ", n)
156 | print("Sparsity of adj: ", len(w_adj.nonzero()[0])/(n*n))
157 | print("The weighted matrix of temporal graph is generated!")
158 | self.dtw = w_adj
159 |
160 |
161 | def _construct_adj(self):
162 | """
163 |         Build the localized spatio-temporal graph.
164 |         :param A: np.ndarray, adjacency matrix, shape is (N, N)
165 |         :param steps: how many time steps are used to build the graph
166 |         :return: new adjacency matrix: np.ndarray, shape is (N * steps, N * steps)
167 | """
168 | self._construct_dtw()
169 | adj_mx = construct_adj_fusion(self.adj_mx, self.dtw, self.strides)
170 | print("The shape of localized adjacency matrix: {}".format(
171 | adj_mx.shape), flush=True)
172 |
173 | return adj_mx
174 |
175 | def get_data(self):
176 | """
177 |         Return the DataLoaders for the dataset: training, validation and test data.
178 |
179 | Returns:
180 | tuple: tuple contains:
181 | train_dataloader:
182 | eval_dataloader:
183 | test_dataloader:
184 | """
185 | # 加载数据集
186 |         # load the dataset
187 | return self.data["train_loader"], self.data["valid_loader"], self.data["test_loader"]
188 |
189 | def get_data_feature(self):
190 | """
191 |         Return the dataset features; subclasses must implement this and return the required features.
192 |
193 | Returns:
194 |             dict: a dict containing the relevant features of the dataset
195 | """
196 | feature = {
197 | "scaler": self.data["scaler"],
198 | "adj_mx": self.adj_mx,
199 | "num_batches": self.data['num_batches']
200 | }
201 |
202 | return feature
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
--------------------------------------------------------------------------------
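A tiny worked example of construct_adj_fusion, assuming the STFGNN directory is on PYTHONPATH. With N = 2 nodes, a spatial adjacency A and a DTW-based temporal adjacency A_dtw, the fused matrix is (4N, 4N) = (8, 8) and follows the [T, 1, 1, T / 1, S, 1, 1 / 1, 1, S, 1 / T, 1, 1, T] block layout sketched in the docstring:

import numpy as np
from data.dataset.stfgnn_dataset import construct_adj_fusion

A = np.array([[0., 1.],
              [1., 0.]])      # spatial graph (the S blocks)
A_dtw = np.array([[0., 1.],
                  [1., 0.]])  # temporal DTW graph (the T blocks)

adj = construct_adj_fusion(A, A_dtw, steps=4)
print(adj.shape)              # (8, 8)
print(adj[0:2, 2:4])          # identity block: each node linked to itself at the next time step
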
/STFGNN/data/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import numpy as np
3 | import copy
4 | import pickle
5 | import torch.utils.data as torch_data
6 | import torch
7 | import pandas as pd
8 |
9 |
10 |
11 | def get_dataset(config):
12 | """
13 | according the config['dataset_class'] to create the dataset
14 |
15 | Args:
16 | config(ConfigParser): config
17 |
18 | Returns:
19 | AbstractDataset: the loaded dataset
20 | """
21 | if config["task"] == "multi_step":
22 | try:
23 |             return getattr(importlib.import_module('data.dataset.multi_step_dataset'),
24 | config['dataset_class'])(config)
25 | except AttributeError:
26 | raise AttributeError('dataset_class is not found')
27 | elif config["task"] == "single_step":
28 | try:
29 |             return getattr(importlib.import_module('data.dataset.single_step_dataset'),
30 | config['dataset_class'])(config)
31 | except AttributeError:
32 | raise AttributeError('dataset_class is not found')
33 |
34 |
35 | def load_pickle(pickle_file):
36 | try:
37 | with open(pickle_file, 'rb') as f:
38 | pickle_data = pickle.load(f)
39 | except UnicodeDecodeError as e:
40 | with open(pickle_file, 'rb') as f:
41 | pickle_data = pickle.load(f, encoding='latin1')
42 | except Exception as e:
43 | print('Unable to load data ', pickle_file, ':', e)
44 | raise
45 | return pickle_data
46 |
47 |
48 | class DataLoader(object):
49 | def __init__(self, xs, ys, batch_size, pad_with_last_sample=True, shuffle=False):
50 | """
51 |
52 | :param xs:
53 | :param ys:
54 | :param batch_size:
55 |         :param pad_with_last_sample: pad with the last sample to make the number of samples divisible by batch_size.
56 | """
57 | self.batch_size = batch_size
58 | self.current_ind = 0
59 | self.seq_len = ys.shape[0]
60 | if pad_with_last_sample:
61 | num_padding = (batch_size - (len(xs) % batch_size)) % batch_size
62 | x_padding = np.repeat(xs[-1:], num_padding, axis=0)
63 | y_padding = np.repeat(ys[-1:], num_padding, axis=0)
64 | xs = np.concatenate([xs, x_padding], axis=0)
65 | ys = np.concatenate([ys, y_padding], axis=0)
66 | self.size = len(xs)
67 | self.num_batch = int(self.size // self.batch_size)
68 | if shuffle:
69 | permutation = np.random.permutation(self.size)
70 | xs, ys = xs[permutation], ys[permutation]
71 | self.xs = xs
72 | self.ys = ys
73 |
74 | def shuffle(self):
75 |         """Shuffle the samples."""
76 | permutation = np.random.permutation(self.size)
77 | xs, ys = self.xs[permutation], self.ys[permutation]
78 | self.xs = xs
79 | self.ys = ys
80 |
81 | def get_iterator(self):
82 | self.current_ind = 0
83 |
84 | def _wrapper():
85 | while self.current_ind < self.num_batch:
86 | start_ind = self.batch_size * self.current_ind
87 | end_ind = min(self.size, self.batch_size * (self.current_ind + 1))
88 | x_i = self.xs[start_ind: end_ind, ...]
89 | y_i = self.ys[start_ind: end_ind, ...]
90 | yield (x_i, y_i)
91 | self.current_ind += 1
92 |
93 | return _wrapper()
94 |
95 |
96 | class DataLoaderM_new(object):
97 | def __init__(self, xs, ys, ycl, batch_size, pad_with_last_sample=True):
98 | """
99 | :param xs:
100 | :param ys:
101 | :param batch_size:
102 |         :param pad_with_last_sample: pad with the last sample to make the number of samples divisible by batch_size.
103 | """
104 | self.batch_size = batch_size
105 | self.current_ind = 0
106 | if pad_with_last_sample:
107 | num_padding = (batch_size - (len(xs) % batch_size)) % batch_size
108 | x_padding = np.repeat(xs[-1:], num_padding, axis=0)
109 | y_padding = np.repeat(ys[-1:], num_padding, axis=0)
110 | xs = np.concatenate([xs, x_padding], axis=0)
111 | ys = np.concatenate([ys, y_padding], axis=0)
112 | ycl = np.concatenate([ycl, y_padding], axis=0)
113 | self.size = len(xs)
114 | self.num_batch = int(self.size // self.batch_size)
115 | self.xs = xs
116 | self.ys = ys
117 | self.ycl = ycl
118 |
119 | def shuffle(self):
120 | permutation = np.random.permutation(self.size)
121 | xs, ys, ycl = self.xs[permutation], self.ys[permutation], self.ycl[
122 | permutation]
123 | self.xs = xs
124 | self.ys = ys
125 | self.ycl = ycl
126 |
127 | def get_iterator(self):
128 | self.current_ind = 0
129 |
130 | def _wrapper():
131 | while self.current_ind < self.num_batch:
132 | start_ind = self.batch_size * self.current_ind
133 | end_ind = min(self.size,
134 | self.batch_size * (self.current_ind + 1))
135 | x_i = self.xs[start_ind:end_ind, ...]
136 | y_i = self.ys[start_ind:end_ind, ...]
137 | y_i_cl = self.ycl[start_ind:end_ind, ...]
138 | yield (x_i, y_i, y_i_cl)
139 | self.current_ind += 1
140 |
141 | return _wrapper()
142 |
143 |
144 |
--------------------------------------------------------------------------------
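A minimal sketch of the padding behaviour of DataLoader above, with toy arrays shaped [num_samples, window, num_nodes, dim] and the STFGNN directory on PYTHONPATH:

import numpy as np
from data.utils import DataLoader

xs = np.random.rand(100, 12, 4, 1)    # 100 samples, window 12, 4 nodes
ys = np.random.rand(100, 12, 4, 1)

loader = DataLoader(xs, ys, batch_size=64)   # pads 100 -> 128 by repeating the last sample
print(loader.size, loader.num_batch)         # 128 2

for x_batch, y_batch in loader.get_iterator():
    print(x_batch.shape, y_batch.shape)      # (64, 12, 4, 1) twice
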
/STFGNN/evaluator/evaluator.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import os
4 | import scipy.sparse as sp
5 | import torch
6 | from scipy.sparse import linalg
7 | from torch.autograd import Variable
8 | from collections import defaultdict
9 |
10 |
11 | def rse_np(preds, labels):
12 | if not isinstance(preds, np.ndarray):
13 | preds = preds.cpu().numpy()
14 | labels = labels.cpu().numpy()
15 | mse = np.sum(np.square(np.subtract(preds, labels)).astype('float32'))
16 | means = np.mean(labels)
17 | labels_mse = np.sum(np.square(np.subtract(labels, means)).astype('float32'))
18 | return np.sqrt(mse/labels_mse)
19 |
20 |
21 | def mae_np(preds, labels):
22 | if isinstance(preds, np.ndarray):
23 | mae = np.abs(np.subtract(preds, labels)).astype('float32')
24 | else:
25 | mae = np.abs(np.subtract(preds.cpu().numpy(), labels.cpu().numpy())).astype('float32')
26 | return np.mean(mae)
27 |
28 |
29 | def rmse_np(preds, labels):
30 | mse = mse_np(preds, labels)
31 | return np.sqrt(mse)
32 |
33 | def mse_np(preds, labels):
34 | if isinstance(preds, np.ndarray):
35 | return np.mean(np.square(np.subtract(preds, labels)).astype('float32'))
36 | else:
37 | return np.mean(np.square(np.subtract(preds.cpu().numpy(), labels.cpu().numpy())).astype('float32'))
38 |
39 | def mape_np(preds, labels):
40 | if isinstance(preds, np.ndarray):
41 | mape = np.abs(np.divide(np.subtract(preds, labels).astype('float32'), labels))
42 | else:
43 | mape = np.abs(np.divide(np.subtract(preds.cpu().numpy(), labels.cpu().numpy()).astype('float32'), labels.cpu().numpy()))
44 | return np.mean(mape)
45 |
46 |
47 |
48 | def rae_np(preds, labels):
49 | mse = np.sum(np.abs(np.subtract(preds, labels)).astype('float32'))
50 | means = np.mean(labels)
51 | labels_mse = np.sum(np.abs(np.subtract(labels, means)).astype('float32'))
52 | return mse/labels_mse
53 |
54 |
55 |
56 | def pcc_np(x, y):
57 | if not isinstance(x, np.ndarray):
58 | x, y = x.cpu().numpy(), y.cpu().numpy()
59 | x,y = x.reshape(-1),y.reshape(-1)
60 | return np.corrcoef(x,y)[0][1]
61 |
62 |
63 | def node_pcc_np(x, y):
64 | if not isinstance(x, np.ndarray):
65 | x, y = x.cpu().numpy(), y.cpu().numpy()
66 | sigma_x = x.std(axis=0)
67 | sigma_y = y.std(axis=0)
68 | mean_x = x.mean(axis=0)
69 | mean_y = y.mean(axis=0)
70 | cor = ((x - mean_x) * (y - mean_y)).mean(0) / (sigma_x * sigma_y + 0.000000000001)
71 | return cor.mean()
72 |
73 | def corr_np(preds, labels):
74 | sigma_p = (preds).std(axis=0)
75 | sigma_g = (labels).std(axis=0)
76 | mean_p = preds.mean(axis=0)
77 | mean_g = labels.mean(axis=0)
78 | index = (sigma_g != 0)
79 | correlation = ((preds - mean_p) * (labels - mean_g)).mean(axis=0) / (sigma_p * sigma_g)
80 | correlation = (correlation[index]).mean()
81 | return correlation
82 |
83 |
84 | def stemgnn_mape(preds,labels, axis=None):
85 | '''
86 | Mean absolute percentage error.
87 | :param labels: np.ndarray or int, ground truth.
88 | :param preds: np.ndarray or int, prediction.
89 | :param axis: axis to do calculation.
90 | :return: int, MAPE averages on all elements of input.
91 | '''
92 | if not isinstance(preds, np.ndarray):
93 | preds = preds.cpu().numpy()
94 | labels = labels.cpu().numpy()
95 | mape = (np.abs(preds - labels) / (np.abs(labels)+1e-5)).astype(np.float64)
96 | mape = np.where(mape > 5, 5, mape)
97 | return np.mean(mape, axis)
98 |
99 |
100 | def masked_rmse_np(preds, labels, null_val=np.nan):
101 | return np.sqrt(masked_mse_np(preds=preds, labels=labels, null_val=null_val))
102 |
103 |
104 | def masked_mse_np(preds, labels, null_val=np.nan):
105 | with np.errstate(divide='ignore', invalid='ignore'):
106 | if np.isnan(null_val):
107 | mask = ~np.isnan(labels)
108 | else:
109 | mask = np.not_equal(labels, null_val)
110 | mask = mask.astype('float32')
111 | mask /= np.mean(mask)
112 | mse = np.square(np.subtract(preds, labels)).astype('float32')
113 | mse = np.nan_to_num(mse * mask)
114 | return np.mean(mse)
115 |
116 |
117 | def masked_mae_np(preds, labels, null_val=np.nan):
118 | with np.errstate(divide='ignore', invalid='ignore'):
119 | if np.isnan(null_val):
120 | mask = ~np.isnan(labels)
121 | else:
122 | mask = np.not_equal(labels, null_val)
123 | mask = mask.astype('float32')
124 | mask /= np.mean(mask)
125 | mae = np.abs(np.subtract(preds, labels)).astype('float32')
126 | mae = np.nan_to_num(mae * mask)
127 | return np.mean(mae)
128 |
129 |
130 | def masked_mape_np(preds, labels, null_val=np.nan):
131 | if not isinstance(preds, np.ndarray):
132 | preds = preds.cpu().numpy()
133 | labels = labels.cpu().numpy()
134 | with np.errstate(divide='ignore', invalid='ignore'):
135 | if np.isnan(null_val):
136 | mask = ~np.isnan(labels)
137 | else:
138 | mask = np.not_equal(labels, null_val)
139 | mask = mask.astype('float32')
140 | mask /= np.mean(mask)
141 | mape = np.abs(np.divide(np.subtract(preds, labels).astype('float32'), labels))
142 | mape = np.nan_to_num(mask * mape)
143 | return np.mean(mape)
144 |
145 |
146 | class Evaluator(object):
147 | def __init__(self, config):
148 | self.config = config
149 | self.mask = self.config.get("mask", False)
150 | self.out_catagory = "multi"
151 |
152 |
153 | def _evaluate(self, output:np.ndarray, groud_truth:np.ndarray, mask: int, out_catagory: str):
154 | """
155 |         evaluate the model performance
156 | : multi
157 | :param output: [n_samples, 12, n_nodes, n_features]
158 | :param groud_truth: [n_samples, 12, n_nodes, n_features]
159 | : single
160 |
161 | :return: dict [str -> float]
162 | """
163 | if out_catagory == 'multi':
164 | if bool(mask):
165 | if output.shape != groud_truth.shape:
166 | groud_truth = np.expand_dims( groud_truth[...,0], axis=-1)
167 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}'
168 | batch, steps, scores, node = output.shape[0], output.shape[1], defaultdict(dict), output.shape[2]
169 | for step in range(steps):
170 | y_pred = np.reshape(output[:,step],(batch, -1))
171 | y_true = np.reshape(groud_truth[:,step],(batch,-1))
172 | scores['masked_MAE'][f'horizon-{step}'] = masked_mae_np(y_pred, y_true, null_val=0.0)
173 | scores['masked_RMSE'][f'horizon-{step}'] = masked_rmse_np(y_pred, y_true, null_val=0.0)
174 | scores['masked_MAPE'][f'horizon-{step}'] = masked_mape_np(y_pred, y_true, null_val=0.0) * 100.0
175 | scores['node_wise_PCC'][f'horizon-{step}']= node_pcc_np(y_pred.swapaxes(1,-1).reshape((-1,node)), y_true.swapaxes(1,-1).reshape((-1,node)))
176 | scores['PCC'][f'horizon-{step}'] = pcc_np(y_pred, y_true)
177 | scores['masked_MAE']['all'] = masked_mae_np(output,groud_truth ,null_val=0.0)
178 | scores['masked_RMSE']['all'] = masked_rmse_np( output,groud_truth, null_val=0.0)
179 | scores['masked_MAPE']['all'] = masked_mape_np( output,groud_truth, null_val=0.0) * 100.0
180 | scores['PCC']['all'] = pcc_np(output,groud_truth)
181 | scores["node_pcc"]['all'] = node_pcc_np(output, groud_truth)
182 | else:
183 | if output.shape != groud_truth.shape:
184 | groud_truth = np.expand_dims( groud_truth[...,0], axis=-1)
185 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}'
186 | batch, steps, scores, node = output.shape[0], output.shape[1], defaultdict(dict), output.shape[2]
187 | for step in range(steps):
188 | y_pred = output[:,step]
189 | y_true = groud_truth[:,step]
190 | scores['MAE'][f'horizon-{step}'] = mae_np(y_pred, y_true)
191 | scores['RMSE'][f'horizon-{step}'] = rmse_np(y_pred, y_true)
192 | # scores['MAPE'][f'horizon-{step}'] = mape_np(y_pred,y_true) * 100.0
193 | scores['masked_MAPE'][f'horizon-{step}'] = masked_mape_np(y_pred, y_true, null_val=0.0) * 100.0
194 | scores['StemGNN_MAPE'][f'horizon-{step}'] = stemgnn_mape(y_pred, y_true) * 100.0
195 | scores['PCC'][f'horizon-{step}'] = pcc_np(y_pred, y_true)
196 | scores['node_wise_PCC'][f'horizon-{step}']= node_pcc_np(y_pred.swapaxes(1,-1).reshape((-1,node)), y_true.swapaxes(1,-1).reshape((-1,node)))
197 | scores['MAE']['all'] = mae_np(output,groud_truth)
198 | scores['RMSE']['all'] = rmse_np(output,groud_truth)
199 | scores['masked_MAPE']['all'] = masked_mape_np( output,groud_truth, null_val=0.0) * 100.0
200 | scores['StemGNN_MAPE']['all'] = stemgnn_mape(output,groud_truth) * 100.0
201 | scores['PCC']['all'] = pcc_np(output,groud_truth)
202 | scores['node_wise_PCC']['all'] = node_pcc_np(output.swapaxes(2,-1).reshape((-1,node)), groud_truth.swapaxes(2,-1).reshape((-1,node)))
203 | else:
204 | output = output.squeeze()
205 | groud_truth = groud_truth.squeeze()
206 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}'
207 | scores = defaultdict(dict)
208 |
209 | scores['RMSE']['all'] = rmse_np(output, groud_truth)
210 | scores['masked_MAPE']['all'] = masked_mape_np(output, groud_truth, null_val=0.0) * 100.0
211 | scores['PCC']['all'] = node_pcc_np(output, groud_truth)
212 | scores['rse']['all'] = rse_np(output, groud_truth)
213 | scores['rae']['all'] = rae_np(output, groud_truth)
214 | scores['MAPE']['all'] = stemgnn_mape(output, groud_truth) * 100.0
215 | scores['MAE']['all'] = mae_np(output, groud_truth)
216 | scores["node_pcc"]['all'] = node_pcc_np(output, groud_truth)
217 | scores['CORR']['all'] = corr_np(output, groud_truth)
218 | return scores
219 |
220 |
221 | def evaluate(self, output, groud_truth):
222 | if not isinstance(output, np.ndarray):
223 | output = output.cpu().numpy()
224 | if not isinstance(groud_truth, np.ndarray):
225 | groud_truth = groud_truth.cpu().numpy()
226 | return self._evaluate(output, groud_truth, self.mask, self.out_catagory)
227 |
--------------------------------------------------------------------------------
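A short usage sketch of the evaluator on random arrays shaped [n_samples, horizon, n_nodes, n_features], with mask left at the PEMS03.json default of false:

import numpy as np
from evaluator.evaluator import Evaluator

evaluator = Evaluator({"mask": False})
preds = np.random.rand(8, 12, 4, 1).astype("float32")
truth = np.random.rand(8, 12, 4, 1).astype("float32")

scores = evaluator.evaluate(preds, truth)   # dict: metric -> {"horizon-0": ..., ..., "all": ...}
print(scores["MAE"]["all"], scores["RMSE"]["all"])
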
/STFGNN/executor/multi_step_executor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import numpy as np
4 | import torch
5 | import math
6 | import time
7 | import torch.nn as nn
8 | from torch.autograd import Variable
9 | from logging import getLogger
10 | import tqdm
11 | from torch.utils.tensorboard import SummaryWriter
12 | from executor.utils import get_train_loss
13 | from utils.Optim import Optim
14 | from evaluator.evaluator import Evaluator
15 | from utils.utils import ensure_dir
16 |
17 | from model import loss
18 | from functools import partial
19 |
20 |
21 | class MultiStepExecutor(object):
22 | def __init__(self, config, model):
23 | self.config = config
24 | self.evaluator = Evaluator(config)
25 |
26 | _device = self.config.get('device', torch.device('cpu'))
27 | self.device = torch.device(_device)
28 | self.model = model.to(self.device)
29 |
30 | self.cache_dir = 'cache/model_cache'
31 | self.evaluate_res_dir = 'cache/evaluate_cache'
32 | self.summary_writer_dir = 'log/runs'
33 | ensure_dir(self.cache_dir)
34 | ensure_dir(self.evaluate_res_dir)
35 | ensure_dir(self.summary_writer_dir)
36 |
37 | self._writer = SummaryWriter(self.summary_writer_dir)
38 | self._logger = getLogger()
39 | self._logger.info(self.model)
40 |
41 | for name, param in self.model.named_parameters():
42 | self._logger.info(str(name) + '\t' + str(param.shape) + '\t' +
43 | str(param.device) + '\t' + str(param.requires_grad))
44 |
45 | total_num = sum([param.nelement() for param in self.model.parameters()])
46 | self._logger.info('Total parameter numbers: {}'.format(total_num))
47 |
48 | self.train_loss = self.config.get("train_loss", "masked_mae")
49 | self.criterion = get_train_loss(self.train_loss)
50 |
51 | self.cuda = self.config.get("cuda", True)
52 | self.best_val = 10000000
53 | self.optim = Optim(
54 | model.parameters(), self.config
55 | )
56 | self.epochs = self.config.get("epochs", 100)
57 | self.scaler = self.model.scaler
58 | self.num_batches = self.model.num_batches
59 | self.num_nodes = self.config.get("num_nodes", 0)
60 | self.batch_size = self.config.get("batch_size", 64)
61 | self.patience = self.config.get("patience", 20)
62 | self.lr_decay = self.config.get("lr_decay", False)
63 | self.mask = self.config.get("mask", True)
64 |
65 |
66 | def train(self, train_data, valid_data):
67 | print("begin training")
68 | wait = 0
69 | batches_seen = self.num_batches * 0
70 |
71 |
72 | for epoch in tqdm.tqdm(range(1, self.epochs + 1)):
73 | epoch_start_time = time.time()
74 | train_loss = []
75 | train_data.shuffle()
76 |
77 | for iter, (x,y) in enumerate(train_data.get_iterator()):
78 | self.model.train()
79 | self.model.zero_grad()
80 | trainx = torch.Tensor(x).to(self.device) # [batch_size, window, num_nodes, dim]
81 | trainy = torch.Tensor(y).to(self.device) # [batch_size, horizon, num_nodes, dim]
82 | output = self.model(trainx)
83 | loss = self.criterion(self.scaler.inverse_transform(output),
84 | self.scaler.inverse_transform(trainy))
85 |
86 | loss.backward()
87 | self.optim.step()
88 | train_loss.append(loss.item())
89 |
90 |
91 | if self.lr_decay:
92 | self.optim.lr_scheduler.step()
93 |
94 | valid_loss = []
95 | valid_mape = []
96 | valid_rmse = []
97 | valid_pcc = []
98 | for iter, (x, y) in enumerate(valid_data.get_iterator()):
99 | self.model.eval()
100 | valx = torch.Tensor(x).to(self.device)
101 | valy = torch.Tensor(y).to(self.device)
102 | with torch.no_grad():
103 | output = self.model(valx)
104 | score = self.evaluator.evaluate(self.scaler.inverse_transform(output), \
105 | self.scaler.inverse_transform(valy))
106 | if self.mask:
107 | vloss = score["masked_MAE"]["all"]
108 | else:
109 | vloss = score["MAE"]["all"]
110 |
111 | valid_loss.append(vloss)
112 |
113 |
114 | mtrain_loss = np.mean(train_loss)
115 |
116 | mvalid_loss = np.mean(valid_loss)
117 |
118 | print(
119 | '| end of epoch {:3d} | time: {:5.2f}s | train_loss {:5.4f} | valid mae {:5.4f}'.format(
120 | epoch, (time.time() - epoch_start_time), mtrain_loss, \
121 | mvalid_loss))
122 |
123 |             if mvalid_loss < self.best_val:
124 |                 self.best_val = mvalid_loss
125 |                 wait = 0
126 |                 self.best_model = self.model
127 |
128 | else:
129 | wait += 1
130 |
131 | if wait >= self.patience:
132 | print('early stop at epoch: {:04d}'.format(epoch))
133 | break
134 |
135 | self.model = self.best_model
136 |
137 |
138 | def evaluate(self, test_data):
139 | """
140 | use model to test data
141 |
142 | Args:
143 | test_dataloader(torch.Dataloader): Dataloader
144 | """
145 | self._logger.info('Start evaluating ...')
146 | outputs = []
147 | realy = []
148 | seq_len = test_data.seq_len #test_data["y_test"]
149 | self.model.eval()
150 | for iter, (x, y) in enumerate(test_data.get_iterator()):
151 | testx = torch.Tensor(x).to(self.device)
152 | testy = torch.Tensor(y).to(self.device)
153 | with torch.no_grad():
154 | # self.evaluator.clear()
155 | pred = self.model(testx)
156 | outputs.append(pred)
157 | realy.append(testy)
158 | realy = torch.cat(realy, dim=0)
159 | yhat = torch.cat(outputs, dim=0)
160 |
161 | realy = realy[:seq_len, ...]
162 | yhat = yhat[:seq_len, ...]
163 |
164 | realy = self.scaler.inverse_transform(realy)
165 | preds = self.scaler.inverse_transform(yhat)
166 |
167 | res_scores = self.evaluator.evaluate(preds, realy)
168 | for _index in res_scores.keys():
169 | print(_index, " :")
170 | step_dict = res_scores[_index]
171 | for j, k in step_dict.items():
172 | print(j, " : ", k.item())
173 |
174 |
175 |
176 | def save_model(self, cache_name):
177 | """
178 |         Save the current model to a file.
179 |
180 |         Args:
181 |             cache_name(str): the file name to save the model to
182 | """
183 | ensure_dir(self.cache_dir)
184 | self._logger.info("Saved model at " + cache_name)
185 | torch.save(self.model.state_dict(), cache_name)
186 |
187 | def load_model(self, cache_name):
188 | """
189 |         Load the cached model.
190 |
191 |         Args:
192 |             cache_name(str): the file name of the cached model
193 | """
194 | self._logger.info("Loaded model at " + cache_name)
195 | model_state = torch.load(cache_name)
196 | self.model.load_state_dict(model_state)
197 |
--------------------------------------------------------------------------------
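utils/Optim.py is not included in this listing. From the executor's usage above (Optim(model.parameters(), config), optim.step(), and optim.lr_scheduler.step() when lr_decay is set) and the optimizer keys in STFGNN.json, the wrapper presumably looks roughly like the hypothetical sketch below; this is an assumption about the missing file, not its actual contents:

import torch

class Optim(object):
    # hypothetical sketch of the optimizer wrapper assumed by MultiStepExecutor
    def __init__(self, params, config):
        self.params = list(params)
        self.clip = config.get("clip", 10)
        lr = config.get("lr", 0.001)
        if config.get("optim", "adam").lower() == "adam":
            self.optimizer = torch.optim.Adam(self.params, lr=lr)
        else:
            self.optimizer = torch.optim.SGD(self.params, lr=lr)
        # stepped once per epoch by the executor, but only when lr_decay is true
        self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=config.get("lr_decay_steps", [5, 20, 40, 70]),
            gamma=config.get("lr_decay_ratio", 0.1))

    def step(self):
        if self.clip is not None:
            torch.nn.utils.clip_grad_norm_(self.params, self.clip)
        self.optimizer.step()
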
/STFGNN/executor/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import numpy as np
3 | import copy
4 | import pickle
5 | from model import loss
6 | from functools import partial
7 |
8 | def get_train_loss(train_loss):
9 | """
10 | get the loss func
11 | """
12 | if train_loss.lower() == 'none':
13 | print('Warning. Received none train loss func and will use the loss func defined in the model.')
14 | return None
15 |
16 | def func(preds, labels):
17 |
18 | if train_loss.lower() == 'mae':
19 | lf = loss.masked_mae_torch
20 | elif train_loss.lower() == 'mse':
21 | lf = loss.masked_mse_torch
22 | elif train_loss.lower() == 'rmse':
23 | lf = loss.masked_rmse_torch
24 | elif train_loss.lower() == 'mape':
25 | lf = loss.masked_mape_torch
26 | elif train_loss.lower() == 'logcosh':
27 | lf = loss.log_cosh_loss
28 | elif train_loss.lower() == 'huber':
29 | lf = loss.huber_loss
30 | elif train_loss.lower() == 'quantile':
31 | lf = loss.quantile_loss
32 | elif train_loss.lower() == 'masked_mae':
33 | lf = partial(loss.masked_mae_torch, null_val=0)
34 | elif train_loss.lower() == 'masked_mse':
35 | lf = partial(loss.masked_mse_torch, null_val=0)
36 | elif train_loss.lower() == 'masked_rmse':
37 | lf = partial(loss.masked_rmse_torch, null_val=0)
38 | elif train_loss.lower() == 'masked_mape':
39 | lf = partial(loss.masked_mape_torch, null_val=0)
40 | elif train_loss.lower() == 'r2':
41 | lf = loss.r2_score_torch
42 | elif train_loss.lower() == 'evar':
43 | lf = loss.explained_variance_score_torch
44 | else:
45 | lf = loss.masked_mae_torch
46 |
47 | return lf(preds, labels)
48 | return func
49 |
50 |
--------------------------------------------------------------------------------
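A short usage sketch: with train_loss set to "mae" as in STFGNN.json, the returned callable dispatches to loss.masked_mae_torch with its default all-NaN null value, so nothing is actually masked:

import torch
from executor.utils import get_train_loss

criterion = get_train_loss("mae")
preds = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
labels = torch.tensor([[1.5, 2.0], [2.0, 4.0]])
print(criterion(preds, labels))   # tensor(0.3750), plain mean absolute error
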
/STFGNN/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import torch
4 |
5 | from data.dataset.stfgnn_dataset import STFGNNDataset
6 | from model.STFGNN import STFGNN
7 | from executor.multi_step_executor import MultiStepExecutor as STFGNNExecutor
8 |
9 |
10 | config = {}
11 | for filename in ["config/PEMS03.json", "config/STFGNN.json"]:
12 | with open(filename, "r") as f:
13 | _config = json.load(f)
14 | for key in _config:
15 | if key not in config:
16 | config[key] = _config[key]
17 |
18 | dataset = STFGNNDataset(config)
19 |
20 | train_data, valid_data, test_data = dataset.get_data()
21 | data_feature = dataset.get_data_feature()
22 |
23 | model_cache_file = 'cache/model_cache/PEMS03_STFGNN.m'
24 |
25 | model = STFGNN(config, data_feature)
26 |
27 | executor = STFGNNExecutor(config, model)
28 |
29 |
30 | train = True  # flag indicating whether the model should be retrained
31 |
32 | if train or not os.path.exists(model_cache_file):
33 | executor.train(train_data, valid_data)
34 | executor.save_model(model_cache_file)
35 | else:
36 | executor.load_model(model_cache_file)
37 | # Evaluate; the results will be placed under cache/evaluate_cache
38 | executor.evaluate(test_data)
39 |
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/STFGNN/model/STFGNN.py:
--------------------------------------------------------------------------------
1 | # from _typeshed import Self
2 | import torch
3 | import torch.nn.functional as F
4 | import torch.nn as nn
5 |
6 |
7 | class gcn_operation(nn.Module):
8 | def __init__(self, adj, in_dim, out_dim, num_vertices, activation='GLU'):
9 | """
10 |         Graph convolution module
11 |         :param adj: adjacency matrix
12 |         :param in_dim: input dimension
13 |         :param out_dim: output dimension
14 |         :param num_vertices: number of vertices
15 |         :param activation: activation type {'relu', 'GLU'}
16 | """
17 | super(gcn_operation, self).__init__()
18 | self.adj = adj
19 | self.in_dim = in_dim
20 | self.out_dim = out_dim
21 | self.num_vertices = num_vertices
22 | self.activation = activation
23 |
24 | assert self.activation in {'GLU', 'relu'}
25 |
26 | if self.activation == 'GLU':
27 | self.FC = nn.Linear(self.in_dim, 2 * self.out_dim, bias=True)
28 | else:
29 | self.FC = nn.Linear(self.in_dim, self.out_dim, bias=True)
30 |
31 | def forward(self, x, mask=None):
32 | """
33 | :param x: (3*N, B, Cin)
34 | :param mask:(3*N, 3*N)
35 | :return: (3*N, B, Cout)
36 | """
37 | adj = self.adj
38 | if mask is not None:
39 | adj = adj.to(mask.device) * mask
40 |
41 | x = torch.einsum('nm, mbc->nbc', adj.to(x.device), x) # 4*N, B, Cin
42 |
43 | if self.activation == 'GLU':
44 | lhs_rhs = self.FC(x) # 4*N, B, 2*Cout
45 | lhs, rhs = torch.split(lhs_rhs, self.out_dim, dim=-1) # 4*N, B, Cout
46 |
47 | out = lhs * torch.sigmoid(rhs)
48 | del lhs, rhs, lhs_rhs
49 |
50 | return out
51 |
52 | elif self.activation == 'relu':
53 | return torch.relu(self.FC(x)) # 3*N, B, Cout
54 |
55 |
56 | class STSGCM(nn.Module):
57 | def __init__(self, adj, in_dim, out_dims, num_of_vertices, activation='GLU'):
58 | """
59 |         :param adj: adjacency matrix
60 |         :param in_dim: input dimension
61 |         :param out_dims: list, output dimension of each graph convolution
62 |         :param num_of_vertices: number of vertices
63 |         :param activation: activation type {'relu', 'GLU'}
64 | """
65 | super(STSGCM, self).__init__()
66 | self.adj = adj
67 | self.in_dim = in_dim
68 | self.out_dims = out_dims
69 | self.num_of_vertices = num_of_vertices
70 | self.activation = activation
71 |
72 | self.gcn_operations = nn.ModuleList()
73 |
74 | self.gcn_operations.append(
75 | gcn_operation(
76 | adj=self.adj,
77 | in_dim=self.in_dim,
78 | out_dim=self.out_dims[0],
79 | num_vertices=self.num_of_vertices,
80 | activation=self.activation
81 | )
82 | )
83 |
84 | for i in range(1, len(self.out_dims)):
85 | self.gcn_operations.append(
86 | gcn_operation(
87 | adj=self.adj,
88 | in_dim=self.out_dims[i-1],
89 | out_dim=self.out_dims[i],
90 | num_vertices=self.num_of_vertices,
91 | activation=self.activation
92 | )
93 | )
94 |
95 | def forward(self, x, mask=None):
96 | """
97 | :param x: (3N, B, Cin)
98 | :param mask: (3N, 3N)
99 | :return: (N, B, Cout)
100 | """
101 | need_concat = []
102 |
103 | for i in range(len(self.out_dims)):
104 | x = self.gcn_operations[i](x, mask)
105 | need_concat.append(x)
106 |
107 | # shape of each element is (1, N, B, Cout)
108 | need_concat = [
109 | torch.unsqueeze(
110 | h[self.num_of_vertices: 2 * self.num_of_vertices], dim=0
111 | ) for h in need_concat
112 | ]
113 |
114 | out = torch.max(torch.cat(need_concat, dim=0), dim=0).values # (N, B, Cout)
115 |
116 | del need_concat
117 |
118 | return out
119 |
120 |
121 | class STSGCL(nn.Module):
122 | def __init__(self,
123 | adj,
124 | history,
125 | num_of_vertices,
126 | in_dim,
127 | out_dims,
128 | strides=4,
129 | activation='GLU',
130 | temporal_emb=True,
131 | spatial_emb=True):
132 | """
133 |         :param adj: adjacency matrix
134 |         :param history: length of the input sequence
135 |         :param in_dim: input dimension
136 |         :param out_dims: list, output dimension of each graph convolution
137 |         :param strides: sliding-window length, i.e. how many time steps the local spatio-temporal graph is built from, default 4
138 |         :param num_of_vertices: number of vertices
139 |         :param activation: activation type {'relu', 'GLU'}
140 |         :param temporal_emb: whether to add a temporal position embedding
141 |         :param spatial_emb: whether to add a spatial position embedding
142 | """
143 | super(STSGCL, self).__init__()
144 | self.adj = adj
145 | self.strides = strides
146 | self.history = history
147 | self.in_dim = in_dim
148 | self.out_dims = out_dims
149 | self.num_of_vertices = num_of_vertices
150 |
151 | self.activation = activation
152 | self.temporal_emb = temporal_emb
153 | self.spatial_emb = spatial_emb
154 |
155 |
156 |         self.conv1 = nn.Conv2d(self.in_dim, self.out_dims[-1], kernel_size=(1, 2), stride=(1, 1), dilation=(1, 3))
157 |         self.conv2 = nn.Conv2d(self.in_dim, self.out_dims[-1], kernel_size=(1, 2), stride=(1, 1), dilation=(1, 3))
158 |
159 |
160 | self.STSGCMS = nn.ModuleList()
161 | for i in range(self.history - self.strides + 1):
162 | self.STSGCMS.append(
163 | STSGCM(
164 | adj=self.adj,
165 | in_dim=self.in_dim,
166 | out_dims=self.out_dims,
167 | num_of_vertices=self.num_of_vertices,
168 | activation=self.activation
169 | )
170 | )
171 |
172 | if self.temporal_emb:
173 | self.temporal_embedding = nn.Parameter(torch.FloatTensor(1, self.history, 1, self.in_dim))
174 | # 1, T, 1, Cin
175 |
176 | if self.spatial_emb:
177 | self.spatial_embedding = nn.Parameter(torch.FloatTensor(1, 1, self.num_of_vertices, self.in_dim))
178 | # 1, 1, N, Cin
179 |
180 | self.reset()
181 |
182 | def reset(self):
183 | if self.temporal_emb:
184 | nn.init.xavier_normal_(self.temporal_embedding, gain=0.0003)
185 |
186 | if self.spatial_emb:
187 | nn.init.xavier_normal_(self.spatial_embedding, gain=0.0003)
188 |
189 | def forward(self, x, mask=None):
190 | """
191 | :param x: B, T, N, Cin
192 | :param mask: (N, N)
193 | :return: B, T-3, N, Cout
194 | """
195 | if self.temporal_emb:
196 | x = x + self.temporal_embedding
197 |
198 | if self.spatial_emb:
199 | x = x + self.spatial_embedding
200 |
201 | #############################################
202 | # shape is (B, C, N, T)
203 | data_temp = x.permute(0, 3, 2, 1)
204 | data_left = torch.sigmoid(self.conv1(data_temp))
205 | data_right = torch.tanh(self.conv2(data_temp))
206 | data_time_axis = data_left * data_right
207 | data_res = data_time_axis.permute(0, 3, 2, 1)
208 | # shape is (B, T-3, N, C)
209 | #############################################
210 |
211 | need_concat = []
212 | batch_size = x.shape[0]
213 |
214 | for i in range(self.history - self.strides + 1):
215 | t = x[:, i: i+self.strides, :, :] # (B, 4, N, Cin)
216 |
217 | t = torch.reshape(t, shape=[batch_size, self.strides * self.num_of_vertices, self.in_dim])
218 | # (B, 4*N, Cin)
219 |
220 | t = self.STSGCMS[i](t.permute(1, 0, 2), mask) # (4*N, B, Cin) -> (N, B, Cout)
221 |
222 | t = torch.unsqueeze(t.permute(1, 0, 2), dim=1) # (N, B, Cout) -> (B, N, Cout) ->(B, 1, N, Cout)
223 |
224 | need_concat.append(t)
225 |
226 | mid_out = torch.cat(need_concat, dim=1) # (B, T-3, N, Cout)
227 | out = mid_out + data_res
228 |
229 | del need_concat, batch_size
230 |
231 | return out
232 |
233 |
234 | class output_layer(nn.Module):
235 | def __init__(self, num_of_vertices, history, in_dim, out_dim,
236 | hidden_dim=128, horizon=12):
237 | """
238 |         Prediction layer. In the authors' experiments each prediction time step is handled separately, i.e. horizon is set to 1.
239 |         :param num_of_vertices: number of vertices
240 |         :param history: length of the input sequence
241 |         :param in_dim: input dimension
242 |         :param hidden_dim: hidden layer dimension
243 |         :param horizon: prediction horizon
244 | """
245 | super(output_layer, self).__init__()
246 | self.num_of_vertices = num_of_vertices
247 | self.history = history
248 | self.in_dim = in_dim
249 | self.out_dim = out_dim
250 | self.hidden_dim = hidden_dim
251 | self.horizon = horizon
252 |
253 | #print("#####################")
254 | #print(self.in_dim)
255 | #print(self.history)
256 | #print(self.hidden_dim)
257 |
258 | self.FC1 = nn.Linear(self.in_dim * self.history, self.hidden_dim, bias=True)
259 |
260 | #self.FC2 = nn.Linear(self.hidden_dim, self.horizon , bias=True)
261 |
262 | self.FC2 = nn.Linear(self.hidden_dim, self.horizon * self.out_dim, bias=True)
263 |
264 | def forward(self, x):
265 | """
266 | :param x: (B, Tin, N, Cin)
267 | :return: (B, Tout, N)
268 | """
269 | batch_size = x.shape[0]
270 |
271 | x = x.permute(0, 2, 1, 3) # B, N, Tin, Cin
272 |
273 | out1 = torch.relu(self.FC1(x.reshape(batch_size, self.num_of_vertices, -1)))
274 | # (B, N, Tin, Cin) -> (B, N, Tin * Cin) -> (B, N, hidden)
275 |
276 | out2 = self.FC2(out1) # (B, N, hidden) -> (B, N, horizon * 2)
277 |
278 | out2 = out2.reshape(batch_size, self.num_of_vertices, self.horizon, self.out_dim)
279 |
280 | del out1, batch_size
281 |
282 | return out2.permute(0, 2, 1, 3) # B, horizon, N
283 | # return out2.permute(0, 2, 1) # B, horizon, N
284 |
285 |
286 | class STFGNN(nn.Module):
287 | def __init__(self, config, data_feature):
288 | """
289 |
290 |         :param adj: the fused local spatio-temporal adjacency matrix
291 |         :param history: length of the input sequence
292 |         :param num_of_vertices: number of vertices
293 |         :param in_dim: input dimension
294 |         :param hidden_dims: list of lists, convolution output dimensions of the intermediate STSGCL layers
295 |         :param first_layer_embedding_size: dimension of the first (input embedding) layer
296 |         :param out_layer_dim: hidden dimension of the output module
297 |         :param activation: activation function {relu, GLU}
298 |         :param use_mask: whether to use a mask matrix to refine adj
299 |         :param temporal_emb: whether to use a temporal position embedding
300 |         :param spatial_emb: whether to use a spatial position embedding
301 |         :param horizon: prediction horizon
302 |         :param strides: sliding-window length, i.e. how many time steps the local spatio-temporal graph is built from, default 4
303 | """
304 | super(STFGNN, self).__init__()
305 |
306 | self.config = config
307 | self.data_feature = data_feature
308 | self.scaler = data_feature["scaler"]
309 | self.num_batches = data_feature["num_batches"]
310 |
311 | adj = self.data_feature["adj_mx"]
312 | history = self.config.get("window", 12)
313 | num_of_vertices = self.config.get("num_nodes", None)
314 | in_dim = self.config.get("input_dim", 1)
315 | out_dim = self.config.get("output_dim", 1)
316 | hidden_dims = self.config.get("hidden_dims", None)
317 | first_layer_embedding_size = self.config.get("first_layer_embedding_size", None)
318 | out_layer_dim = self.config.get("out_layer_dim", None)
319 | activation = self.config.get("activation", "GLU")
320 | use_mask = self.config.get("mask")
321 | temporal_emb = self.config.get("temporal_emb", True)
322 | spatial_emb = self.config.get("spatial_emb", True)
323 | horizon = self.config.get("horizon", 12)
324 | strides = self.config.get("strides", 4)
325 |
326 | self.adj = adj
327 | self.num_of_vertices = num_of_vertices
328 | self.hidden_dims = hidden_dims
329 | self.out_layer_dim = out_layer_dim
330 | self.activation = activation
331 | self.use_mask = use_mask
332 |
333 | self.temporal_emb = temporal_emb
334 | self.spatial_emb = spatial_emb
335 | self.horizon = horizon
336 | self.strides = 4
337 |
338 | self.First_FC = nn.Linear(in_dim, first_layer_embedding_size, bias=True)
339 | self.STSGCLS = nn.ModuleList()
340 | #print("____________________")
341 | #print(history)
342 |
343 | self.STSGCLS.append(
344 | STSGCL(
345 | adj=self.adj,
346 | history=history,
347 | num_of_vertices=self.num_of_vertices,
348 | in_dim=first_layer_embedding_size,
349 | out_dims=self.hidden_dims[0],
350 | strides=self.strides,
351 | activation=self.activation,
352 | temporal_emb=self.temporal_emb,
353 | spatial_emb=self.spatial_emb
354 | )
355 | )
356 |
357 | in_dim = self.hidden_dims[0][-1]
358 | history -= (self.strides - 1)
359 |
360 | #print("!!!!!!!!!!!!!!!!!!!")
361 | #print(history)
362 |
363 | for idx, hidden_list in enumerate(self.hidden_dims):
364 | #print("?????? ", idx)
365 | if idx == 0:
366 | continue
367 | #print("---------", idx)
368 | self.STSGCLS.append(
369 | STSGCL(
370 | adj=self.adj,
371 | history=history,
372 | num_of_vertices=self.num_of_vertices,
373 | in_dim=in_dim,
374 | out_dims=hidden_list,
375 | strides=self.strides,
376 | activation=self.activation,
377 | temporal_emb=self.temporal_emb,
378 | spatial_emb=self.spatial_emb
379 | )
380 | )
381 | history -= (self.strides - 1)
382 | in_dim = hidden_list[-1]
383 |
384 | self.predictLayer = nn.ModuleList()
385 | #print("***********************")
386 | #print(history)
387 | for t in range(self.horizon):
388 | self.predictLayer.append(
389 | output_layer(
390 | num_of_vertices=self.num_of_vertices,
391 | history=history,
392 | in_dim=in_dim,
393 | out_dim = out_dim,
394 | hidden_dim=out_layer_dim,
395 | horizon=1
396 | )
397 | )
398 |
399 | if self.use_mask:
400 | mask = torch.zeros_like(self.adj)
401 | mask[self.adj != 0] = self.adj[self.adj != 0]
402 | self.mask = nn.Parameter(mask)
403 | else:
404 | self.mask = None
405 |
406 | def forward(self, x):
407 | """
408 | :param x: B, Tin, N, Cin)
409 | :return: B, Tout, N
410 | """
411 |
412 | x = torch.relu(self.First_FC(x)) # B, Tin, N, Cin
413 | #print(1)
414 |
415 | for model in self.STSGCLS:
416 | x = model(x, self.mask)
417 | # (B, T - 8, N, Cout)
418 | #print(2)
419 | need_concat = []
420 | for i in range(self.horizon):
421 | out_step = self.predictLayer[i](x) # (B, 1, N, 2)
422 | need_concat.append(out_step)
423 | #print(3)
424 | out = torch.cat(need_concat, dim=1) # B, Tout, N, 2
425 |
426 | del need_concat
427 |
428 | return out
429 |
430 |
431 |
432 |
--------------------------------------------------------------------------------
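A minimal shape-check sketch for the model with toy dimensions (4 nodes, an already-fused 4N x 4N adjacency stand-in, mask disabled). The config keys mirror STFGNN.json but are scaled down, and the scaler is not needed for a bare forward pass:

import torch
from model.STFGNN import STFGNN

num_nodes, window, horizon = 4, 12, 12
config = {
    "window": window, "horizon": horizon, "num_nodes": num_nodes,
    "input_dim": 1, "output_dim": 1,
    "hidden_dims": [[16, 16], [16, 16], [16, 16]],
    "first_layer_embedding_size": 8, "out_layer_dim": 32,
    "activation": "GLU", "mask": False,
    "temporal_emb": True, "spatial_emb": True, "strides": 4,
}
data_feature = {
    "scaler": None,                      # unused by forward()
    "adj_mx": torch.eye(4 * num_nodes),  # stands in for the fused local spatio-temporal graph
    "num_batches": 1,
}

model = STFGNN(config, data_feature)
x = torch.randn(2, window, num_nodes, 1)   # (B, Tin, N, Cin)
print(model(x).shape)                      # torch.Size([2, 12, 4, 1]) -> (B, Tout, N, Cout)
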
/STFGNN/model/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.metrics import r2_score, explained_variance_score
4 |
5 |
6 | def masked_mae_loss(y_pred, y_true):
7 | mask = (y_true != 0).float()
8 | mask /= mask.mean()
9 | loss = torch.abs(y_pred - y_true)
10 | loss = loss * mask
11 | # trick for nans:
12 | # https://discuss.pytorch.org/t/how-to-set-nan-in-tensor-to-0/3918/3
13 | loss[loss != loss] = 0
14 | return loss.mean()
15 |
16 |
17 | def masked_mae_torch(preds, labels, null_val=np.nan):
18 | labels[torch.abs(labels) < 1e-4] = 0
19 | if np.isnan(null_val):
20 | mask = ~torch.isnan(labels)
21 | else:
22 | mask = labels.ne(null_val)
23 | mask = mask.float()
24 | mask /= torch.mean(mask)
25 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
26 | loss = torch.abs(torch.sub(preds, labels))
27 | loss = loss * mask
28 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
29 | return torch.mean(loss)
30 |
31 |
32 | def log_cosh_loss(preds, labels):
33 | loss = torch.log(torch.cosh(preds - labels))
34 | return torch.mean(loss)
35 |
36 |
37 | def huber_loss(preds, labels, delta=1.0):
38 | residual = torch.abs(preds - labels)
39 | condition = torch.le(residual, delta)
40 | small_res = 0.5 * torch.square(residual)
41 | large_res = delta * residual - 0.5 * delta * delta
42 | return torch.mean(torch.where(condition, small_res, large_res))
43 | # lo = torch.nn.SmoothL1Loss()
44 | # return lo(preds, labels)
45 |
46 |
47 | def quantile_loss(preds, labels, delta=0.25):
48 | condition = torch.ge(labels, preds)
49 | large_res = delta * (labels - preds)
50 | small_res = (1 - delta) * (preds - labels)
51 | return torch.mean(torch.where(condition, large_res, small_res))
52 |
53 |
54 | def masked_mape_torch(preds, labels, null_val=np.nan, eps=0):
55 | labels[torch.abs(labels) < 1e-4] = 0
56 | if np.isnan(null_val) and eps != 0:
57 | loss = torch.abs((preds - labels) / (labels + eps))
58 | return torch.mean(loss)
59 | if np.isnan(null_val):
60 | mask = ~torch.isnan(labels)
61 | else:
62 | mask = labels.ne(null_val)
63 | mask = mask.float()
64 | mask /= torch.mean(mask)
65 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
66 | loss = torch.abs((preds - labels) / labels)
67 | loss = loss * mask
68 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
69 | return torch.mean(loss)
70 |
71 |
72 | def masked_mse_torch(preds, labels, null_val=np.nan):
73 |     labels = torch.where(torch.abs(labels) < 1e-4, torch.zeros_like(labels), labels)
74 | if np.isnan(null_val):
75 | mask = ~torch.isnan(labels)
76 | else:
77 | mask = labels.ne(null_val)
78 | mask = mask.float()
79 | mask /= torch.mean(mask)
80 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
81 | loss = torch.square(torch.sub(preds, labels))
82 | loss = loss * mask
83 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
84 | return torch.mean(loss)
85 |
86 |
87 | def masked_rmse_torch(preds, labels, null_val=np.nan):
88 |     labels = torch.where(torch.abs(labels) < 1e-4, torch.zeros_like(labels), labels)
89 | return torch.sqrt(masked_mse_torch(preds=preds, labels=labels,
90 | null_val=null_val))
91 |
92 |
93 | def r2_score_torch(preds, labels):
94 |     preds = preds.detach().cpu().flatten()
95 |     labels = labels.detach().cpu().flatten()
96 | return r2_score(labels, preds)
97 |
98 |
99 | def explained_variance_score_torch(preds, labels):
100 |     preds = preds.detach().cpu().flatten()
101 |     labels = labels.detach().cpu().flatten()
102 | return explained_variance_score(labels, preds)
103 |
104 |
105 | def masked_rmse_np(preds, labels, null_val=np.nan):
106 | return np.sqrt(masked_mse_np(preds=preds, labels=labels,
107 | null_val=null_val))
108 |
109 |
110 | def masked_mse_np(preds, labels, null_val=np.nan):
111 | with np.errstate(divide='ignore', invalid='ignore'):
112 | if np.isnan(null_val):
113 | mask = ~np.isnan(labels)
114 | else:
115 | mask = np.not_equal(labels, null_val)
116 | mask = mask.astype('float32')
117 | mask /= np.mean(mask)
118 | rmse = np.square(np.subtract(preds, labels)).astype('float32')
119 | rmse = np.nan_to_num(rmse * mask)
120 | return np.mean(rmse)
121 |
122 |
123 | def masked_mae_np(preds, labels, null_val=np.nan):
124 | with np.errstate(divide='ignore', invalid='ignore'):
125 | if np.isnan(null_val):
126 | mask = ~np.isnan(labels)
127 | else:
128 | mask = np.not_equal(labels, null_val)
129 | mask = mask.astype('float32')
130 | mask /= np.mean(mask)
131 | mae = np.abs(np.subtract(preds, labels)).astype('float32')
132 | mae = np.nan_to_num(mae * mask)
133 | return np.mean(mae)
134 |
135 |
136 | def masked_mape_np(preds, labels, null_val=np.nan):
137 | with np.errstate(divide='ignore', invalid='ignore'):
138 | if np.isnan(null_val):
139 | mask = ~np.isnan(labels)
140 | else:
141 | mask = np.not_equal(labels, null_val)
142 | mask = mask.astype('float32')
143 | mask /= np.mean(mask)
144 | mape = np.abs(np.divide(np.subtract(
145 | preds, labels).astype('float32'), labels))
146 | mape = np.nan_to_num(mask * mape)
147 | return np.mean(mape)
148 |
149 |
150 | def r2_score_np(preds, labels):
151 | preds = preds.flatten()
152 | labels = labels.flatten()
153 | return r2_score(labels, preds)
154 |
155 |
156 | def explained_variance_score_np(preds, labels):
157 | preds = preds.flatten()
158 | labels = labels.flatten()
159 | return explained_variance_score(labels, preds)
160 |
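161 | 
162 | 
163 | # Illustrative usage only (not called by the training pipeline): a quick check,
164 | # under the assumption that zeros in `labels` mark missing readings, that the
165 | # masked MAE ignores those positions while a plain MAE does not.
166 | if __name__ == '__main__':
167 |     preds = torch.tensor([[1.0, 2.0, 3.0]])
168 |     labels = torch.tensor([[1.5, 0.0, 2.0]])  # the middle reading is missing
169 |     print(masked_mae_torch(preds, labels, null_val=0.0))  # averages over the 2 valid entries -> 0.75
170 |     print(torch.mean(torch.abs(preds - labels)))          # a plain MAE also counts the missing entry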
--------------------------------------------------------------------------------
/STFGNN/raw_data/PEMS03/PEMS03.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lwm412/STFGNN-Pytorch/ae7c95866d036d2bd7143d6d6020228a4e902e50/STFGNN/raw_data/PEMS03/PEMS03.npz
--------------------------------------------------------------------------------
/STFGNN/raw_data/PEMS03/adj_mx.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lwm412/STFGNN-Pytorch/ae7c95866d036d2bd7143d6d6020228a4e902e50/STFGNN/raw_data/PEMS03/adj_mx.pkl
--------------------------------------------------------------------------------
/STFGNN/utils/GPS_utils.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | R_EARTH = 6371000 # meter
4 |
5 |
6 | def angle2radian(angle):
7 | """
8 | convert from an angle to a radian
9 | :param angle: (float)
10 | :return: radian (float)
11 | """
12 | return math.radians(angle)
13 |
14 |
15 | def radian2angle(radian):
16 | return math.degrees(radian)
17 |
18 |
19 | def spherical_law_of_cosines(phi1, lambda1, phi2, lambda2):
20 | """
21 | calculate great circle distance with spherical law of cosines
22 | phi/lambda for latitude/longitude in radians
23 | :param phi1: point one's latitude in radians
24 | :param lambda1: point one's longitude in radians
25 | :param phi2: point two's latitude in radians
26 | :param lambda2: point two's longitude in radians
27 | :return:
28 | """
29 | d_lambda = lambda2 - lambda1
30 | return math.acos(math.sin(phi1) * math.sin(phi2) + math.cos(phi1) * math.cos(phi2) * math.cos(d_lambda))
31 |
32 |
33 | def haversine(phi1, lambda1, phi2, lambda2):
34 | """
35 | calculate angular great circle distance with haversine formula
36 | see parameters in spherical_law_of_cosines
37 | """
38 | d_phi = phi2 - phi1
39 | d_lambda = lambda2 - lambda1
40 | a = math.pow(math.sin(d_phi / 2), 2) + \
41 | math.cos(phi1) * math.cos(phi2) * math.pow(math.sin(d_lambda / 2), 2)
42 | c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
43 | return c
44 |
45 |
46 | def equirectangular_approximation(phi1, lambda1, phi2, lambda2):
47 | """
48 | calculate angular great circle distance with Pythagoras’ theorem performed on an equirectangular projection
49 | see parameters in spherical_law_of_cosines
50 | """
51 | x = (lambda2 - lambda1) * math.cos((phi1 + phi2) / 2)
52 | y = phi2 - phi1
53 | return math.sqrt(math.pow(x, 2) + math.pow(y, 2))
54 |
55 |
56 | def dist(phi1, lambda1, phi2, lambda2, r=R_EARTH, method='hav'):
57 | """
58 | calculate great circle distance with given latitude and longitude,
59 |     :param phi1: point one's latitude in radians
60 |     :param lambda1: point one's longitude in radians
61 |     :param phi2: point two's latitude in radians
62 |     :param lambda2: point two's longitude in radians
63 | :param r: earth radius(m)
64 | :param method: 'hav' means haversine,
65 | 'LoC' means Spherical Law of Cosines,
66 | 'approx' means Pythagoras’ theorem performed on an equirectangular projection
67 | :return: distance (m)
68 | """
69 | return angular_dist(phi1, lambda1, phi2, lambda2, method) * r
70 |
71 |
72 | def angular_dist(phi1, lambda1, phi2, lambda2, method='hav'):
73 | """
74 | calculate angular great circle distance with given latitude and longitude
75 | :return: angle
76 | """
77 | if method.lower() == 'hav':
78 | return haversine(phi1, lambda1, phi2, lambda2)
79 | elif method.lower() == 'loc':
80 | return spherical_law_of_cosines(phi1, lambda1, phi2, lambda2)
81 | elif method.lower() == 'approx':
82 | return equirectangular_approximation(phi1, lambda1, phi2, lambda2)
83 | else:
84 |         raise ValueError('unknown method: ' + method)
85 |
86 |
87 | def destination(phi1, lambda1, brng, distance, r=R_EARTH):
88 |     """
89 |     calculate the destination point reached from a start point given an initial bearing and a distance
90 |     :param phi1: start point's latitude in radians
91 |     :param lambda1: start point's longitude in radians
92 |     :param brng: initial bearing in radians
93 |     :param distance: distance travelled along the great circle (m)
94 |     :return: (phi2, lambda2), the destination's latitude and longitude in radians
95 |     """
96 | delta = distance / r
97 | phi2 = math.asin(math.sin(phi1) * math.cos(delta) + math.cos(phi1) * math.sin(delta) * math.cos(brng))
98 | lambda2 = lambda1 + math.atan2(
99 | math.sin(brng) * math.sin(delta) * math.cos(phi1), math.cos(delta) - math.sin(phi1) * math.sin(phi2)
100 | )
101 | return phi2, lambda2
102 |
103 |
104 | def init_bearing(phi1, lambda1, phi2, lambda2):
105 | """
106 | initial bearing of a great circle route
107 | :return: 0~360
108 | """
109 | y = math.sin(lambda2 - lambda1) * math.cos(phi2)
110 | x = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(lambda2 - lambda1)
111 | theta = math.atan2(y, x)
112 | brng = (theta * 180 / math.pi + 360) % 360
113 | return brng
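114 | 
115 | 
116 | # Illustrative usage only: compare the three great-circle methods on a short,
117 | # made-up pair of coordinates. Inputs are converted to radians first, which is
118 | # what haversine() and the other helpers expect.
119 | if __name__ == '__main__':
120 |     phi1, lam1 = angle2radian(39.90), angle2radian(116.40)
121 |     phi2, lam2 = angle2radian(39.91), angle2radian(116.41)
122 |     for m in ('hav', 'LoC', 'approx'):
123 |         print(m, round(dist(phi1, lam1, phi2, lam2, method=m), 1), 'm')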
--------------------------------------------------------------------------------
/STFGNN/utils/Optim.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.optim as optim
4 |
5 |
6 | class Optim(object):
7 |
8 | def __init__(self, params, config):
9 | self.params = list(params) # careful: params may be a generator
10 | self.config = config
11 | self.last_ppl = None
12 | self.lr = self.config.get("lr", 0.001)
13 | self.max_grad_norm = self.config.get("clip", 10)
14 | self.method = self.config.get("optim", "adam")
15 | self.lr_decay = self.config.get("lr_decay", False)
16 | self.lr_scheduler_type = self.config.get('lr_scheduler', 'multisteplr')
17 | self.lr_decay_ratio = self.config.get("lr_decay_ratio", 0.1)
18 | self.milestones = self.config.get("lr_decay_steps", [])
19 | self.step_size = self.config.get("step_size", 10)
20 |
21 | self._makeOptimizer()
22 | self.lr_scheduler = self._build_lr_scheduler()
23 |
24 |
25 | def _makeOptimizer(self):
26 | if self.method == 'sgd':
27 | self.optimizer = optim.SGD(self.params, lr=self.lr)
28 | elif self.method == 'adagrad':
29 | self.optimizer = optim.Adagrad(self.params, lr=self.lr)
30 | elif self.method == 'adadelta':
31 | self.optimizer = optim.Adadelta(self.params, lr=self.lr)
32 | elif self.method == 'adam':
33 | self.optimizer = optim.Adam(self.params, lr=self.lr)
34 | else:
35 | raise RuntimeError("Invalid optim method: " + self.method)
36 |
37 | def _build_lr_scheduler(self):
38 | """
39 |         select the learning-rate scheduler according to the global parameter `lr_scheduler`
40 | """
41 | if self.lr_decay:
42 | if self.lr_scheduler_type.lower() == 'multisteplr':
43 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
44 | self.optimizer, milestones=self.milestones, gamma=self.lr_decay_ratio)
45 | elif self.lr_scheduler_type.lower() == 'steplr':
46 | lr_scheduler = torch.optim.lr_scheduler.StepLR(
47 | self.optimizer, step_size=self.step_size, gamma=self.lr_decay_ratio)
48 | elif self.lr_scheduler_type.lower() == 'exponentiallr':
49 | lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
50 | self.optimizer, gamma=self.lr_decay_ratio)
51 | else:
52 | print('Received unrecognized lr_scheduler, '
53 | 'please check the parameter `lr_scheduler`.')
54 | lr_scheduler = None
55 | else:
56 | lr_scheduler = None
57 | return lr_scheduler
58 |
59 |
60 | def step(self):
61 | # Compute gradients norm.
62 | grad_norm = 0
63 |
64 | if self.max_grad_norm is not None:
65 |             grad_norm = torch.nn.utils.clip_grad_norm_(self.params, self.max_grad_norm)
66 |
67 | self.optimizer.step()
68 | return grad_norm
69 |
70 |
71 | def zero_grad(self):
72 | self.optimizer.zero_grad()
73 | return
74 |
75 |
76 |     # decay learning rate if val perf does not improve or we hit the start_decay_at limit
77 |     def updateLearningRate(self, ppl, epoch):
78 |         start_decay = False
79 |         start_decay_at = self.config.get("start_decay_at", None)
80 |         if start_decay_at is not None and epoch >= start_decay_at:
81 |             start_decay = True
82 |         if self.last_ppl is not None and ppl > self.last_ppl:
83 |             start_decay = True
84 | 
85 |         if start_decay:
86 |             self.lr = self.lr * self.lr_decay_ratio
87 |             print("Decaying learning rate to %g" % self.lr)
88 | 
89 |         self.last_ppl = ppl
90 | 
91 |         self._makeOptimizer()
92 |
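93 | 
94 | # Illustrative usage only: wrap the parameters of a small stand-in model with
95 | # the same config keys read in __init__ ("optim", "lr", "clip", "lr_decay", ...).
96 | if __name__ == '__main__':
97 |     model = torch.nn.Linear(4, 1)
98 |     wrapped = Optim(model.parameters(), {"optim": "adam", "lr": 0.001, "clip": 10})
99 |     x, y = torch.randn(8, 4), torch.randn(8, 1)
100 |     loss = torch.mean((model(x) - y) ** 2)
101 |     wrapped.zero_grad()
102 |     loss.backward()
103 |     grad_norm = wrapped.step()
104 |     print('gradient norm:', float(grad_norm))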
--------------------------------------------------------------------------------
/STFGNN/utils/argument_list.py:
--------------------------------------------------------------------------------
1 | """
2 | store the arguments that can be modified by the user
3 | """
4 | import argparse
5 |
6 | general_arguments = {
7 | "gpu": "bool",
8 | "batch_size": "int",
9 | "train_rate": "float",
10 | "eval_rate": "float",
11 | "learning_rate": "float",
12 | "max_epoch": "int",
13 | "gpu_id": "int"
14 | }
15 |
16 | hyper_arguments = {
17 | "gpu": {
18 | "type": "bool",
19 | "default": None,
20 | "help": "whether use gpu"
21 | },
22 | "gpu_id": {
23 | "type": "int",
24 | "default": None,
25 | "help": "the gpu id to use"
26 | },
27 | "train_rate": {
28 | "type": "float",
29 | "default": None,
30 | "help": "the train set rate"
31 | },
32 | "eval_rate": {
33 | "type": "float",
34 | "default": None,
35 | "help": "the validation set rate"
36 | },
37 | "batch_size": {
38 | "type": "int",
39 | "default": None,
40 | "help": "the batch size"
41 | }
42 | }
43 |
44 |
45 | def str2bool(s):
46 | if isinstance(s, bool):
47 | return s
48 | if s.lower() in ('yes', 'true'):
49 | return True
50 | elif s.lower() in ('no', 'false'):
51 | return False
52 | else:
53 | raise argparse.ArgumentTypeError('bool value expected.')
54 |
55 |
56 | def str2float(s):
57 | if isinstance(s, float):
58 | return s
59 | try:
60 | x = float(s)
61 | except ValueError:
62 | raise argparse.ArgumentTypeError('float value expected.')
63 | return x
64 |
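65 | 
66 | # Illustrative usage only: wire the converters above into argparse so that,
67 | # e.g., "--gpu false" and "--train_rate 0.7" are parsed with the intended types.
68 | if __name__ == '__main__':
69 |     parser = argparse.ArgumentParser()
70 |     parser.add_argument('--gpu', type=str2bool, default=None, help='whether use gpu')
71 |     parser.add_argument('--train_rate', type=str2float, default=None, help='the train set rate')
72 |     args = parser.parse_args(['--gpu', 'false', '--train_rate', '0.7'])
73 |     print(args.gpu, args.train_rate)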
--------------------------------------------------------------------------------
/STFGNN/utils/dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | utility functions for the data preprocessing stage
3 | """
4 | import numpy as np
5 | import time
6 | from datetime import datetime, timedelta
7 | from collections import defaultdict
8 |
9 |
10 | def parse_time(time_in, timezone_offset_in_minute=0):
11 | """
12 |     convert a time string in the json time_format into a local datetime
13 |     """
14 |     date = datetime.strptime(time_in, '%Y-%m-%dT%H:%M:%SZ')  # this is UTC time
15 | return date + timedelta(minutes=timezone_offset_in_minute)
16 |
17 |
18 | def cal_basetime(start_time, base_zero):
19 | """
20 |     used to split a trajectory into sessions,
21 |     the idea: given a start_time, find a base time base_time,
22 |     points falling between base_time and base_time + time_length are grouped into one session,
23 |     base_time is chosen this way so that the same hour-of-day slot is always encoded to the same number
24 | """
25 | if base_zero:
26 | return start_time - timedelta(hours=start_time.hour,
27 | minutes=start_time.minute,
28 | seconds=start_time.second,
29 | microseconds=start_time.microsecond)
30 | else:
31 | # time length = 12
32 | if start_time.hour < 12:
33 | return start_time - timedelta(hours=start_time.hour,
34 | minutes=start_time.minute,
35 | seconds=start_time.second,
36 | microseconds=start_time.microsecond)
37 | else:
38 | return start_time - timedelta(hours=start_time.hour - 12,
39 | minutes=start_time.minute,
40 | seconds=start_time.second,
41 | microseconds=start_time.microsecond)
42 |
43 |
44 | def cal_timeoff(now_time, base_time):
45 | """
46 |     compute the difference between two times; the return value is in hours
47 |     """
48 |     # first align now to the hour
49 | delta = now_time - base_time
50 | return delta.days * 24 + delta.seconds / 3600
51 |
52 |
53 | def caculate_time_sim(data):
54 | time_checkin_set = defaultdict(set)
55 | tim_size = data['tim_size']
56 | data_neural = data['data']
57 | for uid in data_neural:
58 | uid_sessions = data_neural[uid]
59 | for session in uid_sessions:
60 | for checkin in session:
61 | timid = checkin[1]
62 | locid = checkin[0]
63 | if timid not in time_checkin_set:
64 | time_checkin_set[timid] = set()
65 | time_checkin_set[timid].add(locid)
66 | sim_matrix = np.zeros((tim_size, tim_size))
67 | for i in range(tim_size):
68 | for j in range(tim_size):
69 | set_i = time_checkin_set[i]
70 | set_j = time_checkin_set[j]
71 | if len(set_i | set_j) != 0:
72 | jaccard_ij = len(set_i & set_j) / len(set_i | set_j)
73 | sim_matrix[i][j] = jaccard_ij
74 | return sim_matrix
75 |
76 |
77 | def parse_coordinate(coordinate):
78 | items = coordinate[1:-1].split(',')
79 | return float(items[0]), float(items[1])
80 |
81 |
82 | def string2timestamp(strings, offset_frame):
83 | ts = []
84 | for t in strings:
85 | dtstr = '-'.join([t[:4].decode(), t[4:6].decode(), t[6:8].decode()])
86 | slot = int(t[8:]) - 1
87 | ts.append(np.datetime64(dtstr, 'm') + slot * offset_frame)
88 | return ts # [numpy.datetime64('2014-01-01T00:00'), ...]
89 |
90 |
91 | def timestamp2array(timestamps, t):
92 | """
93 |     convert every timestamp in the sequence into a feature array, taking weekday and hour into account,
94 |     timestamp: numpy.datetime64('2013-07-01T00:00:00.000000000')
95 | 
96 |     Args:
97 |         timestamps: sequence of timestamps
98 |         t: number of time steps per day
99 | 
100 |     Returns:
101 |         np.ndarray: feature array, shape: (len(timestamps), ext_dim)
102 | """
103 | vec_wday = [time.strptime(
104 | str(t)[:10], '%Y-%m-%d').tm_wday for t in timestamps]
105 | vec_hour = [time.strptime(str(t)[11:13], '%H').tm_hour for t in timestamps]
106 | vec_minu = [time.strptime(str(t)[14:16], '%M').tm_min for t in timestamps]
107 | ret = []
108 | for idx, wday in enumerate(vec_wday):
109 | # day
110 | v = [0 for _ in range(7)]
111 | v[wday] = 1
112 |         if wday >= 5:  # 0 is Monday, 6 is Sunday
113 | v.append(0) # weekend
114 | else:
115 | v.append(1) # weekday len(v)=8
116 | # hour
117 | v += [0 for _ in range(t)] # len(v)=8+T
118 | hour = vec_hour[idx]
119 | minu = vec_minu[idx]
120 |         # 24*60/t is the number of minutes covered by one time step
121 |         # hour * 60 + minu is the number of minutes elapsed since 0:00; the quotient is the index of the current time step
122 | # print(hour, minu, T, (hour * 60 + minu) / (24 * 60 / T))
123 |         v[int((hour * 60 + minu) / (24 * 60 / t)) + 8] = 1
124 |         # the +8 offset skips the 8 leading entries of v that encode the weekday
125 | if hour >= 18 or hour < 6:
126 | v.append(0) # night
127 | else:
128 | v.append(1) # day
129 | ret.append(v) # len(v)=7+1+T+1=T+9
130 | return np.asarray(ret)
131 |
132 |
133 | def timestamp2vec_origin(timestamps):
134 | """
135 |     convert every timestamp in the sequence into a feature array, considering only the weekday,
136 |     timestamp: numpy.datetime64('2013-07-01T00:00:00.000000000')
137 | 
138 |     Args:
139 |         timestamps: sequence of timestamps
140 | 
141 |     Returns:
142 |         np.ndarray: feature array, shape: (len(timestamps), 8)
143 | """
144 | vec = [time.strptime(str(t)[:10], '%Y-%m-%d').tm_wday for t in timestamps]
145 | ret = []
146 | for i in vec:
147 | v = [0 for _ in range(7)]
148 | v[i] = 1
149 | if i >= 5:
150 | v.append(0) # weekend
151 | else:
152 | v.append(1) # weekday
153 | ret.append(v)
154 | return np.asarray(ret)
155 |
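156 | 
157 | # Illustrative usage only: with t time steps per day, timestamp2array produces
158 | # 7 weekday bits + 1 weekend bit + t time-of-day bits + 1 day/night bit,
159 | # i.e. t + 9 features per timestamp.
160 | if __name__ == '__main__':
161 |     stamps = [np.datetime64('2013-07-01T08:30'), np.datetime64('2013-07-06T23:00')]
162 |     feats = timestamp2array(stamps, t=48)  # 48 half-hour steps per day
163 |     print(feats.shape)  # (2, 57)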
--------------------------------------------------------------------------------
/STFGNN/utils/normalization.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Scaler:
5 | """
6 |     normalization interface
7 | """
8 |
9 | def transform(self, data):
10 | """
11 |         normalize the data
12 | 
13 |         Args:
14 |             data(np.ndarray): data before normalization
15 | 
16 |         Returns:
17 |             np.ndarray: normalized data
18 | """
19 | raise NotImplementedError("Transform not implemented")
20 |
21 | def inverse_transform(self, data):
22 | """
23 |         de-normalize the data
24 | 
25 |         Args:
26 |             data(np.ndarray): normalized data
27 | 
28 |         Returns:
29 |             np.ndarray: data restored to the original scale
30 | """
31 | raise NotImplementedError("Inverse_transform not implemented")
32 |
33 |
34 | class NoneScaler(Scaler):
35 | """
36 |     no normalization
37 | """
38 |
39 | def transform(self, data):
40 | return data
41 |
42 | def inverse_transform(self, data):
43 | return data
44 |
45 |
46 | class NormalScaler(Scaler):
47 | """
48 |     normalize by dividing by the maximum value
49 | x = x / x.max
50 | """
51 |
52 | def __init__(self, maxx):
53 | self.max = maxx
54 |
55 | def transform(self, data):
56 | return data / self.max
57 |
58 | def inverse_transform(self, data):
59 | return data * self.max
60 |
61 |
62 | class StandardScaler(Scaler):
63 | """
64 |     Z-score normalization
65 | x = (x - x.mean) / x.std
66 | """
67 |
68 | def __init__(self, mean, std):
69 | self.mean = mean
70 | self.std = std
71 |
72 | def transform(self, data):
73 | return (data - self.mean) / self.std
74 |
75 | def inverse_transform(self, data):
76 | return (data * self.std) + self.mean
77 |
78 |
79 | class MinMax01Scaler(Scaler):
80 | """
81 |     MinMax normalization, output range [0, 1]
82 | x = (x - min) / (max - min)
83 | """
84 |
85 | def __init__(self, minn, maxx):
86 | self.min = minn
87 | self.max = maxx
88 |
89 | def transform(self, data):
90 | return (data - self.min) / (self.max - self.min)
91 |
92 | def inverse_transform(self, data):
93 | return data * (self.max - self.min) + self.min
94 |
95 |
96 | class MinMax11Scaler(Scaler):
97 | """
98 |     MinMax normalization, output range [-1, 1]
99 | x = (x - min) / (max - min)
100 | x = x * 2 - 1
101 | """
102 |
103 | def __init__(self, minn, maxx):
104 | self.min = minn
105 | self.max = maxx
106 |
107 | def transform(self, data):
108 | return ((data - self.min) / (self.max - self.min)) * 2. - 1.
109 |
110 | def inverse_transform(self, data):
111 | return ((data + 1.) / 2.) * (self.max - self.min) + self.min
112 |
113 |
114 | class LogScaler(Scaler):
115 | """
116 | Log scaler
117 | x = log(x+eps)
118 | """
119 |
120 | def __init__(self, eps=0.999):
121 | self.eps = eps
122 |
123 | def transform(self, data):
124 | return np.log(data + self.eps)
125 |
126 | def inverse_transform(self, data):
127 | return np.exp(data) - self.eps
128 |
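129 | 
130 | # Illustrative usage only: a scaler is built from training-set statistics and
131 | # reused on every split; inverse_transform recovers the original scale.
132 | if __name__ == '__main__':
133 |     train = np.array([10.0, 20.0, 30.0])
134 |     scaler = StandardScaler(mean=train.mean(), std=train.std())
135 |     z = scaler.transform(train)
136 |     print(z, scaler.inverse_transform(z))  # standardized values, then the original ones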
--------------------------------------------------------------------------------
/STFGNN/utils/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import logging
3 | import datetime
4 | import os
5 | import sys
6 | import numpy as np
7 |
8 |
9 | def get_executor(config, model):
10 | """
11 |     create the executor according to config['executor']
12 |
13 | Args:
14 | config(ConfigParser): config
15 | model(AbstractModel): model
16 |
17 | Returns:
18 | AbstractExecutor: the loaded executor
19 | """
20 | if config["task"] == "single_step":
21 | try:
22 | return getattr(importlib.import_module('libcity.executor.single_step_executor'),
23 | config['executor'])(config, model)
24 | except AttributeError:
25 | raise AttributeError('executor is not found')
26 | elif config["task"] == "multi_step":
27 | try:
28 | return getattr(importlib.import_module('libcity.executor.multi_step_executor'),
29 | config['executor'])(config, model)
30 | except AttributeError:
31 | raise AttributeError('executor is not found')
32 | else:
33 | raise AttributeError('task is not found')
34 |
35 |
36 |
37 | def get_model(config, data_feature):
38 | """
39 |     create the model according to config['model']
40 |
41 | Args:
42 | config(ConfigParser): config
43 | data_feature(dict): feature of the data
44 |
45 | Returns:
46 | AbstractModel: the loaded model
47 | """
48 | if config['task'] == 'multi_step':
49 | print("config[model]: ", config['model'])
50 | try:
51 | return getattr(importlib.import_module('libcity.model.multi_step_model'),
52 | config['model'])(config, data_feature)
53 | except AttributeError:
54 | raise AttributeError('model is not found')
55 | elif config["task"] == "single_step":
56 | print("config[model]: ", config['model'])
57 | try:
58 | return getattr(importlib.import_module('libcity.model.single_step_model'),
59 | config['model'])(config, data_feature)
60 | except AttributeError:
61 | raise AttributeError('model is not found')
62 |
63 | else:
64 | raise AttributeError('task is not found')
65 |
66 |
67 | def get_evaluator(config):
68 | """
69 |     create the evaluator according to config['evaluator']
70 |
71 | Args:
72 | config(ConfigParser): config
73 |
74 | Returns:
75 | AbstractEvaluator: the loaded evaluator
76 | """
77 | try:
78 | return getattr(importlib.import_module('libcity.evaluator'),
79 | config['evaluator'])(config)
80 | except AttributeError:
81 | raise AttributeError('evaluator is not found')
82 |
83 |
84 | def get_logger(config, name=None):
85 | """
86 |     get a Logger object
87 |
88 | Args:
89 | config(ConfigParser): config
90 | name: specified name
91 |
92 | Returns:
93 | Logger: logger
94 | """
95 | log_dir = './libcity/log'
96 | if not os.path.exists(log_dir):
97 | os.makedirs(log_dir)
98 | log_filename = '{}-{}-{}.log'.format(
99 | config['model'], config['dataset'], get_local_time())
100 | logfilepath = os.path.join(log_dir, log_filename)
101 |
102 | logger = logging.getLogger(name)
103 |
104 | log_level = config.get('log_level', 'INFO')
105 |
106 | if log_level.lower() == 'info':
107 | level = logging.INFO
108 | elif log_level.lower() == 'debug':
109 | level = logging.DEBUG
110 | elif log_level.lower() == 'error':
111 | level = logging.ERROR
112 | elif log_level.lower() == 'warning':
113 | level = logging.WARNING
114 | elif log_level.lower() == 'critical':
115 | level = logging.CRITICAL
116 | else:
117 | level = logging.INFO
118 |
119 | logger.setLevel(level)
120 |
121 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
122 | file_handler = logging.FileHandler(logfilepath)
123 | file_handler.setFormatter(formatter)
124 |
125 | console_formatter = logging.Formatter(
126 | '%(asctime)s - %(levelname)s - %(message)s')
127 | console_handler = logging.StreamHandler(sys.stdout)
128 | console_handler.setFormatter(console_formatter)
129 |
130 | logger.addHandler(file_handler)
131 | logger.addHandler(console_handler)
132 |
133 | logger.info('Log directory: %s', log_dir)
134 | return logger
135 |
136 |
137 | def get_local_time():
138 | """
139 |     get the current local time
140 | 
141 |     Return:
142 |         str: the current time formatted as '%b-%d-%Y_%H-%M-%S'
143 | """
144 | cur = datetime.datetime.now()
145 | cur = cur.strftime('%b-%d-%Y_%H-%M-%S')
146 | return cur
147 |
148 |
149 | def ensure_dir(dir_path):
150 | """Make sure the directory exists, if it does not exist, create it.
151 |
152 | Args:
153 | dir_path (str): directory path
154 | """
155 | if not os.path.exists(dir_path):
156 | os.makedirs(dir_path)
157 |
158 |
159 | def trans_naming_rule(origin, origin_rule, target_rule):
160 | """
161 |     naming-convention conversion
162 | 
163 |     Args:
164 |         origin (str): variable name in the source naming convention
165 |         origin_rule (str): source naming convention, an enum-like string
166 |         target_rule (str): target naming convention, an enum-like string
167 | 
168 |     Return:
169 |         target (str): the converted name
170 |     """
171 |     # TODO: the input is assumed to follow origin_rule; no check is performed here
172 | target = ''
173 | if origin_rule == 'upper_camel_case' and target_rule == 'under_score_rule':
174 | for i, c in enumerate(origin):
175 | if i == 0:
176 | target = c.lower()
177 | else:
178 | target += '_' + c.lower() if c.isupper() else c
179 | return target
180 | else:
181 | raise NotImplementedError(
182 |             'trans naming rule only supports conversion from upper_camel_case '
183 |             'to under_score_rule')
184 |
185 |
186 | def preprocess_data(data, config):
187 | """
188 | split by input_window and output_window
189 |
190 | Args:
191 | data: shape (T, ...)
192 |
193 | Returns:
194 | np.ndarray: (train_size/test_size, input_window, ...)
195 | (train_size/test_size, output_window, ...)
196 |
197 | """
198 | train_rate = config.get('train_rate', 0.7)
199 | eval_rate = config.get('eval_rate', 0.1)
200 |
201 | input_window = config.get('input_window', 12)
202 | output_window = config.get('output_window', 3)
203 |
204 | x, y = [], []
205 | for i in range(len(data) - input_window - output_window):
206 | a = data[i: i + input_window + output_window] # (in+out, ...)
207 | x.append(a[0: input_window]) # (in, ...)
208 | y.append(a[input_window: input_window + output_window]) # (out, ...)
209 | x = np.array(x) # (num_samples, in, ...)
210 | y = np.array(y) # (num_samples, out, ...)
211 |
212 | train_size = int(x.shape[0] * (train_rate + eval_rate))
213 | trainx = x[:train_size] # (train_size, in, ...)
214 | trainy = y[:train_size] # (train_size, out, ...)
215 | testx = x[train_size:x.shape[0]] # (test_size, in, ...)
216 | testy = y[train_size:x.shape[0]] # (test_size, out, ...)
217 | return trainx, trainy, testx, testy
218 |
219 |
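220 | 
221 | # Illustrative usage only: split a toy series of 100 steps into sliding windows
222 | # using the default 12-step input window and 3-step output window.
223 | if __name__ == '__main__':
224 |     series = np.arange(100).reshape(100, 1)
225 |     trainx, trainy, testx, testy = preprocess_data(series, {'train_rate': 0.7, 'eval_rate': 0.1})
226 |     print(trainx.shape, trainy.shape, testx.shape, testy.shape)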
--------------------------------------------------------------------------------
/STFGNN/utils/visualize.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import json
3 | from utils.utils import ensure_dir
4 | import os
5 |
6 |
7 | class VisHelper:
8 | def __init__(self, _config):
9 |
10 | self.config = _config
11 | self.raw_path = './raw_data/'
12 | self.dataset = _config.get("dataset", "")
13 | self.save_path = _config.get("save_path", "./visualized_data/")
14 |
15 | # get type
16 | self.config_path = self.raw_path + self.dataset + '/config.json'
17 | self.data_config = json.load(open(self.config_path, 'r'))
18 | if 'dyna' in self.data_config and ['state'] == self.data_config['dyna']['including_types']:
19 | self.type = 'state'
20 | elif 'grid' in self.data_config and ['state'] == self.data_config['grid']['including_types']:
21 | self.type = 'grid'
22 | else:
23 | self.type = 'trajectory'
24 | # get geo and dyna files
25 | all_files = os.listdir(self.raw_path + self.dataset)
26 | self.geo_file = []
27 | self.geo_path = None
28 | self.dyna_file = []
29 | self.dyna_path = None
30 | self.grid_file = []
31 | self.grid_path = None
32 | for file in all_files:
33 | if file.split('.')[1] == 'geo':
34 | self.geo_file.append(file)
35 | if file.split('.')[1] == 'dyna':
36 | self.dyna_file.append(file)
37 | if file.split('.')[1] == 'grid':
38 | self.grid_file.append(file)
39 |
40 | assert len(self.geo_file) == 1
41 |
42 | # reserved columns
43 | self.geo_reserved_lst = ['type', 'coordinates']
44 | self.dyna_reserved_lst = ['dyna_id', 'type', 'time', 'entity_id', 'traj_id', 'coordinates']
45 | self.grid_reserved_lst = ['dyna_id', 'type', 'time', 'row_id', 'column_id']
46 |
47 | def visualize(self):
48 | if self.type == 'trajectory':
49 | # geo
50 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0]
51 | self._visualize_geo()
52 |
53 | # dyna
54 | for dyna_file in self.dyna_file:
55 | self.dyna_path = self.raw_path + self.dataset + '/' + dyna_file
56 | self._visualize_dyna()
57 |
58 | elif self.type == 'state':
59 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0]
60 | for dyna_file in self.dyna_file:
61 | self.dyna_path = self.raw_path + self.dataset + '/' + dyna_file
62 | self._visualize_state()
63 | elif self.type == 'grid':
64 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0]
65 | for grid_file in self.grid_file:
66 | self.grid_path = self.raw_path + self.dataset + '/' + grid_file
67 | self._visualize_grid()
68 |
69 | def _visualize_state(self):
70 | geo_file = pd.read_csv(self.geo_path, index_col=None)
71 | dyna_file = pd.read_csv(self.dyna_path, index_col=None)
72 | geojson_obj = {'type': "FeatureCollection", 'features': []}
73 |
74 | # get feature_lst
75 | geo_feature_lst = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst]
76 | dyna_feature_lst = [_ for _ in list(dyna_file.columns) if _ not in self.dyna_reserved_lst]
77 |
78 | for _, row in geo_file.iterrows():
79 |
80 | # get feature dictionary
81 | geo_id = row['geo_id']
82 | feature_dct = row[geo_feature_lst].to_dict()
83 | dyna_i = dyna_file[dyna_file['entity_id'] == geo_id]
84 | for f in dyna_feature_lst:
85 | feature_dct[f] = float(dyna_i[f].mean())
86 |
87 | # form a feature
88 | feature_i = dict()
89 | feature_i['type'] = 'Feature'
90 | feature_i['properties'] = feature_dct
91 | feature_i['geometry'] = {}
92 | feature_i['geometry']['type'] = row['type']
93 | feature_i['geometry']['coordinates'] = eval(row['coordinates'])
94 | geojson_obj['features'].append(feature_i)
95 |
96 | ensure_dir(self.save_path)
97 | save_name = "_".join(self.dyna_path.split('/')[-1].split('.')) + '.json'
98 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w',
99 | encoding='utf-8'),
100 | ensure_ascii=False, indent=4)
101 |
102 | def _visualize_grid(self):
103 | geo_file = pd.read_csv(self.geo_path, index_col=None)
104 | grid_file = pd.read_csv(self.grid_path, index_col=None)
105 | geojson_obj = {'type': "FeatureCollection", 'features': []}
106 |
107 | # get feature_lst
108 | geo_feature_lst = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst]
109 | grid_feature_lst = [_ for _ in list(grid_file.columns) if _ not in self.grid_reserved_lst]
110 |
111 | for _, row in geo_file.iterrows():
112 |
113 | # get feature dictionary
114 | row_id, column_id = row['row_id'], row['column_id']
115 | feature_dct = row[geo_feature_lst].to_dict()
116 | dyna_i = grid_file[(grid_file['row_id'] == row_id) & (grid_file['column_id'] == column_id)]
117 | for f in grid_feature_lst:
118 | feature_dct[f] = float(dyna_i[f].mean())
119 |
120 | # form a feature
121 | feature_i = dict()
122 | feature_i['type'] = 'Feature'
123 | feature_i['properties'] = feature_dct
124 | feature_i['geometry'] = {}
125 | feature_i['geometry']['type'] = row['type']
126 | feature_i['geometry']['coordinates'] = eval(row['coordinates'])
127 | geojson_obj['features'].append(feature_i)
128 |
129 | ensure_dir(self.save_path)
130 | save_name = "_".join(self.grid_path.split('/')[-1].split('.')) + '.json'
131 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w',
132 | encoding='utf-8'),
133 | ensure_ascii=False, indent=4)
134 |
135 | def _visualize_geo(self):
136 | geo_file = pd.read_csv(self.geo_path, index_col=None)
137 | geojson_obj = {'type': "FeatureCollection", 'features': []}
138 | extra_feature = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst]
139 | for _, row in geo_file.iterrows():
140 | feature_dct = row[extra_feature].to_dict()
141 | feature_i = dict()
142 | feature_i['type'] = 'Feature'
143 | feature_i['properties'] = feature_dct
144 | feature_i['geometry'] = {}
145 | feature_i['geometry']['type'] = row['type']
146 | feature_i['geometry']['coordinates'] = eval(row['coordinates'])
147 | geojson_obj['features'].append(feature_i)
148 |
149 | ensure_dir(self.save_path)
150 | save_name = "_".join(self.geo_path.split('/')[-1].split('.')) + '.json'
151 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w',
152 | encoding='utf-8'),
153 | ensure_ascii=False, indent=4)
154 |
155 | def _visualize_dyna(self):
156 | dyna_file = pd.read_csv(self.dyna_path, index_col=None)
157 | dyna_feature_lst = [_ for _ in list(dyna_file.columns) if _ not in self.dyna_reserved_lst]
158 | geojson_obj = {'type': "FeatureCollection", 'features': []}
159 | trajectory = {}
160 | GPS_traj = "coordinates" in dyna_file.columns
161 | if not GPS_traj:
162 | geo_file = pd.read_csv(self.geo_path, index_col=None)
163 |
164 | a = dyna_file.groupby("entity_id")
165 | for entity_id, entity_value in a:
166 | if "traj_id" in dyna_file.columns:
167 | trajectory[entity_id] = {}
168 | entity_value = entity_value.groupby("traj_id")
169 | for traj_id, traj_value in entity_value:
170 | feature_dct = {"usr_id": entity_id, "traj_id": traj_id}
171 | for f in dyna_feature_lst:
172 | feature_dct[f] = float(traj_value[f].mean())
173 | feature_i = dict()
174 | feature_i['type'] = 'Feature'
175 | feature_i['properties'] = feature_dct
176 | feature_i['geometry'] = {}
177 | feature_i['geometry']['type'] = "LineString"
178 | feature_i['geometry']['coordinates'] = []
179 | if GPS_traj:
180 | for _, row in traj_value.iterrows():
181 | feature_i['geometry']['coordinates'].append(eval(row['coordinates']))
182 | else:
183 | for _, row in traj_value.iterrows():
184 | coor = eval(geo_file.loc[row['location']]['coordinates'])
185 | if _ == 0:
186 | feature_i['geometry']['coordinates'].append(coor[0])
187 | feature_i['geometry']['coordinates'].append(coor[1])
188 | geojson_obj['features'].append(feature_i)
189 |
190 | else:
191 | feature_dct = {"usr_id": entity_id}
192 | feature_i = dict()
193 | feature_i['type'] = 'Feature'
194 | feature_i['properties'] = feature_dct
195 | feature_i['geometry'] = {}
196 | feature_i['geometry']['type'] = "LineString"
197 | feature_i['geometry']['coordinates'] = []
198 | if GPS_traj:
199 | for _, row in entity_value.iterrows():
200 | feature_i['geometry']['coordinates'].append(eval(row['coordinates']))
201 | else:
202 | for _, row in entity_value.iterrows():
203 | coor = eval(geo_file.loc[row['location']]['coordinates'])
204 | if _ == 0:
205 | feature_i['geometry']['coordinates'].append(coor[0])
206 | feature_i['geometry']['coordinates'].append(coor[1])
207 | geojson_obj['features'].append(feature_i)
208 |
209 | ensure_dir(self.save_path)
210 | save_name = "_".join(self.dyna_path.split('/')[-1].split('.')) + '.json'
211 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w',
212 | encoding='utf-8'),
213 | ensure_ascii=False, indent=4)
214 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ## STFGNN-Pytorch
2 |
3 | This is a PyTorch re-implementation of the STFGNN model described in https://arxiv.org/abs/2012.09641.
4 |
5 | ### Quick start
6 |
7 | Put your data in **STFGNN/raw_data**.
8 |
9 | For example, to run the model on the PEMS03 dataset, put **adj_mx.pkl** and **PEMS03.npz** in **STFGNN/raw_data/PEMS03/** (a quick sanity check of the data format is sketched at the end of this readme).
10 |
11 | Set appropriate values for the parameters in **STFGNN/config/*.json**.
12 |
13 | Run **python main.py** to train and evaluate the model.
14 |
15 | ### Discussion
16 | Contact me via lwm568@buaa.edu.cn.
17 | Feel free to open an issue; questions and discussion are welcome.
18 |
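19 | ### Checking the raw data
20 | 
21 | The snippet below is only a minimal sanity check, written under the assumption that the `.npz` archive stores the sensor readings under the key `data` (as in the publicly released PEMS archives); adjust the key or path if your copy differs.
22 | 
23 | ```python
24 | import numpy as np
25 | 
26 | data = np.load("raw_data/PEMS03/PEMS03.npz")["data"]
27 | print(data.shape)  # expected: (time steps, num_nodes, feature_dim)
28 | ```
29 | 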
--------------------------------------------------------------------------------