STFGNN
├── config
│   ├── PEMS03.json
│   └── STFGNN.json
├── data
│   ├── dataset
│   │   ├── multi_step_dataset.py
│   │   └── stfgnn_dataset.py
│   └── utils.py
├── evaluator
│   └── evaluator.py
├── executor
│   ├── multi_step_executor.py
│   └── utils.py
├── main.py
├── model
│   ├── STFGNN.py
│   └── loss.py
├── raw_data
│   └── PEMS03
│       ├── PEMS03.npz
│       └── adj_mx.pkl
├── utils
│   ├── GPS_utils.py
│   ├── Optim.py
│   ├── argument_list.py
│   ├── dataset.py
│   ├── normalization.py
│   ├── utils.py
│   └── visualize.py
└── readme.md
/STFGNN/config/PEMS03.json:
--------------------------------------------------------------------------------
1 | {
2 | "filename": "raw_data/PEMS03/PEMS03.npz",
3 |
4 | "adj_type": "connectivity",
5 | "//": "adj_type be distance or connectivity",
6 | "adj_filename": "raw_data/PEMS03/adj_mx.pkl",
7 |
8 | "input_dim": 1,
9 | "output_dim": 1,
10 |
11 | "seq_len": 26208,
12 | "num_nodes": 358,
13 | "train_rate": 0.6,
14 | "eval_rate": 0.2,
15 | "feature_dim": 1,
16 | "mask": false
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/STFGNN/config/STFGNN.json:
--------------------------------------------------------------------------------
1 | {
2 | "device": "cuda:1",
3 | "gpu": true,
4 | "gpu_id": 1,
5 | "save": true,
6 | "cuda": true,
7 |
8 | "normalize": 1,
9 | "window": 12,
10 | "horizon": 12,
11 | "batch_size": 64,
12 | "order": 1,
13 | "lag": 12,
14 | "period": 288,
15 | "sparsity": 0.01,
16 |
17 | "hidden_dims": [[64, 64, 64], [64, 64, 64], [64, 64, 64]],
18 | "first_layer_embedding_size": 64,
19 | "out_layer_dim": 128,
20 | "strides": 4,
21 | "temporal_emb": true,
22 | "spatial_emb": true,
23 | "activation": "GLU",
24 | "module_type": "individual",
25 | "//": ["individual", "sharing"],
26 |
27 | "train_loss": "mae",
28 | "clip": 10,
29 | "epochs": 100,
30 | "seed": 54321,
31 | "log_interval": 2000,
32 | "optim": "adam",
33 | "lr": 0.001,
34 | "patience": 20,
35 | "lr_decay": false,
36 | "lr_scheduler": "multisteplr",
37 | "lr_decay_ratio": 0.1,
38 | "lr_decay_steps": [5, 20, 40, 70],
39 | "step_size": 10
40 | }
41 |
--------------------------------------------------------------------------------
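main.py (listed further down) merges these two JSON files into a single flat config dict, with keys from earlier files taking precedence. A minimal sketch of the same merge, assuming the script is run from the STFGNN/ directory:

import json

config = {}
for filename in ["config/PEMS03.json", "config/STFGNN.json"]:
    with open(filename, "r") as f:
        for key, value in json.load(f).items():
            # keep the first occurrence of every key, mirroring main.py
            config.setdefault(key, value)

print(config["adj_type"], config["window"], config["horizon"])  # connectivity 12 12
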
/STFGNN/data/dataset/multi_step_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | import pickle
5 | import torch
6 | from torch.autograd import Variable
7 |
8 | from data.utils import DataLoader, load_pickle, DataLoaderM_new
9 | from utils.normalization import StandardScaler, NormalScaler, NoneScaler, \
10 | MinMax01Scaler, MinMax11Scaler, LogScaler
11 |
12 |
13 | def get_adjacency_matrix(distance_df, sensor_ids, normalized_k=0.1):
14 | """
15 |
16 | :param distance_df: data frame with three columns: [from, to, distance].
17 | :param sensor_ids: list of sensor ids.
18 | :param normalized_k: entries that become lower than normalized_k after normalization are set to zero for sparsity.
19 | :return:
20 | """
21 | num_sensors = len(sensor_ids)
22 | dist_mx = np.zeros((num_sensors, num_sensors), dtype=np.float32)
23 | dist_mx[:] = np.inf
24 | # Builds sensor id to index map.
25 | sensor_id_to_ind = {}
26 | for i, sensor_id in enumerate(sensor_ids):
27 | sensor_id_to_ind[sensor_id] = i
28 |
29 | # Fills cells in the matrix with distances.
30 | for row in distance_df.values:
31 | if row[0] not in sensor_id_to_ind or row[1] not in sensor_id_to_ind:
32 | continue
33 | dist_mx[sensor_id_to_ind[row[0]], sensor_id_to_ind[row[1]]] = row[2]
34 |
35 | # Calculates the standard deviation as theta.
36 | distances = dist_mx[~np.isinf(dist_mx)].flatten()
37 | std = distances.std()
38 | adj_mx = np.exp(-np.square(dist_mx / std))
39 | # Make the adjacent matrix symmetric by taking the max.
40 | # adj_mx = np.maximum.reduce([adj_mx, adj_mx.T])
41 |
42 | # Sets entries that lower than a threshold, i.e., k, to zero for sparsity.
43 | adj_mx[adj_mx < normalized_k] = 0
44 | return sensor_ids, sensor_id_to_ind, adj_mx
45 |
46 |
47 | class MultiStepDataset(object):
48 |
49 | def __init__(self, config):
50 |
51 | self.config = config
52 | self.file_name = self.config.get("filename", " ")
53 | self.adj_filename = self.config.get("adj_filename", "")
54 | self.graph_sensor_ids = self.config.get("graph_sensor_ids", "")
55 | self.distances_file = self.config.get("distances_file", "")
56 | self.adj_type = self.config.get("adj_type", None)
57 |
58 | self.train_rate = self.config.get("train_rate", 0.6)
59 | self.valid_rate = self.config.get("eval_rate", 0.2)
60 | self.cuda = self.config.get("cuda", True)
61 |
62 | self.horizon = self.config.get("horizon", 12)
63 | self.window = self.config.get("window", 12)
64 |
65 | self.normalize = self.config.get("normalize", 2)
66 | self.batch_size = self.config.get("batch_size", 64)
67 | self.adj_mx = None
68 | self.add_time_in_day = self.config.get("add_time_in_day", False)
69 | self.add_day_in_week = self.config.get("add_day_in_week", False)
70 | self.input_dim = self.config.get("input_dim", 1)
71 | self.output_dim = self.config.get("output_dim", 1)
72 | #self.ensure_adj_mat()
73 | self._load_origin_data(self.file_name, self.adj_filename)
74 |
75 | self.data = self._gene_dataset()
76 |
77 | def ensure_adj_mat(self):
78 | if os.path.exists(self.adj_filename):
79 | return
80 | else:
81 | with open(self.graph_sensor_ids) as f:
82 | sensor_ids = f.read().strip().split(',')
83 | distance_df = pd.read_csv(self.distances_file, dtype={'from': 'str', 'to': 'str'})
84 | _, sensor_id_to_ind, adj_mx = get_adjacency_matrix(distance_df, sensor_ids, normalized_k=0.1)
85 | with open(self.adj_filename, 'wb') as f:
86 | pickle.dump([sensor_ids, sensor_id_to_ind, adj_mx], f, protocol=2)
87 | return
88 |
89 | def _load_origin_data(self, file_name, adj_name):
90 | if file_name[-3:] == "txt":
91 | fin = open(file_name)
92 | self.rawdat = np.loadtxt(fin, delimiter=',')
93 | elif file_name[-3:] == "csv":
94 | self.rawdat = pd.read_csv(file_name).values
95 | elif file_name[-2:] == "h5":
96 | self.rawdat = pd.read_hdf(file_name)
97 | elif file_name[-3:] == "npz":
98 | mid_dat = np.load(file_name)
99 | self.rawdat = mid_dat[mid_dat.files[0]]
100 | else:
101 | raise ValueError('file_name type error!')
102 |
103 | if adj_name == "":
104 | self.adj_mx = None
105 | elif adj_name[-3:] == "pkl":
106 | sensor_ids, sensor_id_to_ind, adj = load_pickle(adj_name)
107 | if self.adj_type == "distance":
108 | self.adj_mx = adj
109 | else:
110 | row, col = adj.shape
111 | for i in range(row):
112 | for j in range(i, col):
113 | if adj[i][j] > 0:
114 | adj[i][j] = 1
115 | adj[j][i] = 1
116 | else:
117 | adj[i][j] = 0
118 | adj[j][i] = 0
119 | self.adj_mx = adj
120 | else:
121 | raise ValueError('adj_name error!')
122 |
123 | def _get_scalar(self, x_train, y_train):
124 | """
125 |         Select the normalization method according to the global parameter `normalize`.
126 |
127 |         Args:
128 |             x_train: training data X
129 |             y_train: training data y
130 |
131 |         Returns:
132 |             Scaler: the normalization scaler object
133 | """
134 | if self.normalize == 2:
135 | scaler = NormalScaler(maxx=max(x_train.max(), y_train.max()))
136 | print('NormalScaler max: ' + str(scaler.max))
137 | elif self.normalize == 1:
138 | scaler = StandardScaler(mean=x_train.mean(), std=x_train.std())
139 | print('StandardScaler mean: ' + str(scaler.mean) + ', std: ' + str(scaler.std))
140 | elif self.normalize == 3:
141 | scaler = MinMax01Scaler(
142 | maxx=max(x_train.max(), y_train.max()), minn=min(x_train.min(), y_train.min()))
143 | print('MinMax01Scaler max: ' + str(scaler.max) + ', min: ' + str(scaler.min))
144 | elif self.normalize == 4:
145 | scaler = MinMax11Scaler(
146 | maxx=max(x_train.max(), y_train.max()), minn=min(x_train.min(), y_train.min()))
147 | print('MinMax11Scaler max: ' + str(scaler.max) + ', min: ' + str(scaler.min))
148 | elif self.normalize == 5:
149 | scaler = LogScaler()
150 | print('LogScaler')
151 | elif self.normalize == 0:
152 | scaler = NoneScaler()
153 | print('NoneScaler')
154 | else:
155 | raise ValueError('Scaler type error!')
156 | return scaler
157 |
158 | def _generate_graph_seq2seq_io_data(
159 | self, df, x_offsets, y_offsets, add_time_in_day=False, add_day_in_week=False, scaler=None
160 | ):
161 | """
162 |         Generate seq2seq samples.
163 |         :param df: np.ndarray [T, N, D] or pd.DataFrame
164 | :param x_offsets:
165 | :param y_offsets:
166 | :return:
167 | """
168 | num_samples, num_nodes = df.shape[0], df.shape[1]
169 | if not isinstance(df, np.ndarray):
170 | data = np.expand_dims(df.values, axis=-1)
171 | data_list = [data]
172 | else:
173 | data_list = [df]
174 | if add_time_in_day:
175 | time_ind = (df.index.values - df.index.values.astype("datetime64[D]")) / np.timedelta64(1, "D")
176 | time_in_day = np.tile(time_ind, [1, num_nodes, 1]).transpose((2, 1, 0))
177 | data_list.append(time_in_day)
178 | if add_day_in_week:
179 | day_in_week = np.zeros(shape=(num_samples, num_nodes, 7))
180 | day_in_week[np.arange(num_samples), :, df.index.dayofweek] = 1
181 | data_list.append(day_in_week)
182 |
183 | data = np.concatenate(data_list, axis=-1)
184 |
185 | x, y = [], []
186 | # t is the index of the last observation.
187 | min_t = abs(min(x_offsets))
188 | max_t = abs(num_samples - abs(max(y_offsets))) # Exclusive
189 | for t in range(min_t, max_t):
190 | x_t = data[t + x_offsets, ...]
191 | y_t = data[t + y_offsets, ...]
192 | x.append(x_t)
193 | y.append(y_t)
194 | x = np.stack(x, axis=0)
195 | y = np.stack(y, axis=0)
196 |
197 | return x, y
198 |
199 | def _generate_train_val_test(self):
200 | seq_length_x, seq_length_y = self.window, self.horizon
201 | x_offsets = np.arange(-(seq_length_x - 1), 1, 1)
202 | y_offsets = np.arange(1, (seq_length_y + 1), 1)
203 | x, y = self._generate_graph_seq2seq_io_data(self.rawdat, x_offsets,
204 | y_offsets, self.add_time_in_day, self.add_day_in_week)
205 | print("x shape: ", x.shape, ", y shape: ", y.shape)
206 | num_samples = x.shape[0]
207 | num_val = round(num_samples * self.valid_rate)
208 | num_train = round(num_samples * self.train_rate)
209 | num_test = num_samples - num_train - num_val
210 | return [x[:num_train], y[:num_train]], \
211 | [x[num_train:num_train + num_val], y[num_train:num_train + num_val]], \
212 | [x[num_train + num_val:], y[num_train + num_val:]]
213 |
214 | def _gene_dataset(self):
215 | data = {}
216 | self.train, self.valid, self.test = self._generate_train_val_test()
217 | x_train, y_train = self.train[0], self.train[1]
218 | x_valid, y_valid = self.valid[0], self.valid[1]
219 | x_test, y_test = self.test[0], self.test[1]
220 | self.scaler = self._get_scalar(x_train[..., :self.output_dim], y_train[..., :self.output_dim])
221 | x_train[..., :self.output_dim] = self.scaler.transform(x_train[..., :self.output_dim])
222 | y_train[..., :self.output_dim] = self.scaler.transform(y_train[..., :self.output_dim])
223 | x_valid[..., :self.output_dim] = self.scaler.transform(x_valid[..., :self.output_dim])
224 | y_valid[..., :self.output_dim] = self.scaler.transform(y_valid[..., :self.output_dim])
225 | x_test[..., :self.output_dim] = self.scaler.transform(x_test[..., :self.output_dim])
226 | y_test[..., :self.output_dim] = self.scaler.transform(y_test[..., :self.output_dim])
227 |
228 | data['train_loader'] = DataLoader(x_train[..., :self.input_dim], y_train[..., :self.output_dim],
229 | self.batch_size)
230 | data['valid_loader'] = DataLoader(x_valid[..., :self.input_dim], y_valid[..., :self.output_dim],
231 | self.batch_size)
232 | data['test_loader'] = DataLoader(x_test[..., :self.input_dim], y_test[..., :self.output_dim], self.batch_size)
233 | data['scaler'] = self.scaler
234 | data['num_batches'] = x_train.shape[0] / self.batch_size
235 | return data
236 |
237 | def get_data(self):
238 | """
239 |         Return the DataLoaders for the dataset: training, validation and test data.
240 |
241 | Returns:
242 | tuple: tuple contains:
243 | train_dataloader:
244 | eval_dataloader:
245 | test_dataloader:
246 | """
247 |         # load the dataset
248 |
249 | return self.data["train_loader"], self.data["valid_loader"], self.data["test_loader"]
250 |
251 | def get_data_feature(self):
252 | """
253 |         Return the dataset features; subclasses must implement this and return the required features.
254 |
255 | Returns:
256 |             dict: a dict containing the relevant features of the dataset
257 | """
258 | feature = {
259 | "scaler": self.data["scaler"],
260 | "adj_mx": self.adj_mx,
261 | "num_batches": self.data['num_batches']
262 | }
263 |
264 | return feature
265 |
--------------------------------------------------------------------------------
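utils/normalization.py is not reproduced in this listing. Based on how _get_scalar constructs the scalers and how the executor later calls inverse_transform, the StandardScaler interface assumed by this dataset is roughly the following sketch (an assumption about the missing file, not its actual contents):

class StandardScaler:
    # z-score normalization; works element-wise on numpy arrays or torch tensors
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return data * self.std + self.mean
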
/STFGNN/data/dataset/stfgnn_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | import pickle
5 | import torch
6 | import time
7 | from torch.autograd import Variable
8 |
9 | from data.dataset.multi_step_dataset import MultiStepDataset
10 |
11 | def gen_data(data, ntr, N):
12 | '''
13 | if flag:
14 | data=pd.read_csv(fname)
15 | else:
16 | data=pd.read_csv(fname,header=None)
17 | '''
18 | #data=data.as_matrix()
19 | data=np.reshape(data,[-1,288,N])
20 | return data[0:ntr]
21 |
22 | def normalize(a):
23 | mu=np.mean(a,axis=1,keepdims=True)
24 | std=np.std(a,axis=1,keepdims=True)
25 | return (a-mu)/std
26 |
27 | def compute_dtw(a,b,order=1,Ts=12,normal=True):
28 | if normal:
29 | a=normalize(a)
30 | b=normalize(b)
31 | T0=a.shape[1]
32 | d=np.reshape(a,[-1,1,T0])-np.reshape(b,[-1,T0,1])
33 | d=np.linalg.norm(d,axis=0,ord=order)
34 | D=np.zeros([T0,T0])
35 | for i in range(T0):
36 | for j in range(max(0,i-Ts),min(T0,i+Ts+1)):
37 | if (i==0) and (j==0):
38 | D[i,j]=d[i,j]**order
39 | continue
40 | if (i==0):
41 | D[i,j]=d[i,j]**order+D[i,j-1]
42 | continue
43 | if (j==0):
44 | D[i,j]=d[i,j]**order+D[i-1,j]
45 | continue
46 | if (j==i-Ts):
47 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i-1,j])
48 | continue
49 | if (j==i+Ts):
50 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i,j-1])
51 | continue
52 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i-1,j],D[i,j-1])
53 | return D[-1,-1]**(1.0/order)
54 |
55 | def construct_adj_fusion(A, A_dtw, steps):
56 | '''
57 | construct a bigger adjacency matrix using the given matrix
58 |
59 | Parameters
60 | ----------
61 | A: np.ndarray, adjacency matrix, shape is (N, N)
62 |
63 |     steps: how many times larger the new adjacency matrix is than A
64 |
65 | Returns
66 | ----------
67 |     new adjacency matrix: np.ndarray, shape is (N * steps, N * steps)
68 |
69 | ----------
70 | This is 4N_1 mode:
71 |
72 | [T, 1, 1, T
73 | 1, S, 1, 1
74 | 1, 1, S, 1
75 | T, 1, 1, T]
76 |
77 | '''
78 |
79 | N = len(A)
80 | adj = np.zeros([N * steps] * 2) # "steps" = 4 !!!
81 |
82 | for i in range(steps):
83 | if (i == 1) or (i == 2):
84 | adj[i * N: (i + 1) * N, i * N: (i + 1) * N] = A
85 | else:
86 | adj[i * N: (i + 1) * N, i * N: (i + 1) * N] = A_dtw
87 | #'''
88 | for i in range(N):
89 | for k in range(steps - 1):
90 | adj[k * N + i, (k + 1) * N + i] = 1
91 | adj[(k + 1) * N + i, k * N + i] = 1
92 | #'''
93 | adj[3 * N: 4 * N, 0: N] = A_dtw #adj[0 * N : 1 * N, 1 * N : 2 * N]
94 | adj[0 : N, 3 * N: 4 * N] = A_dtw #adj[0 * N : 1 * N, 1 * N : 2 * N]
95 |
96 | adj[2 * N: 3 * N, 0 : N] = adj[0 * N : 1 * N, 1 * N : 2 * N]
97 | adj[0 : N, 2 * N: 3 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N]
98 | adj[1 * N: 2 * N, 3 * N: 4 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N]
99 | adj[3 * N: 4 * N, 1 * N: 2 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N]
100 |
101 |
102 | for i in range(len(adj)):
103 | adj[i, i] = 1
104 |
105 | return adj
106 |
107 |
108 | class STFGNNDataset(MultiStepDataset):
109 |
110 | def __init__(self, config):
111 | super().__init__(config)
112 | self.strides = self.config.get("strides", 4)
113 | self.order = self.config.get("order", 1)
114 | self.lag = self.config.get("lag", 12)
115 | self.period = self.config.get("period", 288)
116 | self.sparsity = self.config.get("sparsity", 0.01)
117 | self.train_rate = self.config.get("train_rate", 0.6)
118 | self.adj_mx = torch.FloatTensor(self._construct_adj())
119 | # self.adj_mx = torch.randn((1432, 1432))
120 |
121 |
122 | def _construct_dtw(self):
123 | data = self.rawdat[:, :, 0]
124 | total_day = data.shape[0] / 288
125 | tr_day = int(total_day * 0.6)
126 | n_route = data.shape[1]
127 | xtr = gen_data(data, tr_day, n_route)
128 | print(np.shape(xtr))
129 | T0 = 288
130 | T = 12
131 | N = n_route
132 | d = np.zeros([N, N])
133 | for i in range(N):
134 | for j in range(i+1,N):
135 | d[i,j]=compute_dtw(xtr[:,:,i],xtr[:,:,j])
136 |
137 | print("The calculation of time series is done!")
138 | dtw = d+ d.T
139 | n = dtw.shape[0]
140 | w_adj = np.zeros([n,n])
141 | adj_percent = 0.01
142 | top = int(n * adj_percent)
143 | for i in range(dtw.shape[0]):
144 | a = dtw[i,:].argsort()[0:top]
145 | for j in range(top):
146 | w_adj[i, a[j]] = 1
147 |
148 | for i in range(n):
149 | for j in range(n):
150 | if (w_adj[i][j] != w_adj[j][i] and w_adj[i][j] ==0):
151 | w_adj[i][j] = 1
152 | if( i==j):
153 | w_adj[i][j] = 1
154 |
155 | print("Total route number: ", n)
156 | print("Sparsity of adj: ", len(w_adj.nonzero()[0])/(n*n))
157 | print("The weighted matrix of temporal graph is generated!")
158 | self.dtw = w_adj
159 |
160 |
161 | def _construct_adj(self):
162 | """
163 |         Build the localized spatio-temporal graph.
164 |         :param A: np.ndarray, adjacency matrix, shape is (N, N)
165 |         :param steps: how many time steps are used to build the graph
166 |         :return: new adjacency matrix: np.ndarray, shape is (N * steps, N * steps)
167 | """
168 | self._construct_dtw()
169 | adj_mx = construct_adj_fusion(self.adj_mx, self.dtw, self.strides)
170 | print("The shape of localized adjacency matrix: {}".format(
171 | adj_mx.shape), flush=True)
172 |
173 | return adj_mx
174 |
175 | def get_data(self):
176 | """
177 |         Return the DataLoaders for the dataset: training, validation and test data.
178 |
179 | Returns:
180 | tuple: tuple contains:
181 | train_dataloader:
182 | eval_dataloader:
183 | test_dataloader:
184 | """
185 | # 加载数据集
186 |         # load the dataset
187 | return self.data["train_loader"], self.data["valid_loader"], self.data["test_loader"]
188 |
189 | def get_data_feature(self):
190 | """
191 |         Return the dataset features; subclasses must implement this and return the required features.
192 |
193 | Returns:
194 |             dict: a dict containing the relevant features of the dataset
195 | """
196 | feature = {
197 | "scaler": self.data["scaler"],
198 | "adj_mx": self.adj_mx,
199 | "num_batches": self.data['num_batches']
200 | }
201 |
202 | return feature
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
--------------------------------------------------------------------------------
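A tiny worked example of construct_adj_fusion, assuming the STFGNN directory is on PYTHONPATH. With N = 2 nodes, a spatial adjacency A and a DTW-based temporal adjacency A_dtw, the fused matrix is (4N, 4N) = (8, 8) and follows the [T, 1, 1, T / 1, S, 1, 1 / 1, 1, S, 1 / T, 1, 1, T] block layout sketched in the docstring:

import numpy as np
from data.dataset.stfgnn_dataset import construct_adj_fusion

A = np.array([[0., 1.],
              [1., 0.]])      # spatial graph (the S blocks)
A_dtw = np.array([[0., 1.],
                  [1., 0.]])  # temporal DTW graph (the T blocks)

adj = construct_adj_fusion(A, A_dtw, steps=4)
print(adj.shape)              # (8, 8)
print(adj[0:2, 2:4])          # identity block: each node linked to itself at the next time step
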
/STFGNN/data/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import numpy as np
3 | import copy
4 | import pickle
5 | import torch.utils.data as torch_data
6 | import torch
7 | import pandas as pd
8 |
9 |
10 |
11 | def get_dataset(config):
12 | """
13 | according the config['dataset_class'] to create the dataset
14 |
15 | Args:
16 | config(ConfigParser): config
17 |
18 | Returns:
19 | AbstractDataset: the loaded dataset
20 | """
21 | if config["task"] == "multi_step":
22 | try:
23 |             return getattr(importlib.import_module('data.dataset.multi_step_dataset'),
24 | config['dataset_class'])(config)
25 | except AttributeError:
26 | raise AttributeError('dataset_class is not found')
27 | elif config["task"] == "single_step":
28 | try:
29 |             return getattr(importlib.import_module('data.dataset.single_step_dataset'),
30 | config['dataset_class'])(config)
31 | except AttributeError:
32 | raise AttributeError('dataset_class is not found')
33 |
34 |
35 | def load_pickle(pickle_file):
36 | try:
37 | with open(pickle_file, 'rb') as f:
38 | pickle_data = pickle.load(f)
39 | except UnicodeDecodeError as e:
40 | with open(pickle_file, 'rb') as f:
41 | pickle_data = pickle.load(f, encoding='latin1')
42 | except Exception as e:
43 | print('Unable to load data ', pickle_file, ':', e)
44 | raise
45 | return pickle_data
46 |
47 |
48 | class DataLoader(object):
49 | def __init__(self, xs, ys, batch_size, pad_with_last_sample=True, shuffle=False):
50 | """
51 |
52 | :param xs:
53 | :param ys:
54 | :param batch_size:
55 |         :param pad_with_last_sample: pad with the last sample to make the number of samples divisible by batch_size.
56 | """
57 | self.batch_size = batch_size
58 | self.current_ind = 0
59 | self.seq_len = ys.shape[0]
60 | if pad_with_last_sample:
61 | num_padding = (batch_size - (len(xs) % batch_size)) % batch_size
62 | x_padding = np.repeat(xs[-1:], num_padding, axis=0)
63 | y_padding = np.repeat(ys[-1:], num_padding, axis=0)
64 | xs = np.concatenate([xs, x_padding], axis=0)
65 | ys = np.concatenate([ys, y_padding], axis=0)
66 | self.size = len(xs)
67 | self.num_batch = int(self.size // self.batch_size)
68 | if shuffle:
69 | permutation = np.random.permutation(self.size)
70 | xs, ys = xs[permutation], ys[permutation]
71 | self.xs = xs
72 | self.ys = ys
73 |
74 | def shuffle(self):
75 |         """Shuffle the samples."""
76 | permutation = np.random.permutation(self.size)
77 | xs, ys = self.xs[permutation], self.ys[permutation]
78 | self.xs = xs
79 | self.ys = ys
80 |
81 | def get_iterator(self):
82 | self.current_ind = 0
83 |
84 | def _wrapper():
85 | while self.current_ind < self.num_batch:
86 | start_ind = self.batch_size * self.current_ind
87 | end_ind = min(self.size, self.batch_size * (self.current_ind + 1))
88 | x_i = self.xs[start_ind: end_ind, ...]
89 | y_i = self.ys[start_ind: end_ind, ...]
90 | yield (x_i, y_i)
91 | self.current_ind += 1
92 |
93 | return _wrapper()
94 |
95 |
96 | class DataLoaderM_new(object):
97 | def __init__(self, xs, ys, ycl, batch_size, pad_with_last_sample=True):
98 | """
99 | :param xs:
100 | :param ys:
101 | :param batch_size:
102 |         :param pad_with_last_sample: pad with the last sample to make the number of samples divisible by batch_size.
103 | """
104 | self.batch_size = batch_size
105 | self.current_ind = 0
106 | if pad_with_last_sample:
107 | num_padding = (batch_size - (len(xs) % batch_size)) % batch_size
108 | x_padding = np.repeat(xs[-1:], num_padding, axis=0)
109 | y_padding = np.repeat(ys[-1:], num_padding, axis=0)
110 | xs = np.concatenate([xs, x_padding], axis=0)
111 | ys = np.concatenate([ys, y_padding], axis=0)
112 | ycl = np.concatenate([ycl, y_padding], axis=0)
113 | self.size = len(xs)
114 | self.num_batch = int(self.size // self.batch_size)
115 | self.xs = xs
116 | self.ys = ys
117 | self.ycl = ycl
118 |
119 | def shuffle(self):
120 | permutation = np.random.permutation(self.size)
121 | xs, ys, ycl = self.xs[permutation], self.ys[permutation], self.ycl[
122 | permutation]
123 | self.xs = xs
124 | self.ys = ys
125 | self.ycl = ycl
126 |
127 | def get_iterator(self):
128 | self.current_ind = 0
129 |
130 | def _wrapper():
131 | while self.current_ind < self.num_batch:
132 | start_ind = self.batch_size * self.current_ind
133 | end_ind = min(self.size,
134 | self.batch_size * (self.current_ind + 1))
135 | x_i = self.xs[start_ind:end_ind, ...]
136 | y_i = self.ys[start_ind:end_ind, ...]
137 | y_i_cl = self.ycl[start_ind:end_ind, ...]
138 | yield (x_i, y_i, y_i_cl)
139 | self.current_ind += 1
140 |
141 | return _wrapper()
142 |
143 |
144 |
--------------------------------------------------------------------------------
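A minimal sketch of the padding behaviour of DataLoader above, with toy arrays shaped [num_samples, window, num_nodes, dim] and the STFGNN directory on PYTHONPATH:

import numpy as np
from data.utils import DataLoader

xs = np.random.rand(100, 12, 4, 1)    # 100 samples, window 12, 4 nodes
ys = np.random.rand(100, 12, 4, 1)

loader = DataLoader(xs, ys, batch_size=64)   # pads 100 -> 128 by repeating the last sample
print(loader.size, loader.num_batch)         # 128 2

for x_batch, y_batch in loader.get_iterator():
    print(x_batch.shape, y_batch.shape)      # (64, 12, 4, 1) twice
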
/STFGNN/evaluator/evaluator.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import os
4 | import scipy.sparse as sp
5 | import torch
6 | from scipy.sparse import linalg
7 | from torch.autograd import Variable
8 | from collections import defaultdict
9 |
10 |
11 | def rse_np(preds, labels):
12 | if not isinstance(preds, np.ndarray):
13 | preds = preds.cpu().numpy()
14 | labels = labels.cpu().numpy()
15 | mse = np.sum(np.square(np.subtract(preds, labels)).astype('float32'))
16 | means = np.mean(labels)
17 | labels_mse = np.sum(np.square(np.subtract(labels, means)).astype('float32'))
18 | return np.sqrt(mse/labels_mse)
19 |
20 |
21 | def mae_np(preds, labels):
22 | if isinstance(preds, np.ndarray):
23 | mae = np.abs(np.subtract(preds, labels)).astype('float32')
24 | else:
25 | mae = np.abs(np.subtract(preds.cpu().numpy(), labels.cpu().numpy())).astype('float32')
26 | return np.mean(mae)
27 |
28 |
29 | def rmse_np(preds, labels):
30 | mse = mse_np(preds, labels)
31 | return np.sqrt(mse)
32 |
33 | def mse_np(preds, labels):
34 | if isinstance(preds, np.ndarray):
35 | return np.mean(np.square(np.subtract(preds, labels)).astype('float32'))
36 | else:
37 | return np.mean(np.square(np.subtract(preds.cpu().numpy(), labels.cpu().numpy())).astype('float32'))
38 |
39 | def mape_np(preds, labels):
40 | if isinstance(preds, np.ndarray):
41 | mape = np.abs(np.divide(np.subtract(preds, labels).astype('float32'), labels))
42 | else:
43 | mape = np.abs(np.divide(np.subtract(preds.cpu().numpy(), labels.cpu().numpy()).astype('float32'), labels.cpu().numpy()))
44 | return np.mean(mape)
45 |
46 |
47 |
48 | def rae_np(preds, labels):
49 | mse = np.sum(np.abs(np.subtract(preds, labels)).astype('float32'))
50 | means = np.mean(labels)
51 | labels_mse = np.sum(np.abs(np.subtract(labels, means)).astype('float32'))
52 | return mse/labels_mse
53 |
54 |
55 |
56 | def pcc_np(x, y):
57 | if not isinstance(x, np.ndarray):
58 | x, y = x.cpu().numpy(), y.cpu().numpy()
59 | x,y = x.reshape(-1),y.reshape(-1)
60 | return np.corrcoef(x,y)[0][1]
61 |
62 |
63 | def node_pcc_np(x, y):
64 | if not isinstance(x, np.ndarray):
65 | x, y = x.cpu().numpy(), y.cpu().numpy()
66 | sigma_x = x.std(axis=0)
67 | sigma_y = y.std(axis=0)
68 | mean_x = x.mean(axis=0)
69 | mean_y = y.mean(axis=0)
70 | cor = ((x - mean_x) * (y - mean_y)).mean(0) / (sigma_x * sigma_y + 0.000000000001)
71 | return cor.mean()
72 |
73 | def corr_np(preds, labels):
74 | sigma_p = (preds).std(axis=0)
75 | sigma_g = (labels).std(axis=0)
76 | mean_p = preds.mean(axis=0)
77 | mean_g = labels.mean(axis=0)
78 | index = (sigma_g != 0)
79 | correlation = ((preds - mean_p) * (labels - mean_g)).mean(axis=0) / (sigma_p * sigma_g)
80 | correlation = (correlation[index]).mean()
81 | return correlation
82 |
83 |
84 | def stemgnn_mape(preds,labels, axis=None):
85 | '''
86 | Mean absolute percentage error.
87 | :param labels: np.ndarray or int, ground truth.
88 | :param preds: np.ndarray or int, prediction.
89 | :param axis: axis to do calculation.
90 | :return: int, MAPE averages on all elements of input.
91 | '''
92 | if not isinstance(preds, np.ndarray):
93 | preds = preds.cpu().numpy()
94 | labels = labels.cpu().numpy()
95 | mape = (np.abs(preds - labels) / (np.abs(labels)+1e-5)).astype(np.float64)
96 | mape = np.where(mape > 5, 5, mape)
97 | return np.mean(mape, axis)
98 |
99 |
100 | def masked_rmse_np(preds, labels, null_val=np.nan):
101 | return np.sqrt(masked_mse_np(preds=preds, labels=labels, null_val=null_val))
102 |
103 |
104 | def masked_mse_np(preds, labels, null_val=np.nan):
105 | with np.errstate(divide='ignore', invalid='ignore'):
106 | if np.isnan(null_val):
107 | mask = ~np.isnan(labels)
108 | else:
109 | mask = np.not_equal(labels, null_val)
110 | mask = mask.astype('float32')
111 | mask /= np.mean(mask)
112 | mse = np.square(np.subtract(preds, labels)).astype('float32')
113 | mse = np.nan_to_num(mse * mask)
114 | return np.mean(mse)
115 |
116 |
117 | def masked_mae_np(preds, labels, null_val=np.nan):
118 | with np.errstate(divide='ignore', invalid='ignore'):
119 | if np.isnan(null_val):
120 | mask = ~np.isnan(labels)
121 | else:
122 | mask = np.not_equal(labels, null_val)
123 | mask = mask.astype('float32')
124 | mask /= np.mean(mask)
125 | mae = np.abs(np.subtract(preds, labels)).astype('float32')
126 | mae = np.nan_to_num(mae * mask)
127 | return np.mean(mae)
128 |
129 |
130 | def masked_mape_np(preds, labels, null_val=np.nan):
131 | if not isinstance(preds, np.ndarray):
132 | preds = preds.cpu().numpy()
133 | labels = labels.cpu().numpy()
134 | with np.errstate(divide='ignore', invalid='ignore'):
135 | if np.isnan(null_val):
136 | mask = ~np.isnan(labels)
137 | else:
138 | mask = np.not_equal(labels, null_val)
139 | mask = mask.astype('float32')
140 | mask /= np.mean(mask)
141 | mape = np.abs(np.divide(np.subtract(preds, labels).astype('float32'), labels))
142 | mape = np.nan_to_num(mask * mape)
143 | return np.mean(mape)
144 |
145 |
146 | class Evaluator(object):
147 | def __init__(self, config):
148 | self.config = config
149 | self.mask = self.config.get("mask", False)
150 | self.out_catagory = "multi"
151 |
152 |
153 | def _evaluate(self, output:np.ndarray, groud_truth:np.ndarray, mask: int, out_catagory: str):
154 | """
155 |         evaluate the model performance
156 | : multi
157 | :param output: [n_samples, 12, n_nodes, n_features]
158 | :param groud_truth: [n_samples, 12, n_nodes, n_features]
159 | : single
160 |
161 | :return: dict [str -> float]
162 | """
163 | if out_catagory == 'multi':
164 | if bool(mask):
165 | if output.shape != groud_truth.shape:
166 | groud_truth = np.expand_dims( groud_truth[...,0], axis=-1)
167 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}'
168 | batch, steps, scores, node = output.shape[0], output.shape[1], defaultdict(dict), output.shape[2]
169 | for step in range(steps):
170 | y_pred = np.reshape(output[:,step],(batch, -1))
171 | y_true = np.reshape(groud_truth[:,step],(batch,-1))
172 | scores['masked_MAE'][f'horizon-{step}'] = masked_mae_np(y_pred, y_true, null_val=0.0)
173 | scores['masked_RMSE'][f'horizon-{step}'] = masked_rmse_np(y_pred, y_true, null_val=0.0)
174 | scores['masked_MAPE'][f'horizon-{step}'] = masked_mape_np(y_pred, y_true, null_val=0.0) * 100.0
175 | scores['node_wise_PCC'][f'horizon-{step}']= node_pcc_np(y_pred.swapaxes(1,-1).reshape((-1,node)), y_true.swapaxes(1,-1).reshape((-1,node)))
176 | scores['PCC'][f'horizon-{step}'] = pcc_np(y_pred, y_true)
177 | scores['masked_MAE']['all'] = masked_mae_np(output,groud_truth ,null_val=0.0)
178 | scores['masked_RMSE']['all'] = masked_rmse_np( output,groud_truth, null_val=0.0)
179 | scores['masked_MAPE']['all'] = masked_mape_np( output,groud_truth, null_val=0.0) * 100.0
180 | scores['PCC']['all'] = pcc_np(output,groud_truth)
181 | scores["node_pcc"]['all'] = node_pcc_np(output, groud_truth)
182 | else:
183 | if output.shape != groud_truth.shape:
184 | groud_truth = np.expand_dims( groud_truth[...,0], axis=-1)
185 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}'
186 | batch, steps, scores, node = output.shape[0], output.shape[1], defaultdict(dict), output.shape[2]
187 | for step in range(steps):
188 | y_pred = output[:,step]
189 | y_true = groud_truth[:,step]
190 | scores['MAE'][f'horizon-{step}'] = mae_np(y_pred, y_true)
191 | scores['RMSE'][f'horizon-{step}'] = rmse_np(y_pred, y_true)
192 | # scores['MAPE'][f'horizon-{step}'] = mape_np(y_pred,y_true) * 100.0
193 | scores['masked_MAPE'][f'horizon-{step}'] = masked_mape_np(y_pred, y_true, null_val=0.0) * 100.0
194 | scores['StemGNN_MAPE'][f'horizon-{step}'] = stemgnn_mape(y_pred, y_true) * 100.0
195 | scores['PCC'][f'horizon-{step}'] = pcc_np(y_pred, y_true)
196 | scores['node_wise_PCC'][f'horizon-{step}']= node_pcc_np(y_pred.swapaxes(1,-1).reshape((-1,node)), y_true.swapaxes(1,-1).reshape((-1,node)))
197 | scores['MAE']['all'] = mae_np(output,groud_truth)
198 | scores['RMSE']['all'] = rmse_np(output,groud_truth)
199 | scores['masked_MAPE']['all'] = masked_mape_np( output,groud_truth, null_val=0.0) * 100.0
200 | scores['StemGNN_MAPE']['all'] = stemgnn_mape(output,groud_truth) * 100.0
201 | scores['PCC']['all'] = pcc_np(output,groud_truth)
202 | scores['node_wise_PCC']['all'] = node_pcc_np(output.swapaxes(2,-1).reshape((-1,node)), groud_truth.swapaxes(2,-1).reshape((-1,node)))
203 | else:
204 | output = output.squeeze()
205 | groud_truth = groud_truth.squeeze()
206 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}'
207 | scores = defaultdict(dict)
208 |
209 | scores['RMSE']['all'] = rmse_np(output, groud_truth)
210 | scores['masked_MAPE']['all'] = masked_mape_np(output, groud_truth, null_val=0.0) * 100.0
211 | scores['PCC']['all'] = node_pcc_np(output, groud_truth)
212 | scores['rse']['all'] = rse_np(output, groud_truth)
213 | scores['rae']['all'] = rae_np(output, groud_truth)
214 | scores['MAPE']['all'] = stemgnn_mape(output, groud_truth) * 100.0
215 | scores['MAE']['all'] = mae_np(output, groud_truth)
216 | scores["node_pcc"]['all'] = node_pcc_np(output, groud_truth)
217 | scores['CORR']['all'] = corr_np(output, groud_truth)
218 | return scores
219 |
220 |
221 | def evaluate(self, output, groud_truth):
222 | if not isinstance(output, np.ndarray):
223 | output = output.cpu().numpy()
224 | if not isinstance(groud_truth, np.ndarray):
225 | groud_truth = groud_truth.cpu().numpy()
226 | return self._evaluate(output, groud_truth, self.mask, self.out_catagory)
227 |
--------------------------------------------------------------------------------
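A short usage sketch of the evaluator on random arrays shaped [n_samples, horizon, n_nodes, n_features], with mask left at the PEMS03.json default of false:

import numpy as np
from evaluator.evaluator import Evaluator

evaluator = Evaluator({"mask": False})
preds = np.random.rand(8, 12, 4, 1).astype("float32")
truth = np.random.rand(8, 12, 4, 1).astype("float32")

scores = evaluator.evaluate(preds, truth)   # dict: metric -> {"horizon-0": ..., ..., "all": ...}
print(scores["MAE"]["all"], scores["RMSE"]["all"])
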
/STFGNN/executor/multi_step_executor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import numpy as np
4 | import torch
5 | import math
6 | import time
7 | import torch.nn as nn
8 | from torch.autograd import Variable
9 | from logging import getLogger
10 | import tqdm
11 | from torch.utils.tensorboard import SummaryWriter
12 | from executor.utils import get_train_loss
13 | from utils.Optim import Optim
14 | from evaluator.evaluator import Evaluator
15 | from utils.utils import ensure_dir
16 |
17 | from model import loss
18 | from functools import partial
19 |
20 |
21 | class MultiStepExecutor(object):
22 | def __init__(self, config, model):
23 | self.config = config
24 | self.evaluator = Evaluator(config)
25 |
26 | _device = self.config.get('device', torch.device('cpu'))
27 | self.device = torch.device(_device)
28 | self.model = model.to(self.device)
29 |
30 | self.cache_dir = 'cache/model_cache'
31 | self.evaluate_res_dir = 'cache/evaluate_cache'
32 | self.summary_writer_dir = 'log/runs'
33 | ensure_dir(self.cache_dir)
34 | ensure_dir(self.evaluate_res_dir)
35 | ensure_dir(self.summary_writer_dir)
36 |
37 | self._writer = SummaryWriter(self.summary_writer_dir)
38 | self._logger = getLogger()
39 | self._logger.info(self.model)
40 |
41 | for name, param in self.model.named_parameters():
42 | self._logger.info(str(name) + '\t' + str(param.shape) + '\t' +
43 | str(param.device) + '\t' + str(param.requires_grad))
44 |
45 | total_num = sum([param.nelement() for param in self.model.parameters()])
46 | self._logger.info('Total parameter numbers: {}'.format(total_num))
47 |
48 | self.train_loss = self.config.get("train_loss", "masked_mae")
49 | self.criterion = get_train_loss(self.train_loss)
50 |
51 | self.cuda = self.config.get("cuda", True)
52 | self.best_val = 10000000
53 | self.optim = Optim(
54 | model.parameters(), self.config
55 | )
56 | self.epochs = self.config.get("epochs", 100)
57 | self.scaler = self.model.scaler
58 | self.num_batches = self.model.num_batches
59 | self.num_nodes = self.config.get("num_nodes", 0)
60 | self.batch_size = self.config.get("batch_size", 64)
61 | self.patience = self.config.get("patience", 20)
62 | self.lr_decay = self.config.get("lr_decay", False)
63 | self.mask = self.config.get("mask", True)
64 |
65 |
66 | def train(self, train_data, valid_data):
67 | print("begin training")
68 | wait = 0
69 | batches_seen = self.num_batches * 0
70 |
71 |
72 | for epoch in tqdm.tqdm(range(1, self.epochs + 1)):
73 | epoch_start_time = time.time()
74 | train_loss = []
75 | train_data.shuffle()
76 |
77 | for iter, (x,y) in enumerate(train_data.get_iterator()):
78 | self.model.train()
79 | self.model.zero_grad()
80 | trainx = torch.Tensor(x).to(self.device) # [batch_size, window, num_nodes, dim]
81 | trainy = torch.Tensor(y).to(self.device) # [batch_size, horizon, num_nodes, dim]
82 | output = self.model(trainx)
83 | loss = self.criterion(self.scaler.inverse_transform(output),
84 | self.scaler.inverse_transform(trainy))
85 |
86 | loss.backward()
87 | self.optim.step()
88 | train_loss.append(loss.item())
89 |
90 |
91 | if self.lr_decay:
92 | self.optim.lr_scheduler.step()
93 |
94 | valid_loss = []
95 | valid_mape = []
96 | valid_rmse = []
97 | valid_pcc = []
98 | for iter, (x, y) in enumerate(valid_data.get_iterator()):
99 | self.model.eval()
100 | valx = torch.Tensor(x).to(self.device)
101 | valy = torch.Tensor(y).to(self.device)
102 | with torch.no_grad():
103 | output = self.model(valx)
104 | score = self.evaluator.evaluate(self.scaler.inverse_transform(output), \
105 | self.scaler.inverse_transform(valy))
106 | if self.mask:
107 | vloss = score["masked_MAE"]["all"]
108 | else:
109 | vloss = score["MAE"]["all"]
110 |
111 | valid_loss.append(vloss)
112 |
113 |
114 | mtrain_loss = np.mean(train_loss)
115 |
116 | mvalid_loss = np.mean(valid_loss)
117 |
118 | print(
119 | '| end of epoch {:3d} | time: {:5.2f}s | train_loss {:5.4f} | valid mae {:5.4f}'.format(
120 | epoch, (time.time() - epoch_start_time), mtrain_loss, \
121 | mvalid_loss))
122 |
123 |             if mvalid_loss < self.best_val:
124 |                 self.best_val = mvalid_loss
125 |                 wait = 0
126 |                 self.best_model = self.model
127 |
128 | else:
129 | wait += 1
130 |
131 | if wait >= self.patience:
132 | print('early stop at epoch: {:04d}'.format(epoch))
133 | break
134 |
135 | self.model = self.best_model
136 |
137 |
138 | def evaluate(self, test_data):
139 | """
140 | use model to test data
141 |
142 | Args:
143 | test_dataloader(torch.Dataloader): Dataloader
144 | """
145 | self._logger.info('Start evaluating ...')
146 | outputs = []
147 | realy = []
148 | seq_len = test_data.seq_len #test_data["y_test"]
149 | self.model.eval()
150 | for iter, (x, y) in enumerate(test_data.get_iterator()):
151 | testx = torch.Tensor(x).to(self.device)
152 | testy = torch.Tensor(y).to(self.device)
153 | with torch.no_grad():
154 | # self.evaluator.clear()
155 | pred = self.model(testx)
156 | outputs.append(pred)
157 | realy.append(testy)
158 | realy = torch.cat(realy, dim=0)
159 | yhat = torch.cat(outputs, dim=0)
160 |
161 | realy = realy[:seq_len, ...]
162 | yhat = yhat[:seq_len, ...]
163 |
164 | realy = self.scaler.inverse_transform(realy)
165 | preds = self.scaler.inverse_transform(yhat)
166 |
167 | res_scores = self.evaluator.evaluate(preds, realy)
168 | for _index in res_scores.keys():
169 | print(_index, " :")
170 | step_dict = res_scores[_index]
171 | for j, k in step_dict.items():
172 | print(j, " : ", k.item())
173 |
174 |
175 |
176 | def save_model(self, cache_name):
177 | """
178 |         Save the current model to a file.
179 |
180 |         Args:
181 |             cache_name(str): the file name to save the model to
182 | """
183 | ensure_dir(self.cache_dir)
184 | self._logger.info("Saved model at " + cache_name)
185 | torch.save(self.model.state_dict(), cache_name)
186 |
187 | def load_model(self, cache_name):
188 | """
189 |         Load the cached model.
190 |
191 |         Args:
192 |             cache_name(str): the file name of the cached model
193 | """
194 | self._logger.info("Loaded model at " + cache_name)
195 | model_state = torch.load(cache_name)
196 | self.model.load_state_dict(model_state)
197 |
--------------------------------------------------------------------------------
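utils/Optim.py is not included in this listing. From the executor's usage above (Optim(model.parameters(), config), optim.step(), and optim.lr_scheduler.step() when lr_decay is set) and the optimizer keys in STFGNN.json, the wrapper presumably looks roughly like the hypothetical sketch below; this is an assumption about the missing file, not its actual contents:

import torch

class Optim(object):
    # hypothetical sketch of the optimizer wrapper assumed by MultiStepExecutor
    def __init__(self, params, config):
        self.params = list(params)
        self.clip = config.get("clip", 10)
        lr = config.get("lr", 0.001)
        if config.get("optim", "adam").lower() == "adam":
            self.optimizer = torch.optim.Adam(self.params, lr=lr)
        else:
            self.optimizer = torch.optim.SGD(self.params, lr=lr)
        # stepped once per epoch by the executor, but only when lr_decay is true
        self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=config.get("lr_decay_steps", [5, 20, 40, 70]),
            gamma=config.get("lr_decay_ratio", 0.1))

    def step(self):
        if self.clip is not None:
            torch.nn.utils.clip_grad_norm_(self.params, self.clip)
        self.optimizer.step()
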
/STFGNN/executor/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import numpy as np
3 | import copy
4 | import pickle
5 | from model import loss
6 | from functools import partial
7 |
8 | def get_train_loss(train_loss):
9 | """
10 | get the loss func
11 | """
12 | if train_loss.lower() == 'none':
13 | print('Warning. Received none train loss func and will use the loss func defined in the model.')
14 | return None
15 |
16 | def func(preds, labels):
17 |
18 | if train_loss.lower() == 'mae':
19 | lf = loss.masked_mae_torch
20 | elif train_loss.lower() == 'mse':
21 | lf = loss.masked_mse_torch
22 | elif train_loss.lower() == 'rmse':
23 | lf = loss.masked_rmse_torch
24 | elif train_loss.lower() == 'mape':
25 | lf = loss.masked_mape_torch
26 | elif train_loss.lower() == 'logcosh':
27 | lf = loss.log_cosh_loss
28 | elif train_loss.lower() == 'huber':
29 | lf = loss.huber_loss
30 | elif train_loss.lower() == 'quantile':
31 | lf = loss.quantile_loss
32 | elif train_loss.lower() == 'masked_mae':
33 | lf = partial(loss.masked_mae_torch, null_val=0)
34 | elif train_loss.lower() == 'masked_mse':
35 | lf = partial(loss.masked_mse_torch, null_val=0)
36 | elif train_loss.lower() == 'masked_rmse':
37 | lf = partial(loss.masked_rmse_torch, null_val=0)
38 | elif train_loss.lower() == 'masked_mape':
39 | lf = partial(loss.masked_mape_torch, null_val=0)
40 | elif train_loss.lower() == 'r2':
41 | lf = loss.r2_score_torch
42 | elif train_loss.lower() == 'evar':
43 | lf = loss.explained_variance_score_torch
44 | else:
45 | lf = loss.masked_mae_torch
46 |
47 | return lf(preds, labels)
48 | return func
49 |
50 |
--------------------------------------------------------------------------------
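A short usage sketch: with train_loss set to "mae" as in STFGNN.json, the returned callable dispatches to loss.masked_mae_torch with its default all-NaN null value, so nothing is actually masked:

import torch
from executor.utils import get_train_loss

criterion = get_train_loss("mae")
preds = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
labels = torch.tensor([[1.5, 2.0], [2.0, 4.0]])
print(criterion(preds, labels))   # tensor(0.3750), plain mean absolute error
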
/STFGNN/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import torch
4 |
5 | from data.dataset.stfgnn_dataset import STFGNNDataset
6 | from model.STFGNN import STFGNN
7 | from executor.multi_step_executor import MultiStepExecutor as STFGNNExecutor
8 |
9 |
10 | config = {}
11 | for filename in ["config/PEMS03.json", "config/STFGNN.json"]:
12 | with open(filename, "r") as f:
13 | _config = json.load(f)
14 | for key in _config:
15 | if key not in config:
16 | config[key] = _config[key]
17 |
18 | dataset = STFGNNDataset(config)
19 |
20 | train_data, valid_data, test_data = dataset.get_data()
21 | data_feature = dataset.get_data_feature()
22 |
23 | model_cache_file = 'cache/model_cache/PEMS03_STFGNN.m'
24 |
25 | model = STFGNN(config, data_feature)
26 |
27 | executor = STFGNNExecutor(config, model)
28 |
29 |
30 | train = True  # flag indicating whether the model should be retrained
31 |
32 | if train or not os.path.exists(model_cache_file):
33 | executor.train(train_data, valid_data)
34 | executor.save_model(model_cache_file)
35 | else:
36 | executor.load_model(model_cache_file)
37 | # Evaluate; the results will be placed under cache/evaluate_cache
38 | executor.evaluate(test_data)
39 |
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/STFGNN/model/STFGNN.py:
--------------------------------------------------------------------------------
1 | # from _typeshed import Self
2 | import torch
3 | import torch.nn.functional as F
4 | import torch.nn as nn
5 |
6 |
7 | class gcn_operation(nn.Module):
8 | def __init__(self, adj, in_dim, out_dim, num_vertices, activation='GLU'):
9 | """
10 |         Graph convolution module
11 |         :param adj: adjacency matrix
12 |         :param in_dim: input dimension
13 |         :param out_dim: output dimension
14 |         :param num_vertices: number of vertices
15 |         :param activation: activation type {'relu', 'GLU'}
16 | """
17 | super(gcn_operation, self).__init__()
18 | self.adj = adj
19 | self.in_dim = in_dim
20 | self.out_dim = out_dim
21 | self.num_vertices = num_vertices
22 | self.activation = activation
23 |
24 | assert self.activation in {'GLU', 'relu'}
25 |
26 | if self.activation == 'GLU':
27 | self.FC = nn.Linear(self.in_dim, 2 * self.out_dim, bias=True)
28 | else:
29 | self.FC = nn.Linear(self.in_dim, self.out_dim, bias=True)
30 |
31 | def forward(self, x, mask=None):
32 | """
33 | :param x: (3*N, B, Cin)
34 | :param mask:(3*N, 3*N)
35 | :return: (3*N, B, Cout)
36 | """
37 | adj = self.adj
38 | if mask is not None:
39 | adj = adj.to(mask.device) * mask
40 |
41 | x = torch.einsum('nm, mbc->nbc', adj.to(x.device), x) # 4*N, B, Cin
42 |
43 | if self.activation == 'GLU':
44 | lhs_rhs = self.FC(x) # 4*N, B, 2*Cout
45 | lhs, rhs = torch.split(lhs_rhs, self.out_dim, dim=-1) # 4*N, B, Cout
46 |
47 | out = lhs * torch.sigmoid(rhs)
48 | del lhs, rhs, lhs_rhs
49 |
50 | return out
51 |
52 | elif self.activation == 'relu':
53 | return torch.relu(self.FC(x)) # 3*N, B, Cout
54 |
55 |
56 | class STSGCM(nn.Module):
57 | def __init__(self, adj, in_dim, out_dims, num_of_vertices, activation='GLU'):
58 | """
59 |         :param adj: adjacency matrix
60 |         :param in_dim: input dimension
61 |         :param out_dims: list, output dimension of each graph convolution
62 |         :param num_of_vertices: number of vertices
63 |         :param activation: activation type {'relu', 'GLU'}
64 | """
65 | super(STSGCM, self).__init__()
66 | self.adj = adj
67 | self.in_dim = in_dim
68 | self.out_dims = out_dims
69 | self.num_of_vertices = num_of_vertices
70 | self.activation = activation
71 |
72 | self.gcn_operations = nn.ModuleList()
73 |
74 | self.gcn_operations.append(
75 | gcn_operation(
76 | adj=self.adj,
77 | in_dim=self.in_dim,
78 | out_dim=self.out_dims[0],
79 | num_vertices=self.num_of_vertices,
80 | activation=self.activation
81 | )
82 | )
83 |
84 | for i in range(1, len(self.out_dims)):
85 | self.gcn_operations.append(
86 | gcn_operation(
87 | adj=self.adj,
88 | in_dim=self.out_dims[i-1],
89 | out_dim=self.out_dims[i],
90 | num_vertices=self.num_of_vertices,
91 | activation=self.activation
92 | )
93 | )
94 |
95 | def forward(self, x, mask=None):
96 | """
97 | :param x: (3N, B, Cin)
98 | :param mask: (3N, 3N)
99 | :return: (N, B, Cout)
100 | """
101 | need_concat = []
102 |
103 | for i in range(len(self.out_dims)):
104 | x = self.gcn_operations[i](x, mask)
105 | need_concat.append(x)
106 |
107 | # shape of each element is (1, N, B, Cout)
108 | need_concat = [
109 | torch.unsqueeze(
110 | h[self.num_of_vertices: 2 * self.num_of_vertices], dim=0
111 | ) for h in need_concat
112 | ]
113 |
114 | out = torch.max(torch.cat(need_concat, dim=0), dim=0).values # (N, B, Cout)
115 |
116 | del need_concat
117 |
118 | return out
119 |
120 |
121 | class STSGCL(nn.Module):
122 | def __init__(self,
123 | adj,
124 | history,
125 | num_of_vertices,
126 | in_dim,
127 | out_dims,
128 | strides=4,
129 | activation='GLU',
130 | temporal_emb=True,
131 | spatial_emb=True):
132 | """
133 |         :param adj: adjacency matrix
134 |         :param history: length of the input sequence
135 |         :param in_dim: input dimension
136 |         :param out_dims: list, output dimension of each graph convolution
137 |         :param strides: sliding-window length, i.e. how many time steps the local spatio-temporal graph is built from, default 4
138 |         :param num_of_vertices: number of vertices
139 |         :param activation: activation type {'relu', 'GLU'}
140 |         :param temporal_emb: whether to add a temporal position embedding
141 |         :param spatial_emb: whether to add a spatial position embedding
142 | """
143 | super(STSGCL, self).__init__()
144 | self.adj = adj
145 | self.strides = strides
146 | self.history = history
147 | self.in_dim = in_dim
148 | self.out_dims = out_dims
149 | self.num_of_vertices = num_of_vertices
150 |
151 | self.activation = activation
152 | self.temporal_emb = temporal_emb
153 | self.spatial_emb = spatial_emb
154 |
155 |
156 |         self.conv1 = nn.Conv2d(self.in_dim, self.out_dims[-1], kernel_size=(1, 2), stride=(1, 1), dilation=(1, 3))
157 |         self.conv2 = nn.Conv2d(self.in_dim, self.out_dims[-1], kernel_size=(1, 2), stride=(1, 1), dilation=(1, 3))
158 |
159 |
160 | self.STSGCMS = nn.ModuleList()
161 | for i in range(self.history - self.strides + 1):
162 | self.STSGCMS.append(
163 | STSGCM(
164 | adj=self.adj,
165 | in_dim=self.in_dim,
166 | out_dims=self.out_dims,
167 | num_of_vertices=self.num_of_vertices,
168 | activation=self.activation
169 | )
170 | )
171 |
172 | if self.temporal_emb:
173 | self.temporal_embedding = nn.Parameter(torch.FloatTensor(1, self.history, 1, self.in_dim))
174 | # 1, T, 1, Cin
175 |
176 | if self.spatial_emb:
177 | self.spatial_embedding = nn.Parameter(torch.FloatTensor(1, 1, self.num_of_vertices, self.in_dim))
178 | # 1, 1, N, Cin
179 |
180 | self.reset()
181 |
182 | def reset(self):
183 | if self.temporal_emb:
184 | nn.init.xavier_normal_(self.temporal_embedding, gain=0.0003)
185 |
186 | if self.spatial_emb:
187 | nn.init.xavier_normal_(self.spatial_embedding, gain=0.0003)
188 |
189 | def forward(self, x, mask=None):
190 | """
191 | :param x: B, T, N, Cin
192 | :param mask: (N, N)
193 | :return: B, T-3, N, Cout
194 | """
195 | if self.temporal_emb:
196 | x = x + self.temporal_embedding
197 |
198 | if self.spatial_emb:
199 | x = x + self.spatial_embedding
200 |
201 | #############################################
202 | # shape is (B, C, N, T)
203 | data_temp = x.permute(0, 3, 2, 1)
204 | data_left = torch.sigmoid(self.conv1(data_temp))
205 | data_right = torch.tanh(self.conv2(data_temp))
206 | data_time_axis = data_left * data_right
207 | data_res = data_time_axis.permute(0, 3, 2, 1)
208 | # shape is (B, T-3, N, C)
209 | #############################################
210 |
211 | need_concat = []
212 | batch_size = x.shape[0]
213 |
214 | for i in range(self.history - self.strides + 1):
215 | t = x[:, i: i+self.strides, :, :] # (B, 4, N, Cin)
216 |
217 | t = torch.reshape(t, shape=[batch_size, self.strides * self.num_of_vertices, self.in_dim])
218 | # (B, 4*N, Cin)
219 |
220 | t = self.STSGCMS[i](t.permute(1, 0, 2), mask) # (4*N, B, Cin) -> (N, B, Cout)
221 |
222 | t = torch.unsqueeze(t.permute(1, 0, 2), dim=1) # (N, B, Cout) -> (B, N, Cout) ->(B, 1, N, Cout)
223 |
224 | need_concat.append(t)
225 |
226 | mid_out = torch.cat(need_concat, dim=1) # (B, T-3, N, Cout)
227 | out = mid_out + data_res
228 |
229 | del need_concat, batch_size
230 |
231 | return out
232 |
233 |
234 | class output_layer(nn.Module):
235 | def __init__(self, num_of_vertices, history, in_dim, out_dim,
236 | hidden_dim=128, horizon=12):
237 | """
238 |         Prediction layer. In the authors' experiments each prediction time step is handled separately, i.e. horizon is set to 1.
239 |         :param num_of_vertices: number of vertices
240 |         :param history: length of the input sequence
241 |         :param in_dim: input dimension
242 |         :param hidden_dim: hidden layer dimension
243 |         :param horizon: prediction horizon
244 | """
245 | super(output_layer, self).__init__()
246 | self.num_of_vertices = num_of_vertices
247 | self.history = history
248 | self.in_dim = in_dim
249 | self.out_dim = out_dim
250 | self.hidden_dim = hidden_dim
251 | self.horizon = horizon
252 |
253 | #print("#####################")
254 | #print(self.in_dim)
255 | #print(self.history)
256 | #print(self.hidden_dim)
257 |
258 | self.FC1 = nn.Linear(self.in_dim * self.history, self.hidden_dim, bias=True)
259 |
260 | #self.FC2 = nn.Linear(self.hidden_dim, self.horizon , bias=True)
261 |
262 | self.FC2 = nn.Linear(self.hidden_dim, self.horizon * self.out_dim, bias=True)
263 |
264 | def forward(self, x):
265 | """
266 | :param x: (B, Tin, N, Cin)
267 | :return: (B, Tout, N)
268 | """
269 | batch_size = x.shape[0]
270 |
271 | x = x.permute(0, 2, 1, 3) # B, N, Tin, Cin
272 |
273 | out1 = torch.relu(self.FC1(x.reshape(batch_size, self.num_of_vertices, -1)))
274 | # (B, N, Tin, Cin) -> (B, N, Tin * Cin) -> (B, N, hidden)
275 |
276 | out2 = self.FC2(out1) # (B, N, hidden) -> (B, N, horizon * 2)
277 |
278 | out2 = out2.reshape(batch_size, self.num_of_vertices, self.horizon, self.out_dim)
279 |
280 | del out1, batch_size
281 |
282 | return out2.permute(0, 2, 1, 3) # B, horizon, N
283 | # return out2.permute(0, 2, 1) # B, horizon, N
284 |
285 |
286 | class STFGNN(nn.Module):
287 | def __init__(self, config, data_feature):
288 | """
289 |
290 |         :param adj: the fused local spatio-temporal adjacency matrix
291 |         :param history: length of the input sequence
292 |         :param num_of_vertices: number of vertices
293 |         :param in_dim: input dimension
294 |         :param hidden_dims: list of lists, convolution output dimensions of the intermediate STSGCL layers
295 |         :param first_layer_embedding_size: dimension of the first (input embedding) layer
296 |         :param out_layer_dim: hidden dimension of the output module
297 |         :param activation: activation function {relu, GLU}
298 |         :param use_mask: whether to use a mask matrix to refine adj
299 |         :param temporal_emb: whether to use a temporal position embedding
300 |         :param spatial_emb: whether to use a spatial position embedding
301 |         :param horizon: prediction horizon
302 |         :param strides: sliding-window length, i.e. how many time steps the local spatio-temporal graph is built from, default 4
303 | """
304 | super(STFGNN, self).__init__()
305 |
306 | self.config = config
307 | self.data_feature = data_feature
308 | self.scaler = data_feature["scaler"]
309 | self.num_batches = data_feature["num_batches"]
310 |
311 | adj = self.data_feature["adj_mx"]
312 | history = self.config.get("window", 12)
313 | num_of_vertices = self.config.get("num_nodes", None)
314 | in_dim = self.config.get("input_dim", 1)
315 | out_dim = self.config.get("output_dim", 1)
316 | hidden_dims = self.config.get("hidden_dims", None)
317 | first_layer_embedding_size = self.config.get("first_layer_embedding_size", None)
318 | out_layer_dim = self.config.get("out_layer_dim", None)
319 | activation = self.config.get("activation", "GLU")
320 | use_mask = self.config.get("mask")
321 | temporal_emb = self.config.get("temporal_emb", True)
322 | spatial_emb = self.config.get("spatial_emb", True)
323 | horizon = self.config.get("horizon", 12)
324 | strides = self.config.get("strides", 4)
325 |
326 | self.adj = adj
327 | self.num_of_vertices = num_of_vertices
328 | self.hidden_dims = hidden_dims
329 | self.out_layer_dim = out_layer_dim
330 | self.activation = activation
331 | self.use_mask = use_mask
332 |
333 | self.temporal_emb = temporal_emb
334 | self.spatial_emb = spatial_emb
335 | self.horizon = horizon
336 | self.strides = 4
337 |
338 | self.First_FC = nn.Linear(in_dim, first_layer_embedding_size, bias=True)
339 | self.STSGCLS = nn.ModuleList()
340 | #print("____________________")
341 | #print(history)
342 |
343 | self.STSGCLS.append(
344 | STSGCL(
345 | adj=self.adj,
346 | history=history,
347 | num_of_vertices=self.num_of_vertices,
348 | in_dim=first_layer_embedding_size,
349 | out_dims=self.hidden_dims[0],
350 | strides=self.strides,
351 | activation=self.activation,
352 | temporal_emb=self.temporal_emb,
353 | spatial_emb=self.spatial_emb
354 | )
355 | )
356 |
357 | in_dim = self.hidden_dims[0][-1]
358 | history -= (self.strides - 1)
359 |
360 | #print("!!!!!!!!!!!!!!!!!!!")
361 | #print(history)
362 |
363 | for idx, hidden_list in enumerate(self.hidden_dims):
364 | #print("?????? ", idx)
365 | if idx == 0:
366 | continue
367 | #print("---------", idx)
368 | self.STSGCLS.append(
369 | STSGCL(
370 | adj=self.adj,
371 | history=history,
372 | num_of_vertices=self.num_of_vertices,
373 | in_dim=in_dim,
374 | out_dims=hidden_list,
375 | strides=self.strides,
376 | activation=self.activation,
377 | temporal_emb=self.temporal_emb,
378 | spatial_emb=self.spatial_emb
379 | )
380 | )
381 | history -= (self.strides - 1)
382 | in_dim = hidden_list[-1]
383 |
384 | self.predictLayer = nn.ModuleList()
385 | #print("***********************")
386 | #print(history)
387 | for t in range(self.horizon):
388 | self.predictLayer.append(
389 | output_layer(
390 | num_of_vertices=self.num_of_vertices,
391 | history=history,
392 | in_dim=in_dim,
393 | out_dim = out_dim,
394 | hidden_dim=out_layer_dim,
395 | horizon=1
396 | )
397 | )
398 |
399 | if self.use_mask:
400 | mask = torch.zeros_like(self.adj)
401 | mask[self.adj != 0] = self.adj[self.adj != 0]
402 | self.mask = nn.Parameter(mask)
403 | else:
404 | self.mask = None
405 |
406 | def forward(self, x):
407 | """
408 | :param x: B, Tin, N, Cin)
409 | :return: B, Tout, N
410 | """
411 |
412 | x = torch.relu(self.First_FC(x)) # B, Tin, N, Cin
413 | #print(1)
414 |
415 | for model in self.STSGCLS:
416 | x = model(x, self.mask)
417 | # (B, T - 8, N, Cout)
418 | #print(2)
419 | need_concat = []
420 | for i in range(self.horizon):
421 | out_step = self.predictLayer[i](x) # (B, 1, N, 2)
422 | need_concat.append(out_step)
423 | #print(3)
424 | out = torch.cat(need_concat, dim=1) # B, Tout, N, 2
425 |
426 | del need_concat
427 |
428 | return out
429 |
430 |
431 |
432 |
--------------------------------------------------------------------------------
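A minimal shape-check sketch for the model with toy dimensions (4 nodes, an already-fused 4N x 4N adjacency stand-in, mask disabled). The config keys mirror STFGNN.json but are scaled down, and the scaler is not needed for a bare forward pass:

import torch
from model.STFGNN import STFGNN

num_nodes, window, horizon = 4, 12, 12
config = {
    "window": window, "horizon": horizon, "num_nodes": num_nodes,
    "input_dim": 1, "output_dim": 1,
    "hidden_dims": [[16, 16], [16, 16], [16, 16]],
    "first_layer_embedding_size": 8, "out_layer_dim": 32,
    "activation": "GLU", "mask": False,
    "temporal_emb": True, "spatial_emb": True, "strides": 4,
}
data_feature = {
    "scaler": None,                      # unused by forward()
    "adj_mx": torch.eye(4 * num_nodes),  # stands in for the fused local spatio-temporal graph
    "num_batches": 1,
}

model = STFGNN(config, data_feature)
x = torch.randn(2, window, num_nodes, 1)   # (B, Tin, N, Cin)
print(model(x).shape)                      # torch.Size([2, 12, 4, 1]) -> (B, Tout, N, Cout)
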
/STFGNN/model/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.metrics import r2_score, explained_variance_score
4 |
5 |
6 | def masked_mae_loss(y_pred, y_true):
7 | mask = (y_true != 0).float()
8 | mask /= mask.mean()
9 | loss = torch.abs(y_pred - y_true)
10 | loss = loss * mask
11 | # trick for nans:
12 | # https://discuss.pytorch.org/t/how-to-set-nan-in-tensor-to-0/3918/3
13 | loss[loss != loss] = 0
14 | return loss.mean()
15 |
16 |
17 | def masked_mae_torch(preds, labels, null_val=np.nan):
18 | labels[torch.abs(labels) < 1e-4] = 0
19 | if np.isnan(null_val):
20 | mask = ~torch.isnan(labels)
21 | else:
22 | mask = labels.ne(null_val)
23 | mask = mask.float()
24 | mask /= torch.mean(mask)
25 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
26 | loss = torch.abs(torch.sub(preds, labels))
27 | loss = loss * mask
28 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
29 | return torch.mean(loss)
30 |
31 |
32 | def log_cosh_loss(preds, labels):
33 | loss = torch.log(torch.cosh(preds - labels))
34 | return torch.mean(loss)
35 |
36 |
37 | def huber_loss(preds, labels, delta=1.0):
38 | residual = torch.abs(preds - labels)
39 | condition = torch.le(residual, delta)
40 | small_res = 0.5 * torch.square(residual)
41 | large_res = delta * residual - 0.5 * delta * delta
42 | return torch.mean(torch.where(condition, small_res, large_res))
43 | # lo = torch.nn.SmoothL1Loss()
44 | # return lo(preds, labels)
45 |
46 |
47 | def quantile_loss(preds, labels, delta=0.25):
48 | condition = torch.ge(labels, preds)
49 | large_res = delta * (labels - preds)
50 | small_res = (1 - delta) * (preds - labels)
51 | return torch.mean(torch.where(condition, large_res, small_res))
52 |
53 |
54 | def masked_mape_torch(preds, labels, null_val=np.nan, eps=0):
55 | labels[torch.abs(labels) < 1e-4] = 0
56 | if np.isnan(null_val) and eps != 0:
57 | loss = torch.abs((preds - labels) / (labels + eps))
58 | return torch.mean(loss)
59 | if np.isnan(null_val):
60 | mask = ~torch.isnan(labels)
61 | else:
62 | mask = labels.ne(null_val)
63 | mask = mask.float()
64 | mask /= torch.mean(mask)
65 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
66 | loss = torch.abs((preds - labels) / labels)
67 | loss = loss * mask
68 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
69 | return torch.mean(loss)
70 |
71 |
72 | def masked_mse_torch(preds, labels, null_val=np.nan):
73 |     labels = torch.where(torch.abs(labels) < 1e-4, torch.zeros_like(labels), labels)
74 | if np.isnan(null_val):
75 | mask = ~torch.isnan(labels)
76 | else:
77 | mask = labels.ne(null_val)
78 | mask = mask.float()
79 | mask /= torch.mean(mask)
80 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
81 | loss = torch.square(torch.sub(preds, labels))
82 | loss = loss * mask
83 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
84 | return torch.mean(loss)
85 |
86 |
87 | def masked_rmse_torch(preds, labels, null_val=np.nan):
88 |     labels = torch.where(torch.abs(labels) < 1e-4, torch.zeros_like(labels), labels)
89 | return torch.sqrt(masked_mse_torch(preds=preds, labels=labels,
90 | null_val=null_val))
91 |
92 |
93 | def r2_score_torch(preds, labels):
94 |     preds = preds.detach().cpu().flatten()
95 |     labels = labels.detach().cpu().flatten()
96 | return r2_score(labels, preds)
97 |
98 |
99 | def explained_variance_score_torch(preds, labels):
100 |     preds = preds.detach().cpu().flatten()
101 |     labels = labels.detach().cpu().flatten()
102 | return explained_variance_score(labels, preds)
103 |
104 |
105 | def masked_rmse_np(preds, labels, null_val=np.nan):
106 | return np.sqrt(masked_mse_np(preds=preds, labels=labels,
107 | null_val=null_val))
108 |
109 |
110 | def masked_mse_np(preds, labels, null_val=np.nan):
111 | with np.errstate(divide='ignore', invalid='ignore'):
112 | if np.isnan(null_val):
113 | mask = ~np.isnan(labels)
114 | else:
115 | mask = np.not_equal(labels, null_val)
116 | mask = mask.astype('float32')
117 | mask /= np.mean(mask)
118 | rmse = np.square(np.subtract(preds, labels)).astype('float32')
119 | rmse = np.nan_to_num(rmse * mask)
120 | return np.mean(rmse)
121 |
122 |
123 | def masked_mae_np(preds, labels, null_val=np.nan):
124 | with np.errstate(divide='ignore', invalid='ignore'):
125 | if np.isnan(null_val):
126 | mask = ~np.isnan(labels)
127 | else:
128 | mask = np.not_equal(labels, null_val)
129 | mask = mask.astype('float32')
130 | mask /= np.mean(mask)
131 | mae = np.abs(np.subtract(preds, labels)).astype('float32')
132 | mae = np.nan_to_num(mae * mask)
133 | return np.mean(mae)
134 |
135 |
136 | def masked_mape_np(preds, labels, null_val=np.nan):
137 | with np.errstate(divide='ignore', invalid='ignore'):
138 | if np.isnan(null_val):
139 | mask = ~np.isnan(labels)
140 | else:
141 | mask = np.not_equal(labels, null_val)
142 | mask = mask.astype('float32')
143 | mask /= np.mean(mask)
144 | mape = np.abs(np.divide(np.subtract(
145 | preds, labels).astype('float32'), labels))
146 | mape = np.nan_to_num(mask * mape)
147 | return np.mean(mape)
148 |
149 |
150 | def r2_score_np(preds, labels):
151 | preds = preds.flatten()
152 | labels = labels.flatten()
153 | return r2_score(labels, preds)
154 |
155 |
156 | def explained_variance_score_np(preds, labels):
157 | preds = preds.flatten()
158 | labels = labels.flatten()
159 | return explained_variance_score(labels, preds)
160 |
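161 | 
162 | 
163 | # Illustrative usage only (not called by the training pipeline): a quick check,
164 | # under the assumption that zeros in `labels` mark missing readings, that the
165 | # masked MAE ignores those positions while a plain MAE does not.
166 | if __name__ == '__main__':
167 |     preds = torch.tensor([[1.0, 2.0, 3.0]])
168 |     labels = torch.tensor([[1.5, 0.0, 2.0]])  # the middle reading is missing
169 |     print(masked_mae_torch(preds, labels, null_val=0.0))  # averages over the 2 valid entries -> 0.75
170 |     print(torch.mean(torch.abs(preds - labels)))          # a plain MAE also counts the missing entry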
--------------------------------------------------------------------------------
/STFGNN/raw_data/PEMS03/PEMS03.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lwm412/STFGNN-Pytorch/ae7c95866d036d2bd7143d6d6020228a4e902e50/STFGNN/raw_data/PEMS03/PEMS03.npz
--------------------------------------------------------------------------------
/STFGNN/raw_data/PEMS03/adj_mx.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lwm412/STFGNN-Pytorch/ae7c95866d036d2bd7143d6d6020228a4e902e50/STFGNN/raw_data/PEMS03/adj_mx.pkl
--------------------------------------------------------------------------------
/STFGNN/utils/GPS_utils.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | R_EARTH = 6371000 # meter
4 |
5 |
6 | def angle2radian(angle):
7 | """
8 | convert from an angle to a radian
9 | :param angle: (float)
10 | :return: radian (float)
11 | """
12 | return math.radians(angle)
13 |
14 |
15 | def radian2angle(radian):
16 | return math.degrees(radian)
17 |
18 |
19 | def spherical_law_of_cosines(phi1, lambda1, phi2, lambda2):
20 | """
21 | calculate great circle distance with spherical law of cosines
22 | phi/lambda for latitude/longitude in radians
23 | :param phi1: point one's latitude in radians
24 | :param lambda1: point one's longitude in radians
25 | :param phi2: point two's latitude in radians
26 | :param lambda2: point two's longitude in radians
27 | :return:
28 | """
29 | d_lambda = lambda2 - lambda1
30 | return math.acos(math.sin(phi1) * math.sin(phi2) + math.cos(phi1) * math.cos(phi2) * math.cos(d_lambda))
31 |
32 |
33 | def haversine(phi1, lambda1, phi2, lambda2):
34 | """
35 | calculate angular great circle distance with haversine formula
36 | see parameters in spherical_law_of_cosines
37 | """
38 | d_phi = phi2 - phi1
39 | d_lambda = lambda2 - lambda1
40 | a = math.pow(math.sin(d_phi / 2), 2) + \
41 | math.cos(phi1) * math.cos(phi2) * math.pow(math.sin(d_lambda / 2), 2)
42 | c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
43 | return c
44 |
45 |
46 | def equirectangular_approximation(phi1, lambda1, phi2, lambda2):
47 | """
48 | calculate angular great circle distance with Pythagoras’ theorem performed on an equirectangular projection
49 | see parameters in spherical_law_of_cosines
50 | """
51 | x = (lambda2 - lambda1) * math.cos((phi1 + phi2) / 2)
52 | y = phi2 - phi1
53 | return math.sqrt(math.pow(x, 2) + math.pow(y, 2))
54 |
55 |
56 | def dist(phi1, lambda1, phi2, lambda2, r=R_EARTH, method='hav'):
57 | """
58 | calculate great circle distance with given latitude and longitude,
59 |     :param phi1: point one's latitude in radians
60 |     :param lambda1: point one's longitude in radians
61 |     :param phi2: point two's latitude in radians
62 |     :param lambda2: point two's longitude in radians
63 | :param r: earth radius(m)
64 | :param method: 'hav' means haversine,
65 | 'LoC' means Spherical Law of Cosines,
66 | 'approx' means Pythagoras’ theorem performed on an equirectangular projection
67 | :return: distance (m)
68 | """
69 | return angular_dist(phi1, lambda1, phi2, lambda2, method) * r
70 |
71 |
72 | def angular_dist(phi1, lambda1, phi2, lambda2, method='hav'):
73 | """
74 | calculate angular great circle distance with given latitude and longitude
75 | :return: angle
76 | """
77 | if method.lower() == 'hav':
78 | return haversine(phi1, lambda1, phi2, lambda2)
79 | elif method.lower() == 'loc':
80 | return spherical_law_of_cosines(phi1, lambda1, phi2, lambda2)
81 | elif method.lower() == 'approx':
82 | return equirectangular_approximation(phi1, lambda1, phi2, lambda2)
83 | else:
84 |         raise ValueError('unknown method: ' + method)
85 |
86 |
87 | def destination(phi1, lambda1, brng, distance, r=R_EARTH):
88 |     """
89 |     calculate the destination point reached from a start point given an initial bearing and a distance
90 |     :param phi1: start point's latitude in radians
91 |     :param lambda1: start point's longitude in radians
92 |     :param brng: initial bearing in radians
93 |     :param distance: distance travelled along the great circle (m)
94 |     :return: (phi2, lambda2), the destination's latitude and longitude in radians
95 |     """
96 | delta = distance / r
97 | phi2 = math.asin(math.sin(phi1) * math.cos(delta) + math.cos(phi1) * math.sin(delta) * math.cos(brng))
98 | lambda2 = lambda1 + math.atan2(
99 | math.sin(brng) * math.sin(delta) * math.cos(phi1), math.cos(delta) - math.sin(phi1) * math.sin(phi2)
100 | )
101 | return phi2, lambda2
102 |
103 |
104 | def init_bearing(phi1, lambda1, phi2, lambda2):
105 | """
106 | initial bearing of a great circle route
107 | :return: 0~360
108 | """
109 | y = math.sin(lambda2 - lambda1) * math.cos(phi2)
110 | x = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(lambda2 - lambda1)
111 | theta = math.atan2(y, x)
112 | brng = (theta * 180 / math.pi + 360) % 360
113 | return brng
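114 | 
115 | 
116 | # Illustrative usage only: compare the three great-circle methods on a short,
117 | # made-up pair of coordinates. Inputs are converted to radians first, which is
118 | # what haversine() and the other helpers expect.
119 | if __name__ == '__main__':
120 |     phi1, lam1 = angle2radian(39.90), angle2radian(116.40)
121 |     phi2, lam2 = angle2radian(39.91), angle2radian(116.41)
122 |     for m in ('hav', 'LoC', 'approx'):
123 |         print(m, round(dist(phi1, lam1, phi2, lam2, method=m), 1), 'm')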
--------------------------------------------------------------------------------
/STFGNN/utils/Optim.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.optim as optim
4 |
5 |
6 | class Optim(object):
7 |
8 | def __init__(self, params, config):
9 | self.params = list(params) # careful: params may be a generator
10 | self.config = config
11 | self.last_ppl = None
12 | self.lr = self.config.get("lr", 0.001)
13 | self.max_grad_norm = self.config.get("clip", 10)
14 | self.method = self.config.get("optim", "adam")
15 | self.lr_decay = self.config.get("lr_decay", False)
16 | self.lr_scheduler_type = self.config.get('lr_scheduler', 'multisteplr')
17 | self.lr_decay_ratio = self.config.get("lr_decay_ratio", 0.1)
18 | self.milestones = self.config.get("lr_decay_steps", [])
19 | self.step_size = self.config.get("step_size", 10)
20 |
21 | self._makeOptimizer()
22 | self.lr_scheduler = self._build_lr_scheduler()
23 |
24 |
25 | def _makeOptimizer(self):
26 | if self.method == 'sgd':
27 | self.optimizer = optim.SGD(self.params, lr=self.lr)
28 | elif self.method == 'adagrad':
29 | self.optimizer = optim.Adagrad(self.params, lr=self.lr)
30 | elif self.method == 'adadelta':
31 | self.optimizer = optim.Adadelta(self.params, lr=self.lr)
32 | elif self.method == 'adam':
33 | self.optimizer = optim.Adam(self.params, lr=self.lr)
34 | else:
35 | raise RuntimeError("Invalid optim method: " + self.method)
36 |
37 | def _build_lr_scheduler(self):
38 | """
39 |         select the learning-rate scheduler according to the global parameter `lr_scheduler`
40 | """
41 | if self.lr_decay:
42 | if self.lr_scheduler_type.lower() == 'multisteplr':
43 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
44 | self.optimizer, milestones=self.milestones, gamma=self.lr_decay_ratio)
45 | elif self.lr_scheduler_type.lower() == 'steplr':
46 | lr_scheduler = torch.optim.lr_scheduler.StepLR(
47 | self.optimizer, step_size=self.step_size, gamma=self.lr_decay_ratio)
48 | elif self.lr_scheduler_type.lower() == 'exponentiallr':
49 | lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
50 | self.optimizer, gamma=self.lr_decay_ratio)
51 | else:
52 | print('Received unrecognized lr_scheduler, '
53 | 'please check the parameter `lr_scheduler`.')
54 | lr_scheduler = None
55 | else:
56 | lr_scheduler = None
57 | return lr_scheduler
58 |
59 |
60 | def step(self):
61 | # Compute gradients norm.
62 | grad_norm = 0
63 |
64 | if self.max_grad_norm is not None:
65 |             grad_norm = torch.nn.utils.clip_grad_norm_(self.params, self.max_grad_norm)
66 |
67 | self.optimizer.step()
68 | return grad_norm
69 |
70 |
71 | def zero_grad(self):
72 | self.optimizer.zero_grad()
73 | return
74 |
75 |
76 |     # decay learning rate if val perf does not improve or we hit the start_decay_at limit
77 |     def updateLearningRate(self, ppl, epoch):
78 |         start_decay = False
79 |         start_decay_at = self.config.get("start_decay_at", None)
80 |         if start_decay_at is not None and epoch >= start_decay_at:
81 |             start_decay = True
82 |         if self.last_ppl is not None and ppl > self.last_ppl:
83 |             start_decay = True
84 | 
85 |         if start_decay:
86 |             self.lr = self.lr * self.lr_decay_ratio
87 |             print("Decaying learning rate to %g" % self.lr)
88 | 
89 |         self.last_ppl = ppl
90 | 
91 |         self._makeOptimizer()
92 |
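93 | 
94 | # Illustrative usage only: wrap the parameters of a small stand-in model with
95 | # the same config keys read in __init__ ("optim", "lr", "clip", "lr_decay", ...).
96 | if __name__ == '__main__':
97 |     model = torch.nn.Linear(4, 1)
98 |     wrapped = Optim(model.parameters(), {"optim": "adam", "lr": 0.001, "clip": 10})
99 |     x, y = torch.randn(8, 4), torch.randn(8, 1)
100 |     loss = torch.mean((model(x) - y) ** 2)
101 |     wrapped.zero_grad()
102 |     loss.backward()
103 |     grad_norm = wrapped.step()
104 |     print('gradient norm:', float(grad_norm))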
--------------------------------------------------------------------------------
/STFGNN/utils/argument_list.py:
--------------------------------------------------------------------------------
1 | """
2 | store the arguments that can be modified by the user
3 | """
4 | import argparse
5 |
6 | general_arguments = {
7 | "gpu": "bool",
8 | "batch_size": "int",
9 | "train_rate": "float",
10 | "eval_rate": "float",
11 | "learning_rate": "float",
12 | "max_epoch": "int",
13 | "gpu_id": "int"
14 | }
15 |
16 | hyper_arguments = {
17 | "gpu": {
18 | "type": "bool",
19 | "default": None,
20 | "help": "whether use gpu"
21 | },
22 | "gpu_id": {
23 | "type": "int",
24 | "default": None,
25 | "help": "the gpu id to use"
26 | },
27 | "train_rate": {
28 | "type": "float",
29 | "default": None,
30 | "help": "the train set rate"
31 | },
32 | "eval_rate": {
33 | "type": "float",
34 | "default": None,
35 | "help": "the validation set rate"
36 | },
37 | "batch_size": {
38 | "type": "int",
39 | "default": None,
40 | "help": "the batch size"
41 | }
42 | }
43 |
44 |
45 | def str2bool(s):
46 | if isinstance(s, bool):
47 | return s
48 | if s.lower() in ('yes', 'true'):
49 | return True
50 | elif s.lower() in ('no', 'false'):
51 | return False
52 | else:
53 | raise argparse.ArgumentTypeError('bool value expected.')
54 |
55 |
56 | def str2float(s):
57 | if isinstance(s, float):
58 | return s
59 | try:
60 | x = float(s)
61 | except ValueError:
62 | raise argparse.ArgumentTypeError('float value expected.')
63 | return x
64 |
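65 | 
66 | # Illustrative usage only: wire the converters above into argparse so that,
67 | # e.g., "--gpu false" and "--train_rate 0.7" are parsed with the intended types.
68 | if __name__ == '__main__':
69 |     parser = argparse.ArgumentParser()
70 |     parser.add_argument('--gpu', type=str2bool, default=None, help='whether use gpu')
71 |     parser.add_argument('--train_rate', type=str2float, default=None, help='the train set rate')
72 |     args = parser.parse_args(['--gpu', 'false', '--train_rate', '0.7'])
73 |     print(args.gpu, args.train_rate)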
--------------------------------------------------------------------------------
/STFGNN/utils/dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | utility functions for the data preprocessing stage
3 | """
4 | import numpy as np
5 | import time
6 | from datetime import datetime, timedelta
7 | from collections import defaultdict
8 |
9 |
10 | def parse_time(time_in, timezone_offset_in_minute=0):
11 | """
12 |     convert a time string in the json time_format into a local datetime
13 |     """
14 |     date = datetime.strptime(time_in, '%Y-%m-%dT%H:%M:%SZ')  # this is UTC time
15 | return date + timedelta(minutes=timezone_offset_in_minute)
16 |
17 |
18 | def cal_basetime(start_time, base_zero):
19 | """
20 |     used to split a trajectory into sessions,
21 |     the idea: given a start_time, find a base time base_time,
22 |     points falling between base_time and base_time + time_length are grouped into one session,
23 |     base_time is chosen this way so that the same hour-of-day slot is always encoded to the same number
24 | """
25 | if base_zero:
26 | return start_time - timedelta(hours=start_time.hour,
27 | minutes=start_time.minute,
28 | seconds=start_time.second,
29 | microseconds=start_time.microsecond)
30 | else:
31 | # time length = 12
32 | if start_time.hour < 12:
33 | return start_time - timedelta(hours=start_time.hour,
34 | minutes=start_time.minute,
35 | seconds=start_time.second,
36 | microseconds=start_time.microsecond)
37 | else:
38 | return start_time - timedelta(hours=start_time.hour - 12,
39 | minutes=start_time.minute,
40 | seconds=start_time.second,
41 | microseconds=start_time.microsecond)
42 |
43 |
44 | def cal_timeoff(now_time, base_time):
45 | """
46 |     compute the difference between two times; the return value is in hours
47 |     """
48 |     # first align now to the hour
49 | delta = now_time - base_time
50 | return delta.days * 24 + delta.seconds / 3600
51 |
52 |
53 | def caculate_time_sim(data):
54 | time_checkin_set = defaultdict(set)
55 | tim_size = data['tim_size']
56 | data_neural = data['data']
57 | for uid in data_neural:
58 | uid_sessions = data_neural[uid]
59 | for session in uid_sessions:
60 | for checkin in session:
61 | timid = checkin[1]
62 | locid = checkin[0]
63 | if timid not in time_checkin_set:
64 | time_checkin_set[timid] = set()
65 | time_checkin_set[timid].add(locid)
66 | sim_matrix = np.zeros((tim_size, tim_size))
67 | for i in range(tim_size):
68 | for j in range(tim_size):
69 | set_i = time_checkin_set[i]
70 | set_j = time_checkin_set[j]
71 | if len(set_i | set_j) != 0:
72 | jaccard_ij = len(set_i & set_j) / len(set_i | set_j)
73 | sim_matrix[i][j] = jaccard_ij
74 | return sim_matrix
75 |
76 |
77 | def parse_coordinate(coordinate):
78 | items = coordinate[1:-1].split(',')
79 | return float(items[0]), float(items[1])
80 |
81 |
82 | def string2timestamp(strings, offset_frame):
83 | ts = []
84 | for t in strings:
85 | dtstr = '-'.join([t[:4].decode(), t[4:6].decode(), t[6:8].decode()])
86 | slot = int(t[8:]) - 1
87 | ts.append(np.datetime64(dtstr, 'm') + slot * offset_frame)
88 | return ts # [numpy.datetime64('2014-01-01T00:00'), ...]
89 |
90 |
91 | def timestamp2array(timestamps, t):
92 | """
93 |     convert every timestamp in the sequence into a feature array, taking weekday and hour into account,
94 |     timestamp: numpy.datetime64('2013-07-01T00:00:00.000000000')
95 | 
96 |     Args:
97 |         timestamps: sequence of timestamps
98 |         t: number of time steps per day
99 | 
100 |     Returns:
101 |         np.ndarray: feature array, shape: (len(timestamps), ext_dim)
102 | """
103 | vec_wday = [time.strptime(
104 | str(t)[:10], '%Y-%m-%d').tm_wday for t in timestamps]
105 | vec_hour = [time.strptime(str(t)[11:13], '%H').tm_hour for t in timestamps]
106 | vec_minu = [time.strptime(str(t)[14:16], '%M').tm_min for t in timestamps]
107 | ret = []
108 | for idx, wday in enumerate(vec_wday):
109 | # day
110 | v = [0 for _ in range(7)]
111 | v[wday] = 1
112 |         if wday >= 5:  # 0 is Monday, 6 is Sunday
113 | v.append(0) # weekend
114 | else:
115 | v.append(1) # weekday len(v)=8
116 | # hour
117 | v += [0 for _ in range(t)] # len(v)=8+T
118 | hour = vec_hour[idx]
119 | minu = vec_minu[idx]
120 |         # 24*60/t is the number of minutes covered by one time step
121 |         # hour * 60 + minu is the number of minutes elapsed since 0:00; the quotient is the index of the current time step
122 | # print(hour, minu, T, (hour * 60 + minu) / (24 * 60 / T))
123 |         v[int((hour * 60 + minu) / (24 * 60 / t)) + 8] = 1
124 |         # the +8 offset skips the 8 leading entries of v that encode the weekday
125 | if hour >= 18 or hour < 6:
126 | v.append(0) # night
127 | else:
128 | v.append(1) # day
129 | ret.append(v) # len(v)=7+1+T+1=T+9
130 | return np.asarray(ret)
131 |
132 |
133 | def timestamp2vec_origin(timestamps):
134 | """
135 |     convert every timestamp in the sequence into a feature array, considering only the weekday,
136 |     timestamp: numpy.datetime64('2013-07-01T00:00:00.000000000')
137 | 
138 |     Args:
139 |         timestamps: sequence of timestamps
140 | 
141 |     Returns:
142 |         np.ndarray: feature array, shape: (len(timestamps), 8)
143 | """
144 | vec = [time.strptime(str(t)[:10], '%Y-%m-%d').tm_wday for t in timestamps]
145 | ret = []
146 | for i in vec:
147 | v = [0 for _ in range(7)]
148 | v[i] = 1
149 | if i >= 5:
150 | v.append(0) # weekend
151 | else:
152 | v.append(1) # weekday
153 | ret.append(v)
154 | return np.asarray(ret)
155 |
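156 | 
157 | # Illustrative usage only: with t time steps per day, timestamp2array produces
158 | # 7 weekday bits + 1 weekend bit + t time-of-day bits + 1 day/night bit,
159 | # i.e. t + 9 features per timestamp.
160 | if __name__ == '__main__':
161 |     stamps = [np.datetime64('2013-07-01T08:30'), np.datetime64('2013-07-06T23:00')]
162 |     feats = timestamp2array(stamps, t=48)  # 48 half-hour steps per day
163 |     print(feats.shape)  # (2, 57)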
--------------------------------------------------------------------------------
/STFGNN/utils/normalization.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Scaler:
5 | """
6 |     normalization interface
7 | """
8 |
9 | def transform(self, data):
10 | """
11 |         normalize the data
12 | 
13 |         Args:
14 |             data(np.ndarray): data before normalization
15 | 
16 |         Returns:
17 |             np.ndarray: normalized data
18 | """
19 | raise NotImplementedError("Transform not implemented")
20 |
21 | def inverse_transform(self, data):
22 | """
23 |         de-normalize the data
24 | 
25 |         Args:
26 |             data(np.ndarray): normalized data
27 | 
28 |         Returns:
29 |             np.ndarray: data restored to the original scale
30 | """
31 | raise NotImplementedError("Inverse_transform not implemented")
32 |
33 |
34 | class NoneScaler(Scaler):
35 | """
36 |     no normalization
37 | """
38 |
39 | def transform(self, data):
40 | return data
41 |
42 | def inverse_transform(self, data):
43 | return data
44 |
45 |
46 | class NormalScaler(Scaler):
47 | """
48 |     normalize by dividing by the maximum value
49 | x = x / x.max
50 | """
51 |
52 | def __init__(self, maxx):
53 | self.max = maxx
54 |
55 | def transform(self, data):
56 | return data / self.max
57 |
58 | def inverse_transform(self, data):
59 | return data * self.max
60 |
61 |
62 | class StandardScaler(Scaler):
63 | """
64 |     Z-score normalization
65 | x = (x - x.mean) / x.std
66 | """
67 |
68 | def __init__(self, mean, std):
69 | self.mean = mean
70 | self.std = std
71 |
72 | def transform(self, data):
73 | return (data - self.mean) / self.std
74 |
75 | def inverse_transform(self, data):
76 | return (data * self.std) + self.mean
77 |
78 |
79 | class MinMax01Scaler(Scaler):
80 | """
81 |     MinMax normalization, output range [0, 1]
82 | x = (x - min) / (max - min)
83 | """
84 |
85 | def __init__(self, minn, maxx):
86 | self.min = minn
87 | self.max = maxx
88 |
89 | def transform(self, data):
90 | return (data - self.min) / (self.max - self.min)
91 |
92 | def inverse_transform(self, data):
93 | return data * (self.max - self.min) + self.min
94 |
95 |
96 | class MinMax11Scaler(Scaler):
97 | """
98 |     MinMax normalization, output range [-1, 1]
99 | x = (x - min) / (max - min)
100 | x = x * 2 - 1
101 | """
102 |
103 | def __init__(self, minn, maxx):
104 | self.min = minn
105 | self.max = maxx
106 |
107 | def transform(self, data):
108 | return ((data - self.min) / (self.max - self.min)) * 2. - 1.
109 |
110 | def inverse_transform(self, data):
111 | return ((data + 1.) / 2.) * (self.max - self.min) + self.min
112 |
113 |
114 | class LogScaler(Scaler):
115 | """
116 | Log scaler
117 | x = log(x+eps)
118 | """
119 |
120 | def __init__(self, eps=0.999):
121 | self.eps = eps
122 |
123 | def transform(self, data):
124 | return np.log(data + self.eps)
125 |
126 | def inverse_transform(self, data):
127 | return np.exp(data) - self.eps
128 |
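129 | 
130 | # Illustrative usage only: a scaler is built from training-set statistics and
131 | # reused on every split; inverse_transform recovers the original scale.
132 | if __name__ == '__main__':
133 |     train = np.array([10.0, 20.0, 30.0])
134 |     scaler = StandardScaler(mean=train.mean(), std=train.std())
135 |     z = scaler.transform(train)
136 |     print(z, scaler.inverse_transform(z))  # standardized values, then the original ones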
--------------------------------------------------------------------------------
/STFGNN/utils/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import logging
3 | import datetime
4 | import os
5 | import sys
6 | import numpy as np
7 |
8 |
9 | def get_executor(config, model):
10 | """
11 |     create the executor according to config['executor']
12 |
13 | Args:
14 | config(ConfigParser): config
15 | model(AbstractModel): model
16 |
17 | Returns:
18 | AbstractExecutor: the loaded executor
19 | """
20 | if config["task"] == "single_step":
21 | try:
22 | return getattr(importlib.import_module('libcity.executor.single_step_executor'),
23 | config['executor'])(config, model)
24 | except AttributeError:
25 | raise AttributeError('executor is not found')
26 | elif config["task"] == "multi_step":
27 | try:
28 | return getattr(importlib.import_module('libcity.executor.multi_step_executor'),
29 | config['executor'])(config, model)
30 | except AttributeError:
31 | raise AttributeError('executor is not found')
32 | else:
33 | raise AttributeError('task is not found')
34 |
35 |
36 |
37 | def get_model(config, data_feature):
38 | """
39 |     create the model according to config['model']
40 |
41 | Args:
42 | config(ConfigParser): config
43 | data_feature(dict): feature of the data
44 |
45 | Returns:
46 | AbstractModel: the loaded model
47 | """
48 | if config['task'] == 'multi_step':
49 | print("config[model]: ", config['model'])
50 | try:
51 | return getattr(importlib.import_module('libcity.model.multi_step_model'),
52 | config['model'])(config, data_feature)
53 | except AttributeError:
54 | raise AttributeError('model is not found')
55 | elif config["task"] == "single_step":
56 | print("config[model]: ", config['model'])
57 | try:
58 | return getattr(importlib.import_module('libcity.model.single_step_model'),
59 | config['model'])(config, data_feature)
60 | except AttributeError:
61 | raise AttributeError('model is not found')
62 |
63 | else:
64 | raise AttributeError('task is not found')
65 |
66 |
67 | def get_evaluator(config):
68 | """
69 |     create the evaluator according to config['evaluator']
70 |
71 | Args:
72 | config(ConfigParser): config
73 |
74 | Returns:
75 | AbstractEvaluator: the loaded evaluator
76 | """
77 | try:
78 | return getattr(importlib.import_module('libcity.evaluator'),
79 | config['evaluator'])(config)
80 | except AttributeError:
81 | raise AttributeError('evaluator is not found')
82 |
83 |
84 | def get_logger(config, name=None):
85 | """
86 |     get a Logger object
87 |
88 | Args:
89 | config(ConfigParser): config
90 | name: specified name
91 |
92 | Returns:
93 | Logger: logger
94 | """
95 | log_dir = './libcity/log'
96 | if not os.path.exists(log_dir):
97 | os.makedirs(log_dir)
98 | log_filename = '{}-{}-{}.log'.format(
99 | config['model'], config['dataset'], get_local_time())
100 | logfilepath = os.path.join(log_dir, log_filename)
101 |
102 | logger = logging.getLogger(name)
103 |
104 | log_level = config.get('log_level', 'INFO')
105 |
106 | if log_level.lower() == 'info':
107 | level = logging.INFO
108 | elif log_level.lower() == 'debug':
109 | level = logging.DEBUG
110 | elif log_level.lower() == 'error':
111 | level = logging.ERROR
112 | elif log_level.lower() == 'warning':
113 | level = logging.WARNING
114 | elif log_level.lower() == 'critical':
115 | level = logging.CRITICAL
116 | else:
117 | level = logging.INFO
118 |
119 | logger.setLevel(level)
120 |
121 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
122 | file_handler = logging.FileHandler(logfilepath)
123 | file_handler.setFormatter(formatter)
124 |
125 | console_formatter = logging.Formatter(
126 | '%(asctime)s - %(levelname)s - %(message)s')
127 | console_handler = logging.StreamHandler(sys.stdout)
128 | console_handler.setFormatter(console_formatter)
129 |
130 | logger.addHandler(file_handler)
131 | logger.addHandler(console_handler)
132 |
133 | logger.info('Log directory: %s', log_dir)
134 | return logger
135 |
136 |
137 | def get_local_time():
138 | """
139 |     get the current local time
140 | 
141 |     Return:
142 |         str: the current time formatted as '%b-%d-%Y_%H-%M-%S'
143 | """
144 | cur = datetime.datetime.now()
145 | cur = cur.strftime('%b-%d-%Y_%H-%M-%S')
146 | return cur
147 |
148 |
149 | def ensure_dir(dir_path):
150 | """Make sure the directory exists, if it does not exist, create it.
151 |
152 | Args:
153 | dir_path (str): directory path
154 | """
155 | if not os.path.exists(dir_path):
156 | os.makedirs(dir_path)
157 |
158 |
159 | def trans_naming_rule(origin, origin_rule, target_rule):
160 | """
161 |     naming-convention conversion
162 | 
163 |     Args:
164 |         origin (str): variable name in the source naming convention
165 |         origin_rule (str): source naming convention, an enum-like string
166 |         target_rule (str): target naming convention, an enum-like string
167 | 
168 |     Return:
169 |         target (str): the converted name
170 |     """
171 |     # TODO: the input is assumed to follow origin_rule; no check is performed here
172 | target = ''
173 | if origin_rule == 'upper_camel_case' and target_rule == 'under_score_rule':
174 | for i, c in enumerate(origin):
175 | if i == 0:
176 | target = c.lower()
177 | else:
178 | target += '_' + c.lower() if c.isupper() else c
179 | return target
180 | else:
181 | raise NotImplementedError(
182 |             'trans naming rule only supports conversion from upper_camel_case '
183 |             'to under_score_rule')
184 |
185 |
186 | def preprocess_data(data, config):
187 | """
188 | split by input_window and output_window
189 |
190 | Args:
191 | data: shape (T, ...)
192 |
193 | Returns:
194 | np.ndarray: (train_size/test_size, input_window, ...)
195 | (train_size/test_size, output_window, ...)
196 |
197 | """
198 | train_rate = config.get('train_rate', 0.7)
199 | eval_rate = config.get('eval_rate', 0.1)
200 |
201 | input_window = config.get('input_window', 12)
202 | output_window = config.get('output_window', 3)
203 |
204 | x, y = [], []
205 | for i in range(len(data) - input_window - output_window):
206 | a = data[i: i + input_window + output_window] # (in+out, ...)
207 | x.append(a[0: input_window]) # (in, ...)
208 | y.append(a[input_window: input_window + output_window]) # (out, ...)
209 | x = np.array(x) # (num_samples, in, ...)
210 | y = np.array(y) # (num_samples, out, ...)
211 |
212 | train_size = int(x.shape[0] * (train_rate + eval_rate))
213 | trainx = x[:train_size] # (train_size, in, ...)
214 | trainy = y[:train_size] # (train_size, out, ...)
215 | testx = x[train_size:x.shape[0]] # (test_size, in, ...)
216 | testy = y[train_size:x.shape[0]] # (test_size, out, ...)
217 | return trainx, trainy, testx, testy
218 |
219 |
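220 | 
221 | # Illustrative usage only: split a toy series of 100 steps into sliding windows
222 | # using the default 12-step input window and 3-step output window.
223 | if __name__ == '__main__':
224 |     series = np.arange(100).reshape(100, 1)
225 |     trainx, trainy, testx, testy = preprocess_data(series, {'train_rate': 0.7, 'eval_rate': 0.1})
226 |     print(trainx.shape, trainy.shape, testx.shape, testy.shape)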
--------------------------------------------------------------------------------
/STFGNN/utils/visualize.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import json
3 | from utils.utils import ensure_dir
4 | import os
5 |
6 |
7 | class VisHelper:
8 | def __init__(self, _config):
9 |
10 | self.config = _config
11 | self.raw_path = './raw_data/'
12 | self.dataset = _config.get("dataset", "")
13 | self.save_path = _config.get("save_path", "./visualized_data/")
14 |
15 | # get type
16 | self.config_path = self.raw_path + self.dataset + '/config.json'
17 | self.data_config = json.load(open(self.config_path, 'r'))
18 | if 'dyna' in self.data_config and ['state'] == self.data_config['dyna']['including_types']:
19 | self.type = 'state'
20 | elif 'grid' in self.data_config and ['state'] == self.data_config['grid']['including_types']:
21 | self.type = 'grid'
22 | else:
23 | self.type = 'trajectory'
24 | # get geo and dyna files
25 | all_files = os.listdir(self.raw_path + self.dataset)
26 | self.geo_file = []
27 | self.geo_path = None
28 | self.dyna_file = []
29 | self.dyna_path = None
30 | self.grid_file = []
31 | self.grid_path = None
32 | for file in all_files:
33 | if file.split('.')[1] == 'geo':
34 | self.geo_file.append(file)
35 | if file.split('.')[1] == 'dyna':
36 | self.dyna_file.append(file)
37 | if file.split('.')[1] == 'grid':
38 | self.grid_file.append(file)
39 |
40 | assert len(self.geo_file) == 1
41 |
42 | # reserved columns
43 | self.geo_reserved_lst = ['type', 'coordinates']
44 | self.dyna_reserved_lst = ['dyna_id', 'type', 'time', 'entity_id', 'traj_id', 'coordinates']
45 | self.grid_reserved_lst = ['dyna_id', 'type', 'time', 'row_id', 'column_id']
46 |
47 | def visualize(self):
48 | if self.type == 'trajectory':
49 | # geo
50 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0]
51 | self._visualize_geo()
52 |
53 | # dyna
54 | for dyna_file in self.dyna_file:
55 | self.dyna_path = self.raw_path + self.dataset + '/' + dyna_file
56 | self._visualize_dyna()
57 |
58 | elif self.type == 'state':
59 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0]
60 | for dyna_file in self.dyna_file:
61 | self.dyna_path = self.raw_path + self.dataset + '/' + dyna_file
62 | self._visualize_state()
63 | elif self.type == 'grid':
64 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0]
65 | for grid_file in self.grid_file:
66 | self.grid_path = self.raw_path + self.dataset + '/' + grid_file
67 | self._visualize_grid()
68 |
69 | def _visualize_state(self):
70 | geo_file = pd.read_csv(self.geo_path, index_col=None)
71 | dyna_file = pd.read_csv(self.dyna_path, index_col=None)
72 | geojson_obj = {'type': "FeatureCollection", 'features': []}
73 |
74 | # get feature_lst
75 | geo_feature_lst = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst]
76 | dyna_feature_lst = [_ for _ in list(dyna_file.columns) if _ not in self.dyna_reserved_lst]
77 |
78 | for _, row in geo_file.iterrows():
79 |
80 | # get feature dictionary
81 | geo_id = row['geo_id']
82 | feature_dct = row[geo_feature_lst].to_dict()
83 | dyna_i = dyna_file[dyna_file['entity_id'] == geo_id]
84 | for f in dyna_feature_lst:
85 | feature_dct[f] = float(dyna_i[f].mean())
86 |
87 | # form a feature
88 | feature_i = dict()
89 | feature_i['type'] = 'Feature'
90 | feature_i['properties'] = feature_dct
91 | feature_i['geometry'] = {}
92 | feature_i['geometry']['type'] = row['type']
93 | feature_i['geometry']['coordinates'] = eval(row['coordinates'])
94 | geojson_obj['features'].append(feature_i)
95 |
96 | ensure_dir(self.save_path)
97 | save_name = "_".join(self.dyna_path.split('/')[-1].split('.')) + '.json'
98 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w',
99 | encoding='utf-8'),
100 | ensure_ascii=False, indent=4)
101 |
102 | def _visualize_grid(self):
103 | geo_file = pd.read_csv(self.geo_path, index_col=None)
104 | grid_file = pd.read_csv(self.grid_path, index_col=None)
105 | geojson_obj = {'type': "FeatureCollection", 'features': []}
106 |
107 | # get feature_lst
108 | geo_feature_lst = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst]
109 | grid_feature_lst = [_ for _ in list(grid_file.columns) if _ not in self.grid_reserved_lst]
110 |
111 | for _, row in geo_file.iterrows():
112 |
113 | # get feature dictionary
114 | row_id, column_id = row['row_id'], row['column_id']
115 | feature_dct = row[geo_feature_lst].to_dict()
116 | dyna_i = grid_file[(grid_file['row_id'] == row_id) & (grid_file['column_id'] == column_id)]
117 | for f in grid_feature_lst:
118 | feature_dct[f] = float(dyna_i[f].mean())
119 |
120 | # form a feature
121 | feature_i = dict()
122 | feature_i['type'] = 'Feature'
123 | feature_i['properties'] = feature_dct
124 | feature_i['geometry'] = {}
125 | feature_i['geometry']['type'] = row['type']
126 | feature_i['geometry']['coordinates'] = eval(row['coordinates'])
127 | geojson_obj['features'].append(feature_i)
128 |
129 | ensure_dir(self.save_path)
130 | save_name = "_".join(self.grid_path.split('/')[-1].split('.')) + '.json'
131 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w',
132 | encoding='utf-8'),
133 | ensure_ascii=False, indent=4)
134 |
135 | def _visualize_geo(self):
136 | geo_file = pd.read_csv(self.geo_path, index_col=None)
137 | geojson_obj = {'type': "FeatureCollection", 'features': []}
138 | extra_feature = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst]
139 | for _, row in geo_file.iterrows():
140 | feature_dct = row[extra_feature].to_dict()
141 | feature_i = dict()
142 | feature_i['type'] = 'Feature'
143 | feature_i['properties'] = feature_dct
144 | feature_i['geometry'] = {}
145 | feature_i['geometry']['type'] = row['type']
146 | feature_i['geometry']['coordinates'] = eval(row['coordinates'])
147 | geojson_obj['features'].append(feature_i)
148 |
149 | ensure_dir(self.save_path)
150 | save_name = "_".join(self.geo_path.split('/')[-1].split('.')) + '.json'
151 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w',
152 | encoding='utf-8'),
153 | ensure_ascii=False, indent=4)
154 |
155 | def _visualize_dyna(self):
156 | dyna_file = pd.read_csv(self.dyna_path, index_col=None)
157 | dyna_feature_lst = [_ for _ in list(dyna_file.columns) if _ not in self.dyna_reserved_lst]
158 | geojson_obj = {'type': "FeatureCollection", 'features': []}
159 | trajectory = {}
160 | GPS_traj = "coordinates" in dyna_file.columns
161 | if not GPS_traj:
162 | geo_file = pd.read_csv(self.geo_path, index_col=None)
163 |
164 | a = dyna_file.groupby("entity_id")
165 | for entity_id, entity_value in a:
166 | if "traj_id" in dyna_file.columns:
167 | trajectory[entity_id] = {}
168 | entity_value = entity_value.groupby("traj_id")
169 | for traj_id, traj_value in entity_value:
170 | feature_dct = {"usr_id": entity_id, "traj_id": traj_id}
171 | for f in dyna_feature_lst:
172 | feature_dct[f] = float(traj_value[f].mean())
173 | feature_i = dict()
174 | feature_i['type'] = 'Feature'
175 | feature_i['properties'] = feature_dct
176 | feature_i['geometry'] = {}
177 | feature_i['geometry']['type'] = "LineString"
178 | feature_i['geometry']['coordinates'] = []
179 | if GPS_traj:
180 | for _, row in traj_value.iterrows():
181 | feature_i['geometry']['coordinates'].append(eval(row['coordinates']))
182 | else:
183 | for _, row in traj_value.iterrows():
184 | coor = eval(geo_file.loc[row['location']]['coordinates'])
185 | if _ == 0:
186 | feature_i['geometry']['coordinates'].append(coor[0])
187 | feature_i['geometry']['coordinates'].append(coor[1])
188 | geojson_obj['features'].append(feature_i)
189 |
190 | else:
191 | feature_dct = {"usr_id": entity_id}
192 | feature_i = dict()
193 | feature_i['type'] = 'Feature'
194 | feature_i['properties'] = feature_dct
195 | feature_i['geometry'] = {}
196 | feature_i['geometry']['type'] = "LineString"
197 | feature_i['geometry']['coordinates'] = []
198 | if GPS_traj:
199 | for _, row in entity_value.iterrows():
200 | feature_i['geometry']['coordinates'].append(eval(row['coordinates']))
201 | else:
202 | for _, row in entity_value.iterrows():
203 | coor = eval(geo_file.loc[row['location']]['coordinates'])
204 | if _ == 0:
205 | feature_i['geometry']['coordinates'].append(coor[0])
206 | feature_i['geometry']['coordinates'].append(coor[1])
207 | geojson_obj['features'].append(feature_i)
208 |
209 | ensure_dir(self.save_path)
210 | save_name = "_".join(self.dyna_path.split('/')[-1].split('.')) + '.json'
211 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w',
212 | encoding='utf-8'),
213 | ensure_ascii=False, indent=4)
214 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ## STFGNN-Pytorch
2 |
3 | This is a PyTorch re-implementation of the STFGNN model described in https://arxiv.org/abs/2012.09641.
4 |
5 | ### Quick start
6 |
7 | Put your data in **STFGNN/raw_data**.
8 |
9 | For example, to run the model on the PEMS03 dataset, put **adj_mx.pkl** and **PEMS03.npz** in **STFGNN/raw_data/PEMS03/** (a quick sanity check of the data format is sketched at the end of this readme).
10 |
11 | Set appropriate values for the parameters in **STFGNN/config/*.json**.
12 |
13 | Run **python main.py** to train and evaluate the model.
14 |
15 | ### Discussion
16 | Contact me via lwm568@buaa.edu.cn.
17 | Feel free to open an issue; questions and discussion are welcome.
18 |
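19 | ### Checking the raw data
20 | 
21 | The snippet below is only a minimal sanity check, written under the assumption that the `.npz` archive stores the sensor readings under the key `data` (as in the publicly released PEMS archives); adjust the key or path if your copy differs.
22 | 
23 | ```python
24 | import numpy as np
25 | 
26 | data = np.load("raw_data/PEMS03/PEMS03.npz")["data"]
27 | print(data.shape)  # expected: (time steps, num_nodes, feature_dim)
28 | ```
29 | 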
--------------------------------------------------------------------------------