├── STFGNN ├── config │ ├── PEMS03.json │ └── STFGNN.json ├── data │ ├── dataset │ │ ├── multi_step_dataset.py │ │ └── stfgnn_dataset.py │ └── utils.py ├── evaluator │ └── evaluator.py ├── executor │ ├── multi_step_executor.py │ └── utils.py ├── main.py ├── model │ ├── STFGNN.py │ └── loss.py ├── raw_data │ └── PEMS03 │ │ ├── PEMS03.npz │ │ └── adj_mx.pkl └── utils │ ├── GPS_utils.py │ ├── Optim.py │ ├── argument_list.py │ ├── dataset.py │ ├── normalization.py │ ├── utils.py │ └── visualize.py └── readme.md /STFGNN/config/PEMS03.json: -------------------------------------------------------------------------------- 1 | { 2 | "filename": "raw_data/PEMS03/PEMS03.npz", 3 | 4 | "adj_type": "connectivity", 5 | "//": "adj_type be distance or connectivity", 6 | "adj_filename": "raw_data/PEMS03/adj_mx.pkl", 7 | 8 | "input_dim": 1, 9 | "output_dim": 1, 10 | 11 | "seq_len": 26208, 12 | "num_nodes": 358, 13 | "train_rate": 0.6, 14 | "eval_rate": 0.2, 15 | "feature_dim": 1, 16 | "mask": false 17 | 18 | } 19 | -------------------------------------------------------------------------------- /STFGNN/config/STFGNN.json: -------------------------------------------------------------------------------- 1 | { 2 | "device": "cuda:1", 3 | "gpu": true, 4 | "gpu_id": 1, 5 | "save": true, 6 | "cuda": true, 7 | 8 | "normalize": 1, 9 | "window": 12, 10 | "horizon": 12, 11 | "batch_size": 64, 12 | "order": 1, 13 | "lag": 12, 14 | "period": 288, 15 | "sparsity": 0.01, 16 | 17 | "hidden_dims": [[64, 64, 64], [64, 64, 64], [64, 64, 64]], 18 | "first_layer_embedding_size": 64, 19 | "out_layer_dim": 128, 20 | "strides": 4, 21 | "temporal_emb": true, 22 | "spatial_emb": true, 23 | "activation": "GLU", 24 | "module_type": "individual", 25 | "//": ["individual", "sharing"], 26 | 27 | "train_loss": "mae", 28 | "clip": 10, 29 | "epochs": 100, 30 | "seed": 54321, 31 | "log_interval": 2000, 32 | "optim": "adam", 33 | "lr": 0.001, 34 | "patience": 20, 35 | "lr_decay": false, 36 | "lr_scheduler": "multisteplr", 37 | "lr_decay_ratio": 0.1, 38 | "lr_decay_steps": [5, 20, 40, 70], 39 | "step_size": 10 40 | } 41 | -------------------------------------------------------------------------------- /STFGNN/data/dataset/multi_step_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | import pickle 5 | import torch 6 | from torch.autograd import Variable 7 | 8 | from data.utils import DataLoader, load_pickle, DataLoaderM_new 9 | from utils.normalization import StandardScaler, NormalScaler, NoneScaler, \ 10 | MinMax01Scaler, MinMax11Scaler, LogScaler 11 | 12 | 13 | def get_adjacency_matrix(distance_df, sensor_ids, normalized_k=0.1): 14 | """ 15 | 16 | :param distance_df: data frame with three columns: [from, to, distance]. 17 | :param sensor_ids: list of sensor ids. 18 | :param normalized_k: entries that become lower than normalized_k after normalization are set to zero for sparsity. 19 | :return: 20 | """ 21 | num_sensors = len(sensor_ids) 22 | dist_mx = np.zeros((num_sensors, num_sensors), dtype=np.float32) 23 | dist_mx[:] = np.inf 24 | # Builds sensor id to index map. 25 | sensor_id_to_ind = {} 26 | for i, sensor_id in enumerate(sensor_ids): 27 | sensor_id_to_ind[sensor_id] = i 28 | 29 | # Fills cells in the matrix with distances. 
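    # Sensor pairs that never appear in distance_df keep their initial np.inf distance, so the
    # Gaussian kernel below, exp(-(d/std)^2), maps them to 0 and they drop out of the graph;
    # surviving weights smaller than `normalized_k` are also zeroed to keep the matrix sparse.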
30 | for row in distance_df.values: 31 | if row[0] not in sensor_id_to_ind or row[1] not in sensor_id_to_ind: 32 | continue 33 | dist_mx[sensor_id_to_ind[row[0]], sensor_id_to_ind[row[1]]] = row[2] 34 | 35 | # Calculates the standard deviation as theta. 36 | distances = dist_mx[~np.isinf(dist_mx)].flatten() 37 | std = distances.std() 38 | adj_mx = np.exp(-np.square(dist_mx / std)) 39 | # Make the adjacent matrix symmetric by taking the max. 40 | # adj_mx = np.maximum.reduce([adj_mx, adj_mx.T]) 41 | 42 | # Sets entries that lower than a threshold, i.e., k, to zero for sparsity. 43 | adj_mx[adj_mx < normalized_k] = 0 44 | return sensor_ids, sensor_id_to_ind, adj_mx 45 | 46 | 47 | class MultiStepDataset(object): 48 | 49 | def __init__(self, config): 50 | 51 | self.config = config 52 | self.file_name = self.config.get("filename", " ") 53 | self.adj_filename = self.config.get("adj_filename", "") 54 | self.graph_sensor_ids = self.config.get("graph_sensor_ids", "") 55 | self.distances_file = self.config.get("distances_file", "") 56 | self.adj_type = self.config.get("adj_type", None) 57 | 58 | self.train_rate = self.config.get("train_rate", 0.6) 59 | self.valid_rate = self.config.get("eval_rate", 0.2) 60 | self.cuda = self.config.get("cuda", True) 61 | 62 | self.horizon = self.config.get("horizon", 12) 63 | self.window = self.config.get("window", 12) 64 | 65 | self.normalize = self.config.get("normalize", 2) 66 | self.batch_size = self.config.get("batch_size", 64) 67 | self.adj_mx = None 68 | self.add_time_in_day = self.config.get("add_time_in_day", False) 69 | self.add_day_in_week = self.config.get("add_day_in_week", False) 70 | self.input_dim = self.config.get("input_dim", 1) 71 | self.output_dim = self.config.get("output_dim", 1) 72 | #self.ensure_adj_mat() 73 | self._load_origin_data(self.file_name, self.adj_filename) 74 | 75 | self.data = self._gene_dataset() 76 | 77 | def ensure_adj_mat(self): 78 | if os.path.exists(self.adj_filename): 79 | return 80 | else: 81 | with open(self.graph_sensor_ids) as f: 82 | sensor_ids = f.read().strip().split(',') 83 | distance_df = pd.read_csv(self.distances_file, dtype={'from': 'str', 'to': 'str'}) 84 | _, sensor_id_to_ind, adj_mx = get_adjacency_matrix(distance_df, sensor_ids, normalized_k=0.1) 85 | with open(self.adj_filename, 'wb') as f: 86 | pickle.dump([sensor_ids, sensor_id_to_ind, adj_mx], f, protocol=2) 87 | return 88 | 89 | def _load_origin_data(self, file_name, adj_name): 90 | if file_name[-3:] == "txt": 91 | fin = open(file_name) 92 | self.rawdat = np.loadtxt(fin, delimiter=',') 93 | elif file_name[-3:] == "csv": 94 | self.rawdat = pd.read_csv(file_name).values 95 | elif file_name[-2:] == "h5": 96 | self.rawdat = pd.read_hdf(file_name) 97 | elif file_name[-3:] == "npz": 98 | mid_dat = np.load(file_name) 99 | self.rawdat = mid_dat[mid_dat.files[0]] 100 | else: 101 | raise ValueError('file_name type error!') 102 | 103 | if adj_name == "": 104 | self.adj_mx = None 105 | elif adj_name[-3:] == "pkl": 106 | sensor_ids, sensor_id_to_ind, adj = load_pickle(adj_name) 107 | if self.adj_type == "distance": 108 | self.adj_mx = adj 109 | else: 110 | row, col = adj.shape 111 | for i in range(row): 112 | for j in range(i, col): 113 | if adj[i][j] > 0: 114 | adj[i][j] = 1 115 | adj[j][i] = 1 116 | else: 117 | adj[i][j] = 0 118 | adj[j][i] = 0 119 | self.adj_mx = adj 120 | else: 121 | raise ValueError('adj_name error!') 122 | 123 | def _get_scalar(self, x_train, y_train): 124 | """ 125 | 根据全局参数`scaler_type`选择数据归一化方法 126 | 127 | Args: 128 | x_train: 训练数据X 129 | 
y_train: 训练数据y 130 | 131 | Returns: 132 | Scaler: 归一化对象 133 | """ 134 | if self.normalize == 2: 135 | scaler = NormalScaler(maxx=max(x_train.max(), y_train.max())) 136 | print('NormalScaler max: ' + str(scaler.max)) 137 | elif self.normalize == 1: 138 | scaler = StandardScaler(mean=x_train.mean(), std=x_train.std()) 139 | print('StandardScaler mean: ' + str(scaler.mean) + ', std: ' + str(scaler.std)) 140 | elif self.normalize == 3: 141 | scaler = MinMax01Scaler( 142 | maxx=max(x_train.max(), y_train.max()), minn=min(x_train.min(), y_train.min())) 143 | print('MinMax01Scaler max: ' + str(scaler.max) + ', min: ' + str(scaler.min)) 144 | elif self.normalize == 4: 145 | scaler = MinMax11Scaler( 146 | maxx=max(x_train.max(), y_train.max()), minn=min(x_train.min(), y_train.min())) 147 | print('MinMax11Scaler max: ' + str(scaler.max) + ', min: ' + str(scaler.min)) 148 | elif self.normalize == 5: 149 | scaler = LogScaler() 150 | print('LogScaler') 151 | elif self.normalize == 0: 152 | scaler = NoneScaler() 153 | print('NoneScaler') 154 | else: 155 | raise ValueError('Scaler type error!') 156 | return scaler 157 | 158 | def _generate_graph_seq2seq_io_data( 159 | self, df, x_offsets, y_offsets, add_time_in_day=False, add_day_in_week=False, scaler=None 160 | ): 161 | """ 162 | 生成seq2seq样本数据 163 | :param data: np数据 [B, N, D] 其中D为3 164 | :param x_offsets: 165 | :param y_offsets: 166 | :return: 167 | """ 168 | num_samples, num_nodes = df.shape[0], df.shape[1] 169 | if not isinstance(df, np.ndarray): 170 | data = np.expand_dims(df.values, axis=-1) 171 | data_list = [data] 172 | else: 173 | data_list = [df] 174 | if add_time_in_day: 175 | time_ind = (df.index.values - df.index.values.astype("datetime64[D]")) / np.timedelta64(1, "D") 176 | time_in_day = np.tile(time_ind, [1, num_nodes, 1]).transpose((2, 1, 0)) 177 | data_list.append(time_in_day) 178 | if add_day_in_week: 179 | day_in_week = np.zeros(shape=(num_samples, num_nodes, 7)) 180 | day_in_week[np.arange(num_samples), :, df.index.dayofweek] = 1 181 | data_list.append(day_in_week) 182 | 183 | data = np.concatenate(data_list, axis=-1) 184 | 185 | x, y = [], [] 186 | # t is the index of the last observation. 187 | min_t = abs(min(x_offsets)) 188 | max_t = abs(num_samples - abs(max(y_offsets))) # Exclusive 189 | for t in range(min_t, max_t): 190 | x_t = data[t + x_offsets, ...] 191 | y_t = data[t + y_offsets, ...] 
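            # x_t gathers the `window` past observations ending at t (offsets -(window-1)..0),
            # y_t the `horizon` future observations (offsets 1..horizon); together they form one sample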
192 | x.append(x_t) 193 | y.append(y_t) 194 | x = np.stack(x, axis=0) 195 | y = np.stack(y, axis=0) 196 | 197 | return x, y 198 | 199 | def _generate_train_val_test(self): 200 | seq_length_x, seq_length_y = self.window, self.horizon 201 | x_offsets = np.arange(-(seq_length_x - 1), 1, 1) 202 | y_offsets = np.arange(1, (seq_length_y + 1), 1) 203 | x, y = self._generate_graph_seq2seq_io_data(self.rawdat, x_offsets, 204 | y_offsets, self.add_time_in_day, self.add_day_in_week) 205 | print("x shape: ", x.shape, ", y shape: ", y.shape) 206 | num_samples = x.shape[0] 207 | num_val = round(num_samples * self.valid_rate) 208 | num_train = round(num_samples * self.train_rate) 209 | num_test = num_samples - num_train - num_val 210 | return [x[:num_train], y[:num_train]], \ 211 | [x[num_train:num_train + num_val], y[num_train:num_train + num_val]], \ 212 | [x[num_train + num_val:], y[num_train + num_val:]] 213 | 214 | def _gene_dataset(self): 215 | data = {} 216 | self.train, self.valid, self.test = self._generate_train_val_test() 217 | x_train, y_train = self.train[0], self.train[1] 218 | x_valid, y_valid = self.valid[0], self.valid[1] 219 | x_test, y_test = self.test[0], self.test[1] 220 | self.scaler = self._get_scalar(x_train[..., :self.output_dim], y_train[..., :self.output_dim]) 221 | x_train[..., :self.output_dim] = self.scaler.transform(x_train[..., :self.output_dim]) 222 | y_train[..., :self.output_dim] = self.scaler.transform(y_train[..., :self.output_dim]) 223 | x_valid[..., :self.output_dim] = self.scaler.transform(x_valid[..., :self.output_dim]) 224 | y_valid[..., :self.output_dim] = self.scaler.transform(y_valid[..., :self.output_dim]) 225 | x_test[..., :self.output_dim] = self.scaler.transform(x_test[..., :self.output_dim]) 226 | y_test[..., :self.output_dim] = self.scaler.transform(y_test[..., :self.output_dim]) 227 | 228 | data['train_loader'] = DataLoader(x_train[..., :self.input_dim], y_train[..., :self.output_dim], 229 | self.batch_size) 230 | data['valid_loader'] = DataLoader(x_valid[..., :self.input_dim], y_valid[..., :self.output_dim], 231 | self.batch_size) 232 | data['test_loader'] = DataLoader(x_test[..., :self.input_dim], y_test[..., :self.output_dim], self.batch_size) 233 | data['scaler'] = self.scaler 234 | data['num_batches'] = x_train.shape[0] / self.batch_size 235 | return data 236 | 237 | def get_data(self): 238 | """ 239 | 返回数据的DataLoader,包括训练数据、测试数据、验证数据 240 | 241 | Returns: 242 | tuple: tuple contains: 243 | train_dataloader: 244 | eval_dataloader: 245 | test_dataloader: 246 | """ 247 | # 加载数据集 248 | 249 | return self.data["train_loader"], self.data["valid_loader"], self.data["test_loader"] 250 | 251 | def get_data_feature(self): 252 | """ 253 | 返回数据集特征,子类必须实现这个函数,返回必要的特征 254 | 255 | Returns: 256 | dict: 包含数据集的相关特征的字典 257 | """ 258 | feature = { 259 | "scaler": self.data["scaler"], 260 | "adj_mx": self.adj_mx, 261 | "num_batches": self.data['num_batches'] 262 | } 263 | 264 | return feature 265 | -------------------------------------------------------------------------------- /STFGNN/data/dataset/stfgnn_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | import pickle 5 | import torch 6 | import time 7 | from torch.autograd import Variable 8 | 9 | from data.dataset.multi_step_dataset import MultiStepDataset 10 | 11 | def gen_data(data, ntr, N): 12 | ''' 13 | if flag: 14 | data=pd.read_csv(fname) 15 | else: 16 | data=pd.read_csv(fname,header=None) 17 | ''' 18 | 
#data=data.as_matrix() 19 | data=np.reshape(data,[-1,288,N]) 20 | return data[0:ntr] 21 | 22 | def normalize(a): 23 | mu=np.mean(a,axis=1,keepdims=True) 24 | std=np.std(a,axis=1,keepdims=True) 25 | return (a-mu)/std 26 | 27 | def compute_dtw(a,b,order=1,Ts=12,normal=True): 28 | if normal: 29 | a=normalize(a) 30 | b=normalize(b) 31 | T0=a.shape[1] 32 | d=np.reshape(a,[-1,1,T0])-np.reshape(b,[-1,T0,1]) 33 | d=np.linalg.norm(d,axis=0,ord=order) 34 | D=np.zeros([T0,T0]) 35 | for i in range(T0): 36 | for j in range(max(0,i-Ts),min(T0,i+Ts+1)): 37 | if (i==0) and (j==0): 38 | D[i,j]=d[i,j]**order 39 | continue 40 | if (i==0): 41 | D[i,j]=d[i,j]**order+D[i,j-1] 42 | continue 43 | if (j==0): 44 | D[i,j]=d[i,j]**order+D[i-1,j] 45 | continue 46 | if (j==i-Ts): 47 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i-1,j]) 48 | continue 49 | if (j==i+Ts): 50 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i,j-1]) 51 | continue 52 | D[i,j]=d[i,j]**order+min(D[i-1,j-1],D[i-1,j],D[i,j-1]) 53 | return D[-1,-1]**(1.0/order) 54 | 55 | def construct_adj_fusion(A, A_dtw, steps): 56 | ''' 57 | construct a bigger adjacency matrix using the given matrix 58 | 59 | Parameters 60 | ---------- 61 | A: np.ndarray, adjacency matrix, shape is (N, N) 62 | 63 | steps: how many times of the does the new adj mx bigger than A 64 | 65 | Returns 66 | ---------- 67 | new adjacency matrix: csr_matrix, shape is (N * steps, N * steps) 68 | 69 | ---------- 70 | This is 4N_1 mode: 71 | 72 | [T, 1, 1, T 73 | 1, S, 1, 1 74 | 1, 1, S, 1 75 | T, 1, 1, T] 76 | 77 | ''' 78 | 79 | N = len(A) 80 | adj = np.zeros([N * steps] * 2) # "steps" = 4 !!! 81 | 82 | for i in range(steps): 83 | if (i == 1) or (i == 2): 84 | adj[i * N: (i + 1) * N, i * N: (i + 1) * N] = A 85 | else: 86 | adj[i * N: (i + 1) * N, i * N: (i + 1) * N] = A_dtw 87 | #''' 88 | for i in range(N): 89 | for k in range(steps - 1): 90 | adj[k * N + i, (k + 1) * N + i] = 1 91 | adj[(k + 1) * N + i, k * N + i] = 1 92 | #''' 93 | adj[3 * N: 4 * N, 0: N] = A_dtw #adj[0 * N : 1 * N, 1 * N : 2 * N] 94 | adj[0 : N, 3 * N: 4 * N] = A_dtw #adj[0 * N : 1 * N, 1 * N : 2 * N] 95 | 96 | adj[2 * N: 3 * N, 0 : N] = adj[0 * N : 1 * N, 1 * N : 2 * N] 97 | adj[0 : N, 2 * N: 3 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N] 98 | adj[1 * N: 2 * N, 3 * N: 4 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N] 99 | adj[3 * N: 4 * N, 1 * N: 2 * N] = adj[0 * N : 1 * N, 1 * N : 2 * N] 100 | 101 | 102 | for i in range(len(adj)): 103 | adj[i, i] = 1 104 | 105 | return adj 106 | 107 | 108 | class STFGNNDataset(MultiStepDataset): 109 | 110 | def __init__(self, config): 111 | super().__init__(config) 112 | self.strides = self.config.get("strides", 4) 113 | self.order = self.config.get("order", 1) 114 | self.lag = self.config.get("lag", 12) 115 | self.period = self.config.get("period", 288) 116 | self.sparsity = self.config.get("sparsity", 0.01) 117 | self.train_rate = self.config.get("train_rate", 0.6) 118 | self.adj_mx = torch.FloatTensor(self._construct_adj()) 119 | # self.adj_mx = torch.randn((1432, 1432)) 120 | 121 | 122 | def _construct_dtw(self): 123 | data = self.rawdat[:, :, 0] 124 | total_day = data.shape[0] / 288 125 | tr_day = int(total_day * 0.6) 126 | n_route = data.shape[1] 127 | xtr = gen_data(data, tr_day, n_route) 128 | print(np.shape(xtr)) 129 | T0 = 288 130 | T = 12 131 | N = n_route 132 | d = np.zeros([N, N]) 133 | for i in range(N): 134 | for j in range(i+1,N): 135 | d[i,j]=compute_dtw(xtr[:,:,i],xtr[:,:,j]) 136 | 137 | print("The calculation of time series is done!") 138 | dtw = d+ d.T 139 | n = dtw.shape[0] 140 | w_adj = 
np.zeros([n,n]) 141 | adj_percent = 0.01 142 | top = int(n * adj_percent) 143 | for i in range(dtw.shape[0]): 144 | a = dtw[i,:].argsort()[0:top] 145 | for j in range(top): 146 | w_adj[i, a[j]] = 1 147 | 148 | for i in range(n): 149 | for j in range(n): 150 | if (w_adj[i][j] != w_adj[j][i] and w_adj[i][j] ==0): 151 | w_adj[i][j] = 1 152 | if( i==j): 153 | w_adj[i][j] = 1 154 | 155 | print("Total route number: ", n) 156 | print("Sparsity of adj: ", len(w_adj.nonzero()[0])/(n*n)) 157 | print("The weighted matrix of temporal graph is generated!") 158 | self.dtw = w_adj 159 | 160 | 161 | def _construct_adj(self): 162 | """ 163 | 构建local 时空图 164 | :param A: np.ndarray, adjacency matrix, shape is (N, N) 165 | :param steps: 选择几个时间步来构建图 166 | :return: new adjacency matrix: csr_matrix, shape is (N * steps, N * steps) 167 | """ 168 | self._construct_dtw() 169 | adj_mx = construct_adj_fusion(self.adj_mx, self.dtw, self.strides) 170 | print("The shape of localized adjacency matrix: {}".format( 171 | adj_mx.shape), flush=True) 172 | 173 | return adj_mx 174 | 175 | def get_data(self): 176 | """ 177 | 返回数据的DataLoader,包括训练数据、测试数据、验证数据 178 | 179 | Returns: 180 | tuple: tuple contains: 181 | train_dataloader: 182 | eval_dataloader: 183 | test_dataloader: 184 | """ 185 | # 加载数据集 186 | 187 | return self.data["train_loader"], self.data["valid_loader"], self.data["test_loader"] 188 | 189 | def get_data_feature(self): 190 | """ 191 | 返回数据集特征,子类必须实现这个函数,返回必要的特征 192 | 193 | Returns: 194 | dict: 包含数据集的相关特征的字典 195 | """ 196 | feature = { 197 | "scaler": self.data["scaler"], 198 | "adj_mx": self.adj_mx, 199 | "num_batches": self.data['num_batches'] 200 | } 201 | 202 | return feature 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | -------------------------------------------------------------------------------- /STFGNN/data/utils.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import numpy as np 3 | import copy 4 | import pickle 5 | import torch.utils.data as torch_data 6 | import torch 7 | import pandas as pd 8 | 9 | 10 | 11 | def get_dataset(config): 12 | """ 13 | according the config['dataset_class'] to create the dataset 14 | 15 | Args: 16 | config(ConfigParser): config 17 | 18 | Returns: 19 | AbstractDataset: the loaded dataset 20 | """ 21 | if config["task"] == "multi_step": 22 | try: 23 | return getattr(importlib.import_module('libcity.data.dataset.multi_step_dataset'), 24 | config['dataset_class'])(config) 25 | except AttributeError: 26 | raise AttributeError('dataset_class is not found') 27 | elif config["task"] == "single_step": 28 | try: 29 | return getattr(importlib.import_module('libcity.data.dataset.single_step_dataset'), 30 | config['dataset_class'])(config) 31 | except AttributeError: 32 | raise AttributeError('dataset_class is not found') 33 | 34 | 35 | def load_pickle(pickle_file): 36 | try: 37 | with open(pickle_file, 'rb') as f: 38 | pickle_data = pickle.load(f) 39 | except UnicodeDecodeError as e: 40 | with open(pickle_file, 'rb') as f: 41 | pickle_data = pickle.load(f, encoding='latin1') 42 | except Exception as e: 43 | print('Unable to load data ', pickle_file, ':', e) 44 | raise 45 | return pickle_data 46 | 47 | 48 | class DataLoader(object): 49 | def __init__(self, xs, ys, batch_size, pad_with_last_sample=True, shuffle=False): 50 | """ 51 | 52 | :param xs: 53 | :param ys: 54 | :param batch_size: 55 | :param pad_with_last_sample: pad with the last sample to make number of samples divisible to batch_size. 
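        :param shuffle: if True, shuffle the (padded) samples once at construction;
                        the shuffle() method can be called again to re-shuffle between epochs.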
56 | """ 57 | self.batch_size = batch_size 58 | self.current_ind = 0 59 | self.seq_len = ys.shape[0] 60 | if pad_with_last_sample: 61 | num_padding = (batch_size - (len(xs) % batch_size)) % batch_size 62 | x_padding = np.repeat(xs[-1:], num_padding, axis=0) 63 | y_padding = np.repeat(ys[-1:], num_padding, axis=0) 64 | xs = np.concatenate([xs, x_padding], axis=0) 65 | ys = np.concatenate([ys, y_padding], axis=0) 66 | self.size = len(xs) 67 | self.num_batch = int(self.size // self.batch_size) 68 | if shuffle: 69 | permutation = np.random.permutation(self.size) 70 | xs, ys = xs[permutation], ys[permutation] 71 | self.xs = xs 72 | self.ys = ys 73 | 74 | def shuffle(self): 75 | """洗牌""" 76 | permutation = np.random.permutation(self.size) 77 | xs, ys = self.xs[permutation], self.ys[permutation] 78 | self.xs = xs 79 | self.ys = ys 80 | 81 | def get_iterator(self): 82 | self.current_ind = 0 83 | 84 | def _wrapper(): 85 | while self.current_ind < self.num_batch: 86 | start_ind = self.batch_size * self.current_ind 87 | end_ind = min(self.size, self.batch_size * (self.current_ind + 1)) 88 | x_i = self.xs[start_ind: end_ind, ...] 89 | y_i = self.ys[start_ind: end_ind, ...] 90 | yield (x_i, y_i) 91 | self.current_ind += 1 92 | 93 | return _wrapper() 94 | 95 | 96 | class DataLoaderM_new(object): 97 | def __init__(self, xs, ys, ycl, batch_size, pad_with_last_sample=True): 98 | """ 99 | :param xs: 100 | :param ys: 101 | :param batch_size: 102 | :param pad_with_last_sample: pad with the last sample to make number of samples divisible to batch_size. 103 | """ 104 | self.batch_size = batch_size 105 | self.current_ind = 0 106 | if pad_with_last_sample: 107 | num_padding = (batch_size - (len(xs) % batch_size)) % batch_size 108 | x_padding = np.repeat(xs[-1:], num_padding, axis=0) 109 | y_padding = np.repeat(ys[-1:], num_padding, axis=0) 110 | xs = np.concatenate([xs, x_padding], axis=0) 111 | ys = np.concatenate([ys, y_padding], axis=0) 112 | ycl = np.concatenate([ycl, y_padding], axis=0) 113 | self.size = len(xs) 114 | self.num_batch = int(self.size // self.batch_size) 115 | self.xs = xs 116 | self.ys = ys 117 | self.ycl = ycl 118 | 119 | def shuffle(self): 120 | permutation = np.random.permutation(self.size) 121 | xs, ys, ycl = self.xs[permutation], self.ys[permutation], self.ycl[ 122 | permutation] 123 | self.xs = xs 124 | self.ys = ys 125 | self.ycl = ycl 126 | 127 | def get_iterator(self): 128 | self.current_ind = 0 129 | 130 | def _wrapper(): 131 | while self.current_ind < self.num_batch: 132 | start_ind = self.batch_size * self.current_ind 133 | end_ind = min(self.size, 134 | self.batch_size * (self.current_ind + 1)) 135 | x_i = self.xs[start_ind:end_ind, ...] 136 | y_i = self.ys[start_ind:end_ind, ...] 137 | y_i_cl = self.ycl[start_ind:end_ind, ...] 
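                    # besides x_i and y_i, each batch yields y_i_cl, the matching slice of the
                    # extra ycl target array passed to the constructor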
138 | yield (x_i, y_i, y_i_cl) 139 | self.current_ind += 1 140 | 141 | return _wrapper() 142 | 143 | 144 | -------------------------------------------------------------------------------- /STFGNN/evaluator/evaluator.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import os 4 | import scipy.sparse as sp 5 | import torch 6 | from scipy.sparse import linalg 7 | from torch.autograd import Variable 8 | from collections import defaultdict 9 | 10 | 11 | def rse_np(preds, labels): 12 | if not isinstance(preds, np.ndarray): 13 | preds = preds.cpu().numpy() 14 | labels = labels.cpu().numpy() 15 | mse = np.sum(np.square(np.subtract(preds, labels)).astype('float32')) 16 | means = np.mean(labels) 17 | labels_mse = np.sum(np.square(np.subtract(labels, means)).astype('float32')) 18 | return np.sqrt(mse/labels_mse) 19 | 20 | 21 | def mae_np(preds, labels): 22 | if isinstance(preds, np.ndarray): 23 | mae = np.abs(np.subtract(preds, labels)).astype('float32') 24 | else: 25 | mae = np.abs(np.subtract(preds.cpu().numpy(), labels.cpu().numpy())).astype('float32') 26 | return np.mean(mae) 27 | 28 | 29 | def rmse_np(preds, labels): 30 | mse = mse_np(preds, labels) 31 | return np.sqrt(mse) 32 | 33 | def mse_np(preds, labels): 34 | if isinstance(preds, np.ndarray): 35 | return np.mean(np.square(np.subtract(preds, labels)).astype('float32')) 36 | else: 37 | return np.mean(np.square(np.subtract(preds.cpu().numpy(), labels.cpu().numpy())).astype('float32')) 38 | 39 | def mape_np(preds, labels): 40 | if isinstance(preds, np.ndarray): 41 | mape = np.abs(np.divide(np.subtract(preds, labels).astype('float32'), labels)) 42 | else: 43 | mape = np.abs(np.divide(np.subtract(preds.cpu().numpy(), labels.cpu().numpy()).astype('float32'), labels.cpu().numpy())) 44 | return np.mean(mape) 45 | 46 | 47 | 48 | def rae_np(preds, labels): 49 | mse = np.sum(np.abs(np.subtract(preds, labels)).astype('float32')) 50 | means = np.mean(labels) 51 | labels_mse = np.sum(np.abs(np.subtract(labels, means)).astype('float32')) 52 | return mse/labels_mse 53 | 54 | 55 | 56 | def pcc_np(x, y): 57 | if not isinstance(x, np.ndarray): 58 | x, y = x.cpu().numpy(), y.cpu().numpy() 59 | x,y = x.reshape(-1),y.reshape(-1) 60 | return np.corrcoef(x,y)[0][1] 61 | 62 | 63 | def node_pcc_np(x, y): 64 | if not isinstance(x, np.ndarray): 65 | x, y = x.cpu().numpy(), y.cpu().numpy() 66 | sigma_x = x.std(axis=0) 67 | sigma_y = y.std(axis=0) 68 | mean_x = x.mean(axis=0) 69 | mean_y = y.mean(axis=0) 70 | cor = ((x - mean_x) * (y - mean_y)).mean(0) / (sigma_x * sigma_y + 0.000000000001) 71 | return cor.mean() 72 | 73 | def corr_np(preds, labels): 74 | sigma_p = (preds).std(axis=0) 75 | sigma_g = (labels).std(axis=0) 76 | mean_p = preds.mean(axis=0) 77 | mean_g = labels.mean(axis=0) 78 | index = (sigma_g != 0) 79 | correlation = ((preds - mean_p) * (labels - mean_g)).mean(axis=0) / (sigma_p * sigma_g) 80 | correlation = (correlation[index]).mean() 81 | return correlation 82 | 83 | 84 | def stemgnn_mape(preds,labels, axis=None): 85 | ''' 86 | Mean absolute percentage error. 87 | :param labels: np.ndarray or int, ground truth. 88 | :param preds: np.ndarray or int, prediction. 89 | :param axis: axis to do calculation. 90 | :return: int, MAPE averages on all elements of input. 
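    Note: the denominator is offset by 1e-5 and each element-wise APE is clipped at 5 (i.e. 500%),
    so labels near zero cannot dominate the average.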
91 | ''' 92 | if not isinstance(preds, np.ndarray): 93 | preds = preds.cpu().numpy() 94 | labels = labels.cpu().numpy() 95 | mape = (np.abs(preds - labels) / (np.abs(labels)+1e-5)).astype(np.float64) 96 | mape = np.where(mape > 5, 5, mape) 97 | return np.mean(mape, axis) 98 | 99 | 100 | def masked_rmse_np(preds, labels, null_val=np.nan): 101 | return np.sqrt(masked_mse_np(preds=preds, labels=labels, null_val=null_val)) 102 | 103 | 104 | def masked_mse_np(preds, labels, null_val=np.nan): 105 | with np.errstate(divide='ignore', invalid='ignore'): 106 | if np.isnan(null_val): 107 | mask = ~np.isnan(labels) 108 | else: 109 | mask = np.not_equal(labels, null_val) 110 | mask = mask.astype('float32') 111 | mask /= np.mean(mask) 112 | mse = np.square(np.subtract(preds, labels)).astype('float32') 113 | mse = np.nan_to_num(mse * mask) 114 | return np.mean(mse) 115 | 116 | 117 | def masked_mae_np(preds, labels, null_val=np.nan): 118 | with np.errstate(divide='ignore', invalid='ignore'): 119 | if np.isnan(null_val): 120 | mask = ~np.isnan(labels) 121 | else: 122 | mask = np.not_equal(labels, null_val) 123 | mask = mask.astype('float32') 124 | mask /= np.mean(mask) 125 | mae = np.abs(np.subtract(preds, labels)).astype('float32') 126 | mae = np.nan_to_num(mae * mask) 127 | return np.mean(mae) 128 | 129 | 130 | def masked_mape_np(preds, labels, null_val=np.nan): 131 | if not isinstance(preds, np.ndarray): 132 | preds = preds.cpu().numpy() 133 | labels = labels.cpu().numpy() 134 | with np.errstate(divide='ignore', invalid='ignore'): 135 | if np.isnan(null_val): 136 | mask = ~np.isnan(labels) 137 | else: 138 | mask = np.not_equal(labels, null_val) 139 | mask = mask.astype('float32') 140 | mask /= np.mean(mask) 141 | mape = np.abs(np.divide(np.subtract(preds, labels).astype('float32'), labels)) 142 | mape = np.nan_to_num(mask * mape) 143 | return np.mean(mape) 144 | 145 | 146 | class Evaluator(object): 147 | def __init__(self, config): 148 | self.config = config 149 | self.mask = self.config.get("mask", False) 150 | self.out_catagory = "multi" 151 | 152 | 153 | def _evaluate(self, output:np.ndarray, groud_truth:np.ndarray, mask: int, out_catagory: str): 154 | """ 155 | evluate the model performance 156 | : multi 157 | :param output: [n_samples, 12, n_nodes, n_features] 158 | :param groud_truth: [n_samples, 12, n_nodes, n_features] 159 | : single 160 | 161 | :return: dict [str -> float] 162 | """ 163 | if out_catagory == 'multi': 164 | if bool(mask): 165 | if output.shape != groud_truth.shape: 166 | groud_truth = np.expand_dims( groud_truth[...,0], axis=-1) 167 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}' 168 | batch, steps, scores, node = output.shape[0], output.shape[1], defaultdict(dict), output.shape[2] 169 | for step in range(steps): 170 | y_pred = np.reshape(output[:,step],(batch, -1)) 171 | y_true = np.reshape(groud_truth[:,step],(batch,-1)) 172 | scores['masked_MAE'][f'horizon-{step}'] = masked_mae_np(y_pred, y_true, null_val=0.0) 173 | scores['masked_RMSE'][f'horizon-{step}'] = masked_rmse_np(y_pred, y_true, null_val=0.0) 174 | scores['masked_MAPE'][f'horizon-{step}'] = masked_mape_np(y_pred, y_true, null_val=0.0) * 100.0 175 | scores['node_wise_PCC'][f'horizon-{step}']= node_pcc_np(y_pred.swapaxes(1,-1).reshape((-1,node)), y_true.swapaxes(1,-1).reshape((-1,node))) 176 | scores['PCC'][f'horizon-{step}'] = pcc_np(y_pred, y_true) 177 | scores['masked_MAE']['all'] = masked_mae_np(output,groud_truth ,null_val=0.0) 178 | scores['masked_RMSE']['all'] = masked_rmse_np( 
output,groud_truth, null_val=0.0) 179 | scores['masked_MAPE']['all'] = masked_mape_np( output,groud_truth, null_val=0.0) * 100.0 180 | scores['PCC']['all'] = pcc_np(output,groud_truth) 181 | scores["node_pcc"]['all'] = node_pcc_np(output, groud_truth) 182 | else: 183 | if output.shape != groud_truth.shape: 184 | groud_truth = np.expand_dims( groud_truth[...,0], axis=-1) 185 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}' 186 | batch, steps, scores, node = output.shape[0], output.shape[1], defaultdict(dict), output.shape[2] 187 | for step in range(steps): 188 | y_pred = output[:,step] 189 | y_true = groud_truth[:,step] 190 | scores['MAE'][f'horizon-{step}'] = mae_np(y_pred, y_true) 191 | scores['RMSE'][f'horizon-{step}'] = rmse_np(y_pred, y_true) 192 | # scores['MAPE'][f'horizon-{step}'] = mape_np(y_pred,y_true) * 100.0 193 | scores['masked_MAPE'][f'horizon-{step}'] = masked_mape_np(y_pred, y_true, null_val=0.0) * 100.0 194 | scores['StemGNN_MAPE'][f'horizon-{step}'] = stemgnn_mape(y_pred, y_true) * 100.0 195 | scores['PCC'][f'horizon-{step}'] = pcc_np(y_pred, y_true) 196 | scores['node_wise_PCC'][f'horizon-{step}']= node_pcc_np(y_pred.swapaxes(1,-1).reshape((-1,node)), y_true.swapaxes(1,-1).reshape((-1,node))) 197 | scores['MAE']['all'] = mae_np(output,groud_truth) 198 | scores['RMSE']['all'] = rmse_np(output,groud_truth) 199 | scores['masked_MAPE']['all'] = masked_mape_np( output,groud_truth, null_val=0.0) * 100.0 200 | scores['StemGNN_MAPE']['all'] = stemgnn_mape(output,groud_truth) * 100.0 201 | scores['PCC']['all'] = pcc_np(output,groud_truth) 202 | scores['node_wise_PCC']['all'] = node_pcc_np(output.swapaxes(2,-1).reshape((-1,node)), groud_truth.swapaxes(2,-1).reshape((-1,node))) 203 | else: 204 | output = output.squeeze() 205 | groud_truth = groud_truth.squeeze() 206 | assert output.shape == groud_truth.shape, f'{output.shape}, {groud_truth.shape}' 207 | scores = defaultdict(dict) 208 | 209 | scores['RMSE']['all'] = rmse_np(output, groud_truth) 210 | scores['masked_MAPE']['all'] = masked_mape_np(output, groud_truth, null_val=0.0) * 100.0 211 | scores['PCC']['all'] = node_pcc_np(output, groud_truth) 212 | scores['rse']['all'] = rse_np(output, groud_truth) 213 | scores['rae']['all'] = rae_np(output, groud_truth) 214 | scores['MAPE']['all'] = stemgnn_mape(output, groud_truth) * 100.0 215 | scores['MAE']['all'] = mae_np(output, groud_truth) 216 | scores["node_pcc"]['all'] = node_pcc_np(output, groud_truth) 217 | scores['CORR']['all'] = corr_np(output, groud_truth) 218 | return scores 219 | 220 | 221 | def evaluate(self, output, groud_truth): 222 | if not isinstance(output, np.ndarray): 223 | output = output.cpu().numpy() 224 | if not isinstance(groud_truth, np.ndarray): 225 | groud_truth = groud_truth.cpu().numpy() 226 | return self._evaluate(output, groud_truth, self.mask, self.out_catagory) 227 | -------------------------------------------------------------------------------- /STFGNN/executor/multi_step_executor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import torch 5 | import math 6 | import time 7 | import torch.nn as nn 8 | from torch.autograd import Variable 9 | from logging import getLogger 10 | import tqdm 11 | from torch.utils.tensorboard import SummaryWriter 12 | from executor.utils import get_train_loss 13 | from utils.Optim import Optim 14 | from evaluator.evaluator import Evaluator 15 | from utils.utils import ensure_dir 16 | 17 | from model 
import loss 18 | from functools import partial 19 | 20 | 21 | class MultiStepExecutor(object): 22 | def __init__(self, config, model): 23 | self.config = config 24 | self.evaluator = Evaluator(config) 25 | 26 | _device = self.config.get('device', torch.device('cpu')) 27 | self.device = torch.device(_device) 28 | self.model = model.to(self.device) 29 | 30 | self.cache_dir = 'cache/model_cache' 31 | self.evaluate_res_dir = 'cache/evaluate_cache' 32 | self.summary_writer_dir = 'log/runs' 33 | ensure_dir(self.cache_dir) 34 | ensure_dir(self.evaluate_res_dir) 35 | ensure_dir(self.summary_writer_dir) 36 | 37 | self._writer = SummaryWriter(self.summary_writer_dir) 38 | self._logger = getLogger() 39 | self._logger.info(self.model) 40 | 41 | for name, param in self.model.named_parameters(): 42 | self._logger.info(str(name) + '\t' + str(param.shape) + '\t' + 43 | str(param.device) + '\t' + str(param.requires_grad)) 44 | 45 | total_num = sum([param.nelement() for param in self.model.parameters()]) 46 | self._logger.info('Total parameter numbers: {}'.format(total_num)) 47 | 48 | self.train_loss = self.config.get("train_loss", "masked_mae") 49 | self.criterion = get_train_loss(self.train_loss) 50 | 51 | self.cuda = self.config.get("cuda", True) 52 | self.best_val = 10000000 53 | self.optim = Optim( 54 | model.parameters(), self.config 55 | ) 56 | self.epochs = self.config.get("epochs", 100) 57 | self.scaler = self.model.scaler 58 | self.num_batches = self.model.num_batches 59 | self.num_nodes = self.config.get("num_nodes", 0) 60 | self.batch_size = self.config.get("batch_size", 64) 61 | self.patience = self.config.get("patience", 20) 62 | self.lr_decay = self.config.get("lr_decay", False) 63 | self.mask = self.config.get("mask", True) 64 | 65 | 66 | def train(self, train_data, valid_data): 67 | print("begin training") 68 | wait = 0 69 | batches_seen = self.num_batches * 0 70 | 71 | 72 | for epoch in tqdm.tqdm(range(1, self.epochs + 1)): 73 | epoch_start_time = time.time() 74 | train_loss = [] 75 | train_data.shuffle() 76 | 77 | for iter, (x,y) in enumerate(train_data.get_iterator()): 78 | self.model.train() 79 | self.model.zero_grad() 80 | trainx = torch.Tensor(x).to(self.device) # [batch_size, window, num_nodes, dim] 81 | trainy = torch.Tensor(y).to(self.device) # [batch_size, horizon, num_nodes, dim] 82 | output = self.model(trainx) 83 | loss = self.criterion(self.scaler.inverse_transform(output), 84 | self.scaler.inverse_transform(trainy)) 85 | 86 | loss.backward() 87 | self.optim.step() 88 | train_loss.append(loss.item()) 89 | 90 | 91 | if self.lr_decay: 92 | self.optim.lr_scheduler.step() 93 | 94 | valid_loss = [] 95 | valid_mape = [] 96 | valid_rmse = [] 97 | valid_pcc = [] 98 | for iter, (x, y) in enumerate(valid_data.get_iterator()): 99 | self.model.eval() 100 | valx = torch.Tensor(x).to(self.device) 101 | valy = torch.Tensor(y).to(self.device) 102 | with torch.no_grad(): 103 | output = self.model(valx) 104 | score = self.evaluator.evaluate(self.scaler.inverse_transform(output), \ 105 | self.scaler.inverse_transform(valy)) 106 | if self.mask: 107 | vloss = score["masked_MAE"]["all"] 108 | else: 109 | vloss = score["MAE"]["all"] 110 | 111 | valid_loss.append(vloss) 112 | 113 | 114 | mtrain_loss = np.mean(train_loss) 115 | 116 | mvalid_loss = np.mean(valid_loss) 117 | 118 | print( 119 | '| end of epoch {:3d} | time: {:5.2f}s | train_loss {:5.4f} | valid mae {:5.4f}'.format( 120 | epoch, (time.time() - epoch_start_time), mtrain_loss, \ 121 | mvalid_loss)) 122 | 123 | if mvalid_loss < 
self.best_val: 124 | self.best_val = mvalid_loss 125 | wait = 0 126 | self.best_val = mvalid_loss 127 | self.best_model = self.model 128 | else: 129 | wait += 1 130 | 131 | if wait >= self.patience: 132 | print('early stop at epoch: {:04d}'.format(epoch)) 133 | break 134 | 135 | self.model = self.best_model 136 | 137 | 138 | def evaluate(self, test_data): 139 | """ 140 | use model to test data 141 | 142 | Args: 143 | test_dataloader(torch.Dataloader): Dataloader 144 | """ 145 | self._logger.info('Start evaluating ...') 146 | outputs = [] 147 | realy = [] 148 | seq_len = test_data.seq_len #test_data["y_test"] 149 | self.model.eval() 150 | for iter, (x, y) in enumerate(test_data.get_iterator()): 151 | testx = torch.Tensor(x).to(self.device) 152 | testy = torch.Tensor(y).to(self.device) 153 | with torch.no_grad(): 154 | # self.evaluator.clear() 155 | pred = self.model(testx) 156 | outputs.append(pred) 157 | realy.append(testy) 158 | realy = torch.cat(realy, dim=0) 159 | yhat = torch.cat(outputs, dim=0) 160 | 161 | realy = realy[:seq_len, ...] 162 | yhat = yhat[:seq_len, ...] 163 | 164 | realy = self.scaler.inverse_transform(realy) 165 | preds = self.scaler.inverse_transform(yhat) 166 | 167 | res_scores = self.evaluator.evaluate(preds, realy) 168 | for _index in res_scores.keys(): 169 | print(_index, " :") 170 | step_dict = res_scores[_index] 171 | for j, k in step_dict.items(): 172 | print(j, " : ", k.item()) 173 | 174 | 175 | 176 | def save_model(self, cache_name): 177 | """ 178 | 将当前的模型保存到文件 179 | 180 | Args: 181 | cache_name(str): 保存的文件名 182 | """ 183 | ensure_dir(self.cache_dir) 184 | self._logger.info("Saved model at " + cache_name) 185 | torch.save(self.model.state_dict(), cache_name) 186 | 187 | def load_model(self, cache_name): 188 | """ 189 | 加载对应模型的 cache 190 | 191 | Args: 192 | cache_name(str): 保存的文件名 193 | """ 194 | self._logger.info("Loaded model at " + cache_name) 195 | model_state = torch.load(cache_name) 196 | self.model.load_state_dict(model_state) 197 | -------------------------------------------------------------------------------- /STFGNN/executor/utils.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import numpy as np 3 | import copy 4 | import pickle 5 | from model import loss 6 | from functools import partial 7 | 8 | def get_train_loss(train_loss): 9 | """ 10 | get the loss func 11 | """ 12 | if train_loss.lower() == 'none': 13 | print('Warning. 
Received none train loss func and will use the loss func defined in the model.') 14 | return None 15 | 16 | def func(preds, labels): 17 | 18 | if train_loss.lower() == 'mae': 19 | lf = loss.masked_mae_torch 20 | elif train_loss.lower() == 'mse': 21 | lf = loss.masked_mse_torch 22 | elif train_loss.lower() == 'rmse': 23 | lf = loss.masked_rmse_torch 24 | elif train_loss.lower() == 'mape': 25 | lf = loss.masked_mape_torch 26 | elif train_loss.lower() == 'logcosh': 27 | lf = loss.log_cosh_loss 28 | elif train_loss.lower() == 'huber': 29 | lf = loss.huber_loss 30 | elif train_loss.lower() == 'quantile': 31 | lf = loss.quantile_loss 32 | elif train_loss.lower() == 'masked_mae': 33 | lf = partial(loss.masked_mae_torch, null_val=0) 34 | elif train_loss.lower() == 'masked_mse': 35 | lf = partial(loss.masked_mse_torch, null_val=0) 36 | elif train_loss.lower() == 'masked_rmse': 37 | lf = partial(loss.masked_rmse_torch, null_val=0) 38 | elif train_loss.lower() == 'masked_mape': 39 | lf = partial(loss.masked_mape_torch, null_val=0) 40 | elif train_loss.lower() == 'r2': 41 | lf = loss.r2_score_torch 42 | elif train_loss.lower() == 'evar': 43 | lf = loss.explained_variance_score_torch 44 | else: 45 | lf = loss.masked_mae_torch 46 | 47 | return lf(preds, labels) 48 | return func 49 | 50 | -------------------------------------------------------------------------------- /STFGNN/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | 5 | from data.dataset.stfgnn_dataset import STFGNNDataset 6 | from model.STFGNN import STFGNN 7 | from executor.multi_step_executor import MultiStepExecutor as STFGNNExecutor 8 | 9 | 10 | config = {} 11 | for filename in ["config/PEMS03.json", "config/STFGNN.json"]: 12 | with open(filename, "r") as f: 13 | _config = json.load(f) 14 | for key in _config: 15 | if key not in config: 16 | config[key] = _config[key] 17 | 18 | dataset = STFGNNDataset(config) 19 | 20 | train_data, valid_data, test_data = dataset.get_data() 21 | data_feature = dataset.get_data_feature() 22 | 23 | model_cache_file = 'cache/model_cache/PEMS03_STFGNN.m' 24 | 25 | model = STFGNN(config, data_feature) 26 | 27 | executor = STFGNNExecutor(config, model) 28 | 29 | 30 | train = True #标识是否需要重新训练 31 | 32 | if train or not os.path.exists(model_cache_file): 33 | executor.train(train_data, valid_data) 34 | executor.save_model(model_cache_file) 35 | else: 36 | executor.load_model(model_cache_file) 37 | # 评估,评估结果将会放在 cache/evaluate_cache 下 38 | executor.evaluate(test_data) 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /STFGNN/model/STFGNN.py: -------------------------------------------------------------------------------- 1 | # from _typeshed import Self 2 | import torch 3 | import torch.nn.functional as F 4 | import torch.nn as nn 5 | 6 | 7 | class gcn_operation(nn.Module): 8 | def __init__(self, adj, in_dim, out_dim, num_vertices, activation='GLU'): 9 | """ 10 | 图卷积模块 11 | :param adj: 邻接图 12 | :param in_dim: 输入维度 13 | :param out_dim: 输出维度 14 | :param num_vertices: 节点数量 15 | :param activation: 激活方式 {'relu', 'GLU'} 16 | """ 17 | super(gcn_operation, self).__init__() 18 | self.adj = adj 19 | self.in_dim = in_dim 20 | self.out_dim = out_dim 21 | self.num_vertices = num_vertices 22 | self.activation = activation 23 | 24 | assert self.activation in {'GLU', 'relu'} 25 | 26 | if self.activation == 'GLU': 27 | self.FC = nn.Linear(self.in_dim, 2 * self.out_dim, bias=True) 28 | 
else: 29 | self.FC = nn.Linear(self.in_dim, self.out_dim, bias=True) 30 | 31 | def forward(self, x, mask=None): 32 | """ 33 | :param x: (3*N, B, Cin) 34 | :param mask:(3*N, 3*N) 35 | :return: (3*N, B, Cout) 36 | """ 37 | adj = self.adj 38 | if mask is not None: 39 | adj = adj.to(mask.device) * mask 40 | 41 | x = torch.einsum('nm, mbc->nbc', adj.to(x.device), x) # 4*N, B, Cin 42 | 43 | if self.activation == 'GLU': 44 | lhs_rhs = self.FC(x) # 4*N, B, 2*Cout 45 | lhs, rhs = torch.split(lhs_rhs, self.out_dim, dim=-1) # 4*N, B, Cout 46 | 47 | out = lhs * torch.sigmoid(rhs) 48 | del lhs, rhs, lhs_rhs 49 | 50 | return out 51 | 52 | elif self.activation == 'relu': 53 | return torch.relu(self.FC(x)) # 3*N, B, Cout 54 | 55 | 56 | class STSGCM(nn.Module): 57 | def __init__(self, adj, in_dim, out_dims, num_of_vertices, activation='GLU'): 58 | """ 59 | :param adj: 邻接矩阵 60 | :param in_dim: 输入维度 61 | :param out_dims: list 各个图卷积的输出维度 62 | :param num_of_vertices: 节点数量 63 | :param activation: 激活方式 {'relu', 'GLU'} 64 | """ 65 | super(STSGCM, self).__init__() 66 | self.adj = adj 67 | self.in_dim = in_dim 68 | self.out_dims = out_dims 69 | self.num_of_vertices = num_of_vertices 70 | self.activation = activation 71 | 72 | self.gcn_operations = nn.ModuleList() 73 | 74 | self.gcn_operations.append( 75 | gcn_operation( 76 | adj=self.adj, 77 | in_dim=self.in_dim, 78 | out_dim=self.out_dims[0], 79 | num_vertices=self.num_of_vertices, 80 | activation=self.activation 81 | ) 82 | ) 83 | 84 | for i in range(1, len(self.out_dims)): 85 | self.gcn_operations.append( 86 | gcn_operation( 87 | adj=self.adj, 88 | in_dim=self.out_dims[i-1], 89 | out_dim=self.out_dims[i], 90 | num_vertices=self.num_of_vertices, 91 | activation=self.activation 92 | ) 93 | ) 94 | 95 | def forward(self, x, mask=None): 96 | """ 97 | :param x: (3N, B, Cin) 98 | :param mask: (3N, 3N) 99 | :return: (N, B, Cout) 100 | """ 101 | need_concat = [] 102 | 103 | for i in range(len(self.out_dims)): 104 | x = self.gcn_operations[i](x, mask) 105 | need_concat.append(x) 106 | 107 | # shape of each element is (1, N, B, Cout) 108 | need_concat = [ 109 | torch.unsqueeze( 110 | h[self.num_of_vertices: 2 * self.num_of_vertices], dim=0 111 | ) for h in need_concat 112 | ] 113 | 114 | out = torch.max(torch.cat(need_concat, dim=0), dim=0).values # (N, B, Cout) 115 | 116 | del need_concat 117 | 118 | return out 119 | 120 | 121 | class STSGCL(nn.Module): 122 | def __init__(self, 123 | adj, 124 | history, 125 | num_of_vertices, 126 | in_dim, 127 | out_dims, 128 | strides=4, 129 | activation='GLU', 130 | temporal_emb=True, 131 | spatial_emb=True): 132 | """ 133 | :param adj: 邻接矩阵 134 | :param history: 输入时间步长 135 | :param in_dim: 输入维度 136 | :param out_dims: list 各个图卷积的输出维度 137 | :param strides: 滑动窗口步长,local时空图使用几个时间步构建的,默认为3 138 | :param num_of_vertices: 节点数量 139 | :param activation: 激活方式 {'relu', 'GLU'} 140 | :param temporal_emb: 加入时间位置嵌入向量 141 | :param spatial_emb: 加入空间位置嵌入向量 142 | """ 143 | super(STSGCL, self).__init__() 144 | self.adj = adj 145 | self.strides = strides 146 | self.history = history 147 | self.in_dim = in_dim 148 | self.out_dims = out_dims 149 | self.num_of_vertices = num_of_vertices 150 | 151 | self.activation = activation 152 | self.temporal_emb = temporal_emb 153 | self.spatial_emb = spatial_emb 154 | 155 | 156 | self.conv1 = nn.Conv1d(self.in_dim, self.out_dims[-1], kernel_size=(1, 2), stride=(1, 1), dilation=(1, 3)) 157 | self.conv2 = nn.Conv1d(self.in_dim, self.out_dims[-1], kernel_size=(1, 2), stride=(1, 1), dilation=(1, 3)) 158 | 159 | 160 | 
self.STSGCMS = nn.ModuleList() 161 | for i in range(self.history - self.strides + 1): 162 | self.STSGCMS.append( 163 | STSGCM( 164 | adj=self.adj, 165 | in_dim=self.in_dim, 166 | out_dims=self.out_dims, 167 | num_of_vertices=self.num_of_vertices, 168 | activation=self.activation 169 | ) 170 | ) 171 | 172 | if self.temporal_emb: 173 | self.temporal_embedding = nn.Parameter(torch.FloatTensor(1, self.history, 1, self.in_dim)) 174 | # 1, T, 1, Cin 175 | 176 | if self.spatial_emb: 177 | self.spatial_embedding = nn.Parameter(torch.FloatTensor(1, 1, self.num_of_vertices, self.in_dim)) 178 | # 1, 1, N, Cin 179 | 180 | self.reset() 181 | 182 | def reset(self): 183 | if self.temporal_emb: 184 | nn.init.xavier_normal_(self.temporal_embedding, gain=0.0003) 185 | 186 | if self.spatial_emb: 187 | nn.init.xavier_normal_(self.spatial_embedding, gain=0.0003) 188 | 189 | def forward(self, x, mask=None): 190 | """ 191 | :param x: B, T, N, Cin 192 | :param mask: (N, N) 193 | :return: B, T-3, N, Cout 194 | """ 195 | if self.temporal_emb: 196 | x = x + self.temporal_embedding 197 | 198 | if self.spatial_emb: 199 | x = x + self.spatial_embedding 200 | 201 | ############################################# 202 | # shape is (B, C, N, T) 203 | data_temp = x.permute(0, 3, 2, 1) 204 | data_left = torch.sigmoid(self.conv1(data_temp)) 205 | data_right = torch.tanh(self.conv2(data_temp)) 206 | data_time_axis = data_left * data_right 207 | data_res = data_time_axis.permute(0, 3, 2, 1) 208 | # shape is (B, T-3, N, C) 209 | ############################################# 210 | 211 | need_concat = [] 212 | batch_size = x.shape[0] 213 | 214 | for i in range(self.history - self.strides + 1): 215 | t = x[:, i: i+self.strides, :, :] # (B, 4, N, Cin) 216 | 217 | t = torch.reshape(t, shape=[batch_size, self.strides * self.num_of_vertices, self.in_dim]) 218 | # (B, 4*N, Cin) 219 | 220 | t = self.STSGCMS[i](t.permute(1, 0, 2), mask) # (4*N, B, Cin) -> (N, B, Cout) 221 | 222 | t = torch.unsqueeze(t.permute(1, 0, 2), dim=1) # (N, B, Cout) -> (B, N, Cout) ->(B, 1, N, Cout) 223 | 224 | need_concat.append(t) 225 | 226 | mid_out = torch.cat(need_concat, dim=1) # (B, T-3, N, Cout) 227 | out = mid_out + data_res 228 | 229 | del need_concat, batch_size 230 | 231 | return out 232 | 233 | 234 | class output_layer(nn.Module): 235 | def __init__(self, num_of_vertices, history, in_dim, out_dim, 236 | hidden_dim=128, horizon=12): 237 | """ 238 | 预测层,注意在作者的实验中是对每一个预测时间step做处理的,也即他会令horizon=1 239 | :param num_of_vertices:节点数 240 | :param history:输入时间步长 241 | :param in_dim: 输入维度 242 | :param hidden_dim:中间层维度 243 | :param horizon:预测时间步长 244 | """ 245 | super(output_layer, self).__init__() 246 | self.num_of_vertices = num_of_vertices 247 | self.history = history 248 | self.in_dim = in_dim 249 | self.out_dim = out_dim 250 | self.hidden_dim = hidden_dim 251 | self.horizon = horizon 252 | 253 | #print("#####################") 254 | #print(self.in_dim) 255 | #print(self.history) 256 | #print(self.hidden_dim) 257 | 258 | self.FC1 = nn.Linear(self.in_dim * self.history, self.hidden_dim, bias=True) 259 | 260 | #self.FC2 = nn.Linear(self.hidden_dim, self.horizon , bias=True) 261 | 262 | self.FC2 = nn.Linear(self.hidden_dim, self.horizon * self.out_dim, bias=True) 263 | 264 | def forward(self, x): 265 | """ 266 | :param x: (B, Tin, N, Cin) 267 | :return: (B, Tout, N) 268 | """ 269 | batch_size = x.shape[0] 270 | 271 | x = x.permute(0, 2, 1, 3) # B, N, Tin, Cin 272 | 273 | out1 = torch.relu(self.FC1(x.reshape(batch_size, self.num_of_vertices, -1))) 274 | # (B, N, 
Tin, Cin) -> (B, N, Tin * Cin) -> (B, N, hidden) 275 | 276 | out2 = self.FC2(out1) # (B, N, hidden) -> (B, N, horizon * 2) 277 | 278 | out2 = out2.reshape(batch_size, self.num_of_vertices, self.horizon, self.out_dim) 279 | 280 | del out1, batch_size 281 | 282 | return out2.permute(0, 2, 1, 3) # B, horizon, N 283 | # return out2.permute(0, 2, 1) # B, horizon, N 284 | 285 | 286 | class STFGNN(nn.Module): 287 | def __init__(self, config, data_feature): 288 | """ 289 | 290 | :param adj: local时空间矩阵 291 | :param history:输入时间步长 292 | :param num_of_vertices:节点数量 293 | :param in_dim:输入维度 294 | :param hidden_dims: lists, 中间各STSGCL层的卷积操作维度 295 | :param first_layer_embedding_size: 第一层输入层的维度 296 | :param out_layer_dim: 输出模块中间层维度 297 | :param activation: 激活函数 {relu, GlU} 298 | :param use_mask: 是否使用mask矩阵对adj进行优化 299 | :param temporal_emb:是否使用时间嵌入向量 300 | :param spatial_emb:是否使用空间嵌入向量 301 | :param horizon:预测时间步长 302 | :param strides:滑动窗口步长,local时空图使用几个时间步构建的,默认为4 303 | """ 304 | super(STFGNN, self).__init__() 305 | 306 | self.config = config 307 | self.data_feature = data_feature 308 | self.scaler = data_feature["scaler"] 309 | self.num_batches = data_feature["num_batches"] 310 | 311 | adj = self.data_feature["adj_mx"] 312 | history = self.config.get("window", 12) 313 | num_of_vertices = self.config.get("num_nodes", None) 314 | in_dim = self.config.get("input_dim", 1) 315 | out_dim = self.config.get("output_dim", 1) 316 | hidden_dims = self.config.get("hidden_dims", None) 317 | first_layer_embedding_size = self.config.get("first_layer_embedding_size", None) 318 | out_layer_dim = self.config.get("out_layer_dim", None) 319 | activation = self.config.get("activation", "GLU") 320 | use_mask = self.config.get("mask") 321 | temporal_emb = self.config.get("temporal_emb", True) 322 | spatial_emb = self.config.get("spatial_emb", True) 323 | horizon = self.config.get("horizon", 12) 324 | strides = self.config.get("strides", 4) 325 | 326 | self.adj = adj 327 | self.num_of_vertices = num_of_vertices 328 | self.hidden_dims = hidden_dims 329 | self.out_layer_dim = out_layer_dim 330 | self.activation = activation 331 | self.use_mask = use_mask 332 | 333 | self.temporal_emb = temporal_emb 334 | self.spatial_emb = spatial_emb 335 | self.horizon = horizon 336 | self.strides = 4 337 | 338 | self.First_FC = nn.Linear(in_dim, first_layer_embedding_size, bias=True) 339 | self.STSGCLS = nn.ModuleList() 340 | #print("____________________") 341 | #print(history) 342 | 343 | self.STSGCLS.append( 344 | STSGCL( 345 | adj=self.adj, 346 | history=history, 347 | num_of_vertices=self.num_of_vertices, 348 | in_dim=first_layer_embedding_size, 349 | out_dims=self.hidden_dims[0], 350 | strides=self.strides, 351 | activation=self.activation, 352 | temporal_emb=self.temporal_emb, 353 | spatial_emb=self.spatial_emb 354 | ) 355 | ) 356 | 357 | in_dim = self.hidden_dims[0][-1] 358 | history -= (self.strides - 1) 359 | 360 | #print("!!!!!!!!!!!!!!!!!!!") 361 | #print(history) 362 | 363 | for idx, hidden_list in enumerate(self.hidden_dims): 364 | #print("?????? 
", idx) 365 | if idx == 0: 366 | continue 367 | #print("---------", idx) 368 | self.STSGCLS.append( 369 | STSGCL( 370 | adj=self.adj, 371 | history=history, 372 | num_of_vertices=self.num_of_vertices, 373 | in_dim=in_dim, 374 | out_dims=hidden_list, 375 | strides=self.strides, 376 | activation=self.activation, 377 | temporal_emb=self.temporal_emb, 378 | spatial_emb=self.spatial_emb 379 | ) 380 | ) 381 | history -= (self.strides - 1) 382 | in_dim = hidden_list[-1] 383 | 384 | self.predictLayer = nn.ModuleList() 385 | #print("***********************") 386 | #print(history) 387 | for t in range(self.horizon): 388 | self.predictLayer.append( 389 | output_layer( 390 | num_of_vertices=self.num_of_vertices, 391 | history=history, 392 | in_dim=in_dim, 393 | out_dim = out_dim, 394 | hidden_dim=out_layer_dim, 395 | horizon=1 396 | ) 397 | ) 398 | 399 | if self.use_mask: 400 | mask = torch.zeros_like(self.adj) 401 | mask[self.adj != 0] = self.adj[self.adj != 0] 402 | self.mask = nn.Parameter(mask) 403 | else: 404 | self.mask = None 405 | 406 | def forward(self, x): 407 | """ 408 | :param x: B, Tin, N, Cin) 409 | :return: B, Tout, N 410 | """ 411 | 412 | x = torch.relu(self.First_FC(x)) # B, Tin, N, Cin 413 | #print(1) 414 | 415 | for model in self.STSGCLS: 416 | x = model(x, self.mask) 417 | # (B, T - 8, N, Cout) 418 | #print(2) 419 | need_concat = [] 420 | for i in range(self.horizon): 421 | out_step = self.predictLayer[i](x) # (B, 1, N, 2) 422 | need_concat.append(out_step) 423 | #print(3) 424 | out = torch.cat(need_concat, dim=1) # B, Tout, N, 2 425 | 426 | del need_concat 427 | 428 | return out 429 | 430 | 431 | 432 | -------------------------------------------------------------------------------- /STFGNN/model/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from sklearn.metrics import r2_score, explained_variance_score 4 | 5 | 6 | def masked_mae_loss(y_pred, y_true): 7 | mask = (y_true != 0).float() 8 | mask /= mask.mean() 9 | loss = torch.abs(y_pred - y_true) 10 | loss = loss * mask 11 | # trick for nans: 12 | # https://discuss.pytorch.org/t/how-to-set-nan-in-tensor-to-0/3918/3 13 | loss[loss != loss] = 0 14 | return loss.mean() 15 | 16 | 17 | def masked_mae_torch(preds, labels, null_val=np.nan): 18 | labels[torch.abs(labels) < 1e-4] = 0 19 | if np.isnan(null_val): 20 | mask = ~torch.isnan(labels) 21 | else: 22 | mask = labels.ne(null_val) 23 | mask = mask.float() 24 | mask /= torch.mean(mask) 25 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 26 | loss = torch.abs(torch.sub(preds, labels)) 27 | loss = loss * mask 28 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 29 | return torch.mean(loss) 30 | 31 | 32 | def log_cosh_loss(preds, labels): 33 | loss = torch.log(torch.cosh(preds - labels)) 34 | return torch.mean(loss) 35 | 36 | 37 | def huber_loss(preds, labels, delta=1.0): 38 | residual = torch.abs(preds - labels) 39 | condition = torch.le(residual, delta) 40 | small_res = 0.5 * torch.square(residual) 41 | large_res = delta * residual - 0.5 * delta * delta 42 | return torch.mean(torch.where(condition, small_res, large_res)) 43 | # lo = torch.nn.SmoothL1Loss() 44 | # return lo(preds, labels) 45 | 46 | 47 | def quantile_loss(preds, labels, delta=0.25): 48 | condition = torch.ge(labels, preds) 49 | large_res = delta * (labels - preds) 50 | small_res = (1 - delta) * (preds - labels) 51 | return torch.mean(torch.where(condition, large_res, small_res)) 52 | 53 | 54 | def 
masked_mape_torch(preds, labels, null_val=np.nan, eps=0): 55 | labels[torch.abs(labels) < 1e-4] = 0 56 | if np.isnan(null_val) and eps != 0: 57 | loss = torch.abs((preds - labels) / (labels + eps)) 58 | return torch.mean(loss) 59 | if np.isnan(null_val): 60 | mask = ~torch.isnan(labels) 61 | else: 62 | mask = labels.ne(null_val) 63 | mask = mask.float() 64 | mask /= torch.mean(mask) 65 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 66 | loss = torch.abs((preds - labels) / labels) 67 | loss = loss * mask 68 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 69 | return torch.mean(loss) 70 | 71 | 72 | def masked_mse_torch(preds, labels, null_val=np.nan): 73 | labels[torch.abs(labels) < 1e-4] = 0 74 | if np.isnan(null_val): 75 | mask = ~torch.isnan(labels) 76 | else: 77 | mask = labels.ne(null_val) 78 | mask = mask.float() 79 | mask /= torch.mean(mask) 80 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 81 | loss = torch.square(torch.sub(preds, labels)) 82 | loss = loss * mask 83 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 84 | return torch.mean(loss) 85 | 86 | 87 | def masked_rmse_torch(preds, labels, null_val=np.nan): 88 | labels[torch.abs(labels) < 1e-4] = 0 89 | return torch.sqrt(masked_mse_torch(preds=preds, labels=labels, 90 | null_val=null_val)) 91 | 92 | 93 | def r2_score_torch(preds, labels): 94 | preds = preds.cpu().flatten() 95 | labels = labels.cpu().flatten() 96 | return r2_score(labels, preds) 97 | 98 | 99 | def explained_variance_score_torch(preds, labels): 100 | preds = preds.cpu().flatten() 101 | labels = labels.cpu().flatten() 102 | return explained_variance_score(labels, preds) 103 | 104 | 105 | def masked_rmse_np(preds, labels, null_val=np.nan): 106 | return np.sqrt(masked_mse_np(preds=preds, labels=labels, 107 | null_val=null_val)) 108 | 109 | 110 | def masked_mse_np(preds, labels, null_val=np.nan): 111 | with np.errstate(divide='ignore', invalid='ignore'): 112 | if np.isnan(null_val): 113 | mask = ~np.isnan(labels) 114 | else: 115 | mask = np.not_equal(labels, null_val) 116 | mask = mask.astype('float32') 117 | mask /= np.mean(mask) 118 | rmse = np.square(np.subtract(preds, labels)).astype('float32') 119 | rmse = np.nan_to_num(rmse * mask) 120 | return np.mean(rmse) 121 | 122 | 123 | def masked_mae_np(preds, labels, null_val=np.nan): 124 | with np.errstate(divide='ignore', invalid='ignore'): 125 | if np.isnan(null_val): 126 | mask = ~np.isnan(labels) 127 | else: 128 | mask = np.not_equal(labels, null_val) 129 | mask = mask.astype('float32') 130 | mask /= np.mean(mask) 131 | mae = np.abs(np.subtract(preds, labels)).astype('float32') 132 | mae = np.nan_to_num(mae * mask) 133 | return np.mean(mae) 134 | 135 | 136 | def masked_mape_np(preds, labels, null_val=np.nan): 137 | with np.errstate(divide='ignore', invalid='ignore'): 138 | if np.isnan(null_val): 139 | mask = ~np.isnan(labels) 140 | else: 141 | mask = np.not_equal(labels, null_val) 142 | mask = mask.astype('float32') 143 | mask /= np.mean(mask) 144 | mape = np.abs(np.divide(np.subtract( 145 | preds, labels).astype('float32'), labels)) 146 | mape = np.nan_to_num(mask * mape) 147 | return np.mean(mape) 148 | 149 | 150 | def r2_score_np(preds, labels): 151 | preds = preds.flatten() 152 | labels = labels.flatten() 153 | return r2_score(labels, preds) 154 | 155 | 156 | def explained_variance_score_np(preds, labels): 157 | preds = preds.flatten() 158 | labels = labels.flatten() 159 | return explained_variance_score(labels, preds) 160 | 
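The masked torch losses above all share one pattern: build a 0/1 mask from `null_val`, divide it by its mean so that averaging the masked error over every position equals averaging over the kept positions only, and replace any NaN produced along the way by zero. masked_mae_torch additionally snaps labels with |label| < 1e-4 to exactly 0 before masking. A minimal usage sketch of that behaviour (run from the STFGNN directory so `model.loss` resolves, as in main.py; the tensors and the hand-worked value in the comment are illustrative, not taken from the repository):

import torch

from model.loss import masked_mae_torch

labels = torch.tensor([[10.0, 0.0], [20.0, 40.0]])  # 0.0 marks a missing reading
preds = torch.tensor([[12.0, 5.0], [18.0, 40.0]])

# only the three non-zero labels contribute: (|12 - 10| + |18 - 20| + |40 - 40|) / 3 = 4/3
print(masked_mae_torch(preds, labels, null_val=0))  # tensor(1.3333)

The numpy versions at the end of this file (and their duplicates in evaluator/evaluator.py) follow the same masking convention.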
-------------------------------------------------------------------------------- /STFGNN/raw_data/PEMS03/PEMS03.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwm412/STFGNN-Pytorch/ae7c95866d036d2bd7143d6d6020228a4e902e50/STFGNN/raw_data/PEMS03/PEMS03.npz -------------------------------------------------------------------------------- /STFGNN/raw_data/PEMS03/adj_mx.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwm412/STFGNN-Pytorch/ae7c95866d036d2bd7143d6d6020228a4e902e50/STFGNN/raw_data/PEMS03/adj_mx.pkl -------------------------------------------------------------------------------- /STFGNN/utils/GPS_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | R_EARTH = 6371000 # meter 4 | 5 | 6 | def angle2radian(angle): 7 | """ 8 | convert from an angle to a radian 9 | :param angle: (float) 10 | :return: radian (float) 11 | """ 12 | return math.radians(angle) 13 | 14 | 15 | def radian2angle(radian): 16 | return math.degrees(radian) 17 | 18 | 19 | def spherical_law_of_cosines(phi1, lambda1, phi2, lambda2): 20 | """ 21 | calculate great circle distance with spherical law of cosines 22 | phi/lambda for latitude/longitude in radians 23 | :param phi1: point one's latitude in radians 24 | :param lambda1: point one's longitude in radians 25 | :param phi2: point two's latitude in radians 26 | :param lambda2: point two's longitude in radians 27 | :return: 28 | """ 29 | d_lambda = lambda2 - lambda1 30 | return math.acos(math.sin(phi1) * math.sin(phi2) + math.cos(phi1) * math.cos(phi2) * math.cos(d_lambda)) 31 | 32 | 33 | def haversine(phi1, lambda1, phi2, lambda2): 34 | """ 35 | calculate angular great circle distance with haversine formula 36 | see parameters in spherical_law_of_cosines 37 | """ 38 | d_phi = phi2 - phi1 39 | d_lambda = lambda2 - lambda1 40 | a = math.pow(math.sin(d_phi / 2), 2) + \ 41 | math.cos(phi1) * math.cos(phi2) * math.pow(math.sin(d_lambda / 2), 2) 42 | c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) 43 | return c 44 | 45 | 46 | def equirectangular_approximation(phi1, lambda1, phi2, lambda2): 47 | """ 48 | calculate angular great circle distance with Pythagoras’ theorem performed on an equirectangular projection 49 | see parameters in spherical_law_of_cosines 50 | """ 51 | x = (lambda2 - lambda1) * math.cos((phi1 + phi2) / 2) 52 | y = phi2 - phi1 53 | return math.sqrt(math.pow(x, 2) + math.pow(y, 2)) 54 | 55 | 56 | def dist(phi1, lambda1, phi2, lambda2, r=R_EARTH, method='hav'): 57 | """ 58 | calculate great circle distance with given latitude and longitude, 59 | :param phi1: point one's latitude in angle 60 | :param lambda1: point one's longitude in angle 61 | :param phi2: point two's latitude in angle 62 | :param lambda2: point two's longitude in angle 63 | :param r: earth radius(m) 64 | :param method: 'hav' means haversine, 65 | 'LoC' means Spherical Law of Cosines, 66 | 'approx' means Pythagoras’ theorem performed on an equirectangular projection 67 | :return: distance (m) 68 | """ 69 | return angular_dist(phi1, lambda1, phi2, lambda2, method) * r 70 | 71 | 72 | def angular_dist(phi1, lambda1, phi2, lambda2, method='hav'): 73 | """ 74 | calculate angular great circle distance with given latitude and longitude 75 | :return: angle 76 | """ 77 | if method.lower() == 'hav': 78 | return haversine(phi1, lambda1, phi2, lambda2) 79 | elif method.lower() == 'loc': 80 | return 
spherical_law_of_cosines(phi1, lambda1, phi2, lambda2) 81 | elif method.lower() == 'approx': 82 | return equirectangular_approximation(phi1, lambda1, phi2, lambda2) 83 | else: 84 | assert False 85 | 86 | 87 | def destination(phi1, lambda1, brng, distance, r=R_EARTH): 88 | """ 89 | 90 | :param phi1: 91 | :param lambda1: 92 | :param brng: 93 | :param distance: 94 | :return: 95 | """ 96 | delta = distance / r 97 | phi2 = math.asin(math.sin(phi1) * math.cos(delta) + math.cos(phi1) * math.sin(delta) * math.cos(brng)) 98 | lambda2 = lambda1 + math.atan2( 99 | math.sin(brng) * math.sin(delta) * math.cos(phi1), math.cos(delta) - math.sin(phi1) * math.sin(phi2) 100 | ) 101 | return phi2, lambda2 102 | 103 | 104 | def init_bearing(phi1, lambda1, phi2, lambda2): 105 | """ 106 | initial bearing of a great circle route 107 | :return: 0~360 108 | """ 109 | y = math.sin(lambda2 - lambda1) * math.cos(phi2) 110 | x = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(lambda2 - lambda1) 111 | theta = math.atan2(y, x) 112 | brng = (theta * 180 / math.pi + 360) % 360 113 | return brng -------------------------------------------------------------------------------- /STFGNN/utils/Optim.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.optim as optim 4 | 5 | 6 | class Optim(object): 7 | 8 | def __init__(self, params, config): 9 | self.params = list(params) # careful: params may be a generator 10 | self.config = config 11 | self.last_ppl = None 12 | self.lr = self.config.get("lr", 0.001) 13 | self.max_grad_norm = self.config.get("clip", 10) 14 | self.method = self.config.get("optim", "adam") 15 | self.lr_decay = self.config.get("lr_decay", False) 16 | self.lr_scheduler_type = self.config.get('lr_scheduler', 'multisteplr') 17 | self.lr_decay_ratio = self.config.get("lr_decay_ratio", 0.1) 18 | self.milestones = self.config.get("lr_decay_steps", []) 19 | self.step_size = self.config.get("step_size", 10) 20 | 21 | self._makeOptimizer() 22 | self.lr_scheduler = self._build_lr_scheduler() 23 | 24 | 25 | def _makeOptimizer(self): 26 | if self.method == 'sgd': 27 | self.optimizer = optim.SGD(self.params, lr=self.lr) 28 | elif self.method == 'adagrad': 29 | self.optimizer = optim.Adagrad(self.params, lr=self.lr) 30 | elif self.method == 'adadelta': 31 | self.optimizer = optim.Adadelta(self.params, lr=self.lr) 32 | elif self.method == 'adam': 33 | self.optimizer = optim.Adam(self.params, lr=self.lr) 34 | else: 35 | raise RuntimeError("Invalid optim method: " + self.method) 36 | 37 | def _build_lr_scheduler(self): 38 | """ 39 | 根据全局参数`lr_scheduler`选择对应的lr_scheduler 40 | """ 41 | if self.lr_decay: 42 | if self.lr_scheduler_type.lower() == 'multisteplr': 43 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 44 | self.optimizer, milestones=self.milestones, gamma=self.lr_decay_ratio) 45 | elif self.lr_scheduler_type.lower() == 'steplr': 46 | lr_scheduler = torch.optim.lr_scheduler.StepLR( 47 | self.optimizer, step_size=self.step_size, gamma=self.lr_decay_ratio) 48 | elif self.lr_scheduler_type.lower() == 'exponentiallr': 49 | lr_scheduler = torch.optim.lr_scheduler.ExponentialLR( 50 | self.optimizer, gamma=self.lr_decay_ratio) 51 | else: 52 | print('Received unrecognized lr_scheduler, ' 53 | 'please check the parameter `lr_scheduler`.') 54 | lr_scheduler = None 55 | else: 56 | lr_scheduler = None 57 | return lr_scheduler 58 | 59 | 60 | def step(self): 61 | # Compute gradients norm. 
62 | grad_norm = 0 63 | 64 | if self.max_grad_norm is not None: 65 | torch.nn.utils.clip_grad_norm_(self.params, self.max_grad_norm) 66 | 67 | self.optimizer.step() 68 | return grad_norm 69 | 70 | 71 | def zero_grad(self): 72 | self.optimizer.zero_grad() 73 | return 74 | 75 | 76 | # decay learning rate if val perf does not improve or we hit the start_decay_at limit 77 | def updateLearningRate(self, ppl, epoch): 78 | if self.start_decay_at is not None and epoch >= self.start_decay_at: 79 | self.start_decay = True 80 | if self.last_ppl is not None and ppl > self.last_ppl: 81 | self.start_decay = True 82 | 83 | if self.start_decay: 84 | self.lr = self.lr * self.lr_decay 85 | print("Decaying learning rate to %g" % self.lr) 86 | #only decay for one epoch 87 | self.start_decay = False 88 | 89 | self.last_ppl = ppl 90 | 91 | self._makeOptimizer() 92 | -------------------------------------------------------------------------------- /STFGNN/utils/argument_list.py: -------------------------------------------------------------------------------- 1 | """ 2 | store the arguments can be modified by the user 3 | """ 4 | import argparse 5 | 6 | general_arguments = { 7 | "gpu": "bool", 8 | "batch_size": "int", 9 | "train_rate": "float", 10 | "eval_rate": "float", 11 | "learning_rate": "float", 12 | "max_epoch": "int", 13 | "gpu_id": "int" 14 | } 15 | 16 | hyper_arguments = { 17 | "gpu": { 18 | "type": "bool", 19 | "default": None, 20 | "help": "whether use gpu" 21 | }, 22 | "gpu_id": { 23 | "type": "int", 24 | "default": None, 25 | "help": "the gpu id to use" 26 | }, 27 | "train_rate": { 28 | "type": "float", 29 | "default": None, 30 | "help": "the train set rate" 31 | }, 32 | "eval_rate": { 33 | "type": "float", 34 | "default": None, 35 | "help": "the validation set rate" 36 | }, 37 | "batch_size": { 38 | "type": "int", 39 | "default": None, 40 | "help": "the batch size" 41 | } 42 | } 43 | 44 | 45 | def str2bool(s): 46 | if isinstance(s, bool): 47 | return s 48 | if s.lower() in ('yes', 'true'): 49 | return True 50 | elif s.lower() in ('no', 'false'): 51 | return False 52 | else: 53 | raise argparse.ArgumentTypeError('bool value expected.') 54 | 55 | 56 | def str2float(s): 57 | if isinstance(s, float): 58 | return s 59 | try: 60 | x = float(s) 61 | except ValueError: 62 | raise argparse.ArgumentTypeError('float value expected.') 63 | return x 64 | -------------------------------------------------------------------------------- /STFGNN/utils/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | 数据预处理阶段相关的工具函数 3 | """ 4 | import numpy as np 5 | import time 6 | from datetime import datetime, timedelta 7 | from collections import defaultdict 8 | 9 | 10 | def parse_time(time_in, timezone_offset_in_minute=0): 11 | """ 12 | 将 json 中 time_format 格式的 time 转化为 local datatime 13 | """ 14 | date = datetime.strptime(time_in, '%Y-%m-%dT%H:%M:%SZ') # 这是 UTC 时间 15 | return date + timedelta(minutes=timezone_offset_in_minute) 16 | 17 | 18 | def cal_basetime(start_time, base_zero): 19 | """ 20 | 用于切分轨迹成一个 session, 21 | 思路为:给定一个 start_time 找到一个基准时间 base_time, 22 | 在该 base_time 到 base_time + time_length 区间的点划分到一个 session 内, 23 | 选取 base_time 来做的理由是:这样可以保证同一个小时段总是被 encode 成同一个数 24 | """ 25 | if base_zero: 26 | return start_time - timedelta(hours=start_time.hour, 27 | minutes=start_time.minute, 28 | seconds=start_time.second, 29 | microseconds=start_time.microsecond) 30 | else: 31 | # time length = 12 32 | if start_time.hour < 12: 33 | return start_time - 
timedelta(hours=start_time.hour, 34 | minutes=start_time.minute, 35 | seconds=start_time.second, 36 | microseconds=start_time.microsecond) 37 | else: 38 | return start_time - timedelta(hours=start_time.hour - 12, 39 | minutes=start_time.minute, 40 | seconds=start_time.second, 41 | microseconds=start_time.microsecond) 42 | 43 | 44 | def cal_timeoff(now_time, base_time): 45 | """ 46 | 计算两个时间之间的差值,返回值以小时为单位 47 | """ 48 | # 先将 now 按小时对齐 49 | delta = now_time - base_time 50 | return delta.days * 24 + delta.seconds / 3600 51 | 52 | 53 | def caculate_time_sim(data): 54 | time_checkin_set = defaultdict(set) 55 | tim_size = data['tim_size'] 56 | data_neural = data['data'] 57 | for uid in data_neural: 58 | uid_sessions = data_neural[uid] 59 | for session in uid_sessions: 60 | for checkin in session: 61 | timid = checkin[1] 62 | locid = checkin[0] 63 | if timid not in time_checkin_set: 64 | time_checkin_set[timid] = set() 65 | time_checkin_set[timid].add(locid) 66 | sim_matrix = np.zeros((tim_size, tim_size)) 67 | for i in range(tim_size): 68 | for j in range(tim_size): 69 | set_i = time_checkin_set[i] 70 | set_j = time_checkin_set[j] 71 | if len(set_i | set_j) != 0: 72 | jaccard_ij = len(set_i & set_j) / len(set_i | set_j) 73 | sim_matrix[i][j] = jaccard_ij 74 | return sim_matrix 75 | 76 | 77 | def parse_coordinate(coordinate): 78 | items = coordinate[1:-1].split(',') 79 | return float(items[0]), float(items[1]) 80 | 81 | 82 | def string2timestamp(strings, offset_frame): 83 | ts = [] 84 | for t in strings: 85 | dtstr = '-'.join([t[:4].decode(), t[4:6].decode(), t[6:8].decode()]) 86 | slot = int(t[8:]) - 1 87 | ts.append(np.datetime64(dtstr, 'm') + slot * offset_frame) 88 | return ts # [numpy.datetime64('2014-01-01T00:00'), ...] 89 | 90 | 91 | def timestamp2array(timestamps, t): 92 | """ 93 | 把时间戳的序列中的每一个时间戳转成特征数组,考虑了星期和小时, 94 | 时间戳: numpy.datetime64('2013-07-01T00:00:00.000000000') 95 | 96 | Args: 97 | timestamps: 时间戳序列 98 | t: 一天有多少个时间步 99 | 100 | Returns: 101 | np.ndarray: 特征数组,shape: (len(timestamps), ext_dim) 102 | """ 103 | vec_wday = [time.strptime( 104 | str(t)[:10], '%Y-%m-%d').tm_wday for t in timestamps] 105 | vec_hour = [time.strptime(str(t)[11:13], '%H').tm_hour for t in timestamps] 106 | vec_minu = [time.strptime(str(t)[14:16], '%M').tm_min for t in timestamps] 107 | ret = [] 108 | for idx, wday in enumerate(vec_wday): 109 | # day 110 | v = [0 for _ in range(7)] 111 | v[wday] = 1 112 | if wday >= 5: # 0是周一, 6是周日 113 | v.append(0) # weekend 114 | else: 115 | v.append(1) # weekday len(v)=8 116 | # hour 117 | v += [0 for _ in range(t)] # len(v)=8+T 118 | hour = vec_hour[idx] 119 | minu = vec_minu[idx] 120 | # 24*60/T 表示一个时间步是多少分钟 121 | # hour * 60 + minu 是从0:0开始到现在是多少分钟,相除计算是第几个时间步 122 | # print(hour, minu, T, (hour * 60 + minu) / (24 * 60 / T)) 123 | v[int((hour * 60 + minu) / (24 * 60 / t))] = 1 124 | # +8是因为v前边有表示星期的8位 125 | if hour >= 18 or hour < 6: 126 | v.append(0) # night 127 | else: 128 | v.append(1) # day 129 | ret.append(v) # len(v)=7+1+T+1=T+9 130 | return np.asarray(ret) 131 | 132 | 133 | def timestamp2vec_origin(timestamps): 134 | """ 135 | 把时间戳的序列中的每一个时间戳转成特征数组,只考虑星期, 136 | 时间戳: numpy.datetime64('2013-07-01T00:00:00.000000000') 137 | 138 | Args: 139 | timestamps: 时间戳序列 140 | 141 | Returns: 142 | np.ndarray: 特征数组,shape: (len(timestamps), 8) 143 | """ 144 | vec = [time.strptime(str(t)[:10], '%Y-%m-%d').tm_wday for t in timestamps] 145 | ret = [] 146 | for i in vec: 147 | v = [0 for _ in range(7)] 148 | v[i] = 1 149 | if i >= 5: 150 | v.append(0) # weekend 151 | else: 152 | 
v.append(1) # weekday 153 | ret.append(v) 154 | return np.asarray(ret) 155 | -------------------------------------------------------------------------------- /STFGNN/utils/normalization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Scaler: 5 | """ 6 | 归一化接口 7 | """ 8 | 9 | def transform(self, data): 10 | """ 11 | 数据归一化接口 12 | 13 | Args: 14 | data(np.ndarray): 归一化前的数据 15 | 16 | Returns: 17 | np.ndarray: 归一化后的数据 18 | """ 19 | raise NotImplementedError("Transform not implemented") 20 | 21 | def inverse_transform(self, data): 22 | """ 23 | 数据逆归一化接口 24 | 25 | Args: 26 | data(np.ndarray): 归一化后的数据 27 | 28 | Returns: 29 | np.ndarray: 归一化前的数据 30 | """ 31 | raise NotImplementedError("Inverse_transform not implemented") 32 | 33 | 34 | class NoneScaler(Scaler): 35 | """ 36 | 不归一化 37 | """ 38 | 39 | def transform(self, data): 40 | return data 41 | 42 | def inverse_transform(self, data): 43 | return data 44 | 45 | 46 | class NormalScaler(Scaler): 47 | """ 48 | 除以最大值归一化 49 | x = x / x.max 50 | """ 51 | 52 | def __init__(self, maxx): 53 | self.max = maxx 54 | 55 | def transform(self, data): 56 | return data / self.max 57 | 58 | def inverse_transform(self, data): 59 | return data * self.max 60 | 61 | 62 | class StandardScaler(Scaler): 63 | """ 64 | Z-score归一化 65 | x = (x - x.mean) / x.std 66 | """ 67 | 68 | def __init__(self, mean, std): 69 | self.mean = mean 70 | self.std = std 71 | 72 | def transform(self, data): 73 | return (data - self.mean) / self.std 74 | 75 | def inverse_transform(self, data): 76 | return (data * self.std) + self.mean 77 | 78 | 79 | class MinMax01Scaler(Scaler): 80 | """ 81 | MinMax归一化 结果区间[0, 1] 82 | x = (x - min) / (max - min) 83 | """ 84 | 85 | def __init__(self, minn, maxx): 86 | self.min = minn 87 | self.max = maxx 88 | 89 | def transform(self, data): 90 | return (data - self.min) / (self.max - self.min) 91 | 92 | def inverse_transform(self, data): 93 | return data * (self.max - self.min) + self.min 94 | 95 | 96 | class MinMax11Scaler(Scaler): 97 | """ 98 | MinMax归一化 结果区间[-1, 1] 99 | x = (x - min) / (max - min) 100 | x = x * 2 - 1 101 | """ 102 | 103 | def __init__(self, minn, maxx): 104 | self.min = minn 105 | self.max = maxx 106 | 107 | def transform(self, data): 108 | return ((data - self.min) / (self.max - self.min)) * 2. - 1. 109 | 110 | def inverse_transform(self, data): 111 | return ((data + 1.) / 2.) 
* (self.max - self.min) + self.min 112 | 113 | 114 | class LogScaler(Scaler): 115 | """ 116 | Log scaler 117 | x = log(x+eps) 118 | """ 119 | 120 | def __init__(self, eps=0.999): 121 | self.eps = eps 122 | 123 | def transform(self, data): 124 | return np.log(data + self.eps) 125 | 126 | def inverse_transform(self, data): 127 | return np.exp(data) - self.eps 128 | -------------------------------------------------------------------------------- /STFGNN/utils/utils.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import logging 3 | import datetime 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | 9 | def get_executor(config, model): 10 | """ 11 | according the config['executor'] to create the executor 12 | 13 | Args: 14 | config(ConfigParser): config 15 | model(AbstractModel): model 16 | 17 | Returns: 18 | AbstractExecutor: the loaded executor 19 | """ 20 | if config["task"] == "single_step": 21 | try: 22 | return getattr(importlib.import_module('libcity.executor.single_step_executor'), 23 | config['executor'])(config, model) 24 | except AttributeError: 25 | raise AttributeError('executor is not found') 26 | elif config["task"] == "multi_step": 27 | try: 28 | return getattr(importlib.import_module('libcity.executor.multi_step_executor'), 29 | config['executor'])(config, model) 30 | except AttributeError: 31 | raise AttributeError('executor is not found') 32 | else: 33 | raise AttributeError('task is not found') 34 | 35 | 36 | 37 | def get_model(config, data_feature): 38 | """ 39 | according the config['model'] to create the model 40 | 41 | Args: 42 | config(ConfigParser): config 43 | data_feature(dict): feature of the data 44 | 45 | Returns: 46 | AbstractModel: the loaded model 47 | """ 48 | if config['task'] == 'multi_step': 49 | print("config[model]: ", config['model']) 50 | try: 51 | return getattr(importlib.import_module('libcity.model.multi_step_model'), 52 | config['model'])(config, data_feature) 53 | except AttributeError: 54 | raise AttributeError('model is not found') 55 | elif config["task"] == "single_step": 56 | print("config[model]: ", config['model']) 57 | try: 58 | return getattr(importlib.import_module('libcity.model.single_step_model'), 59 | config['model'])(config, data_feature) 60 | except AttributeError: 61 | raise AttributeError('model is not found') 62 | 63 | else: 64 | raise AttributeError('task is not found') 65 | 66 | 67 | def get_evaluator(config): 68 | """ 69 | according the config['evaluator'] to create the evaluator 70 | 71 | Args: 72 | config(ConfigParser): config 73 | 74 | Returns: 75 | AbstractEvaluator: the loaded evaluator 76 | """ 77 | try: 78 | return getattr(importlib.import_module('libcity.evaluator'), 79 | config['evaluator'])(config) 80 | except AttributeError: 81 | raise AttributeError('evaluator is not found') 82 | 83 | 84 | def get_logger(config, name=None): 85 | """ 86 | 获取Logger对象 87 | 88 | Args: 89 | config(ConfigParser): config 90 | name: specified name 91 | 92 | Returns: 93 | Logger: logger 94 | """ 95 | log_dir = './libcity/log' 96 | if not os.path.exists(log_dir): 97 | os.makedirs(log_dir) 98 | log_filename = '{}-{}-{}.log'.format( 99 | config['model'], config['dataset'], get_local_time()) 100 | logfilepath = os.path.join(log_dir, log_filename) 101 | 102 | logger = logging.getLogger(name) 103 | 104 | log_level = config.get('log_level', 'INFO') 105 | 106 | if log_level.lower() == 'info': 107 | level = logging.INFO 108 | elif log_level.lower() == 'debug': 109 | level = 
logging.DEBUG 110 | elif log_level.lower() == 'error': 111 | level = logging.ERROR 112 | elif log_level.lower() == 'warning': 113 | level = logging.WARNING 114 | elif log_level.lower() == 'critical': 115 | level = logging.CRITICAL 116 | else: 117 | level = logging.INFO 118 | 119 | logger.setLevel(level) 120 | 121 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 122 | file_handler = logging.FileHandler(logfilepath) 123 | file_handler.setFormatter(formatter) 124 | 125 | console_formatter = logging.Formatter( 126 | '%(asctime)s - %(levelname)s - %(message)s') 127 | console_handler = logging.StreamHandler(sys.stdout) 128 | console_handler.setFormatter(console_formatter) 129 | 130 | logger.addHandler(file_handler) 131 | logger.addHandler(console_handler) 132 | 133 | logger.info('Log directory: %s', log_dir) 134 | return logger 135 | 136 | 137 | def get_local_time(): 138 | """ 139 | 获取时间 140 | 141 | Return: 142 | datetime: 时间 143 | """ 144 | cur = datetime.datetime.now() 145 | cur = cur.strftime('%b-%d-%Y_%H-%M-%S') 146 | return cur 147 | 148 | 149 | def ensure_dir(dir_path): 150 | """Make sure the directory exists, if it does not exist, create it. 151 | 152 | Args: 153 | dir_path (str): directory path 154 | """ 155 | if not os.path.exists(dir_path): 156 | os.makedirs(dir_path) 157 | 158 | 159 | def trans_naming_rule(origin, origin_rule, target_rule): 160 | """ 161 | 名字转换规则 162 | 163 | Args: 164 | origin (str): 源命名格式下的变量名 165 | origin_rule (str): 源命名格式,枚举类 166 | target_rule (str): 目标命名格式,枚举类 167 | 168 | Return: 169 | target (str): 转换之后的结果 170 | """ 171 | # TODO: 请确保输入是符合 origin_rule,这里目前不做检查 172 | target = '' 173 | if origin_rule == 'upper_camel_case' and target_rule == 'under_score_rule': 174 | for i, c in enumerate(origin): 175 | if i == 0: 176 | target = c.lower() 177 | else: 178 | target += '_' + c.lower() if c.isupper() else c 179 | return target 180 | else: 181 | raise NotImplementedError( 182 | 'trans naming rule only support from upper_camel_case to \ 183 | under_score_rule') 184 | 185 | 186 | def preprocess_data(data, config): 187 | """ 188 | split by input_window and output_window 189 | 190 | Args: 191 | data: shape (T, ...) 192 | 193 | Returns: 194 | np.ndarray: (train_size/test_size, input_window, ...) 195 | (train_size/test_size, output_window, ...) 196 | 197 | """ 198 | train_rate = config.get('train_rate', 0.7) 199 | eval_rate = config.get('eval_rate', 0.1) 200 | 201 | input_window = config.get('input_window', 12) 202 | output_window = config.get('output_window', 3) 203 | 204 | x, y = [], [] 205 | for i in range(len(data) - input_window - output_window): 206 | a = data[i: i + input_window + output_window] # (in+out, ...) 207 | x.append(a[0: input_window]) # (in, ...) 208 | y.append(a[input_window: input_window + output_window]) # (out, ...) 209 | x = np.array(x) # (num_samples, in, ...) 210 | y = np.array(y) # (num_samples, out, ...) 211 | 212 | train_size = int(x.shape[0] * (train_rate + eval_rate)) 213 | trainx = x[:train_size] # (train_size, in, ...) 214 | trainy = y[:train_size] # (train_size, out, ...) 215 | testx = x[train_size:x.shape[0]] # (test_size, in, ...) 216 | testy = y[train_size:x.shape[0]] # (test_size, out, ...) 
217 | return trainx, trainy, testx, testy 218 | 219 | -------------------------------------------------------------------------------- /STFGNN/utils/visualize.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | from libcity.utils.utils import ensure_dir 4 | import os 5 | 6 | 7 | class VisHelper: 8 | def __init__(self, _config): 9 | 10 | self.config = _config 11 | self.raw_path = './raw_data/' 12 | self.dataset = _config.get("dataset", "") 13 | self.save_path = _config.get("save_path", "./visualized_data/") 14 | 15 | # get type 16 | self.config_path = self.raw_path + self.dataset + '/config.json' 17 | self.data_config = json.load(open(self.config_path, 'r')) 18 | if 'dyna' in self.data_config and ['state'] == self.data_config['dyna']['including_types']: 19 | self.type = 'state' 20 | elif 'grid' in self.data_config and ['state'] == self.data_config['grid']['including_types']: 21 | self.type = 'grid' 22 | else: 23 | self.type = 'trajectory' 24 | # get geo and dyna files 25 | all_files = os.listdir(self.raw_path + self.dataset) 26 | self.geo_file = [] 27 | self.geo_path = None 28 | self.dyna_file = [] 29 | self.dyna_path = None 30 | self.grid_file = [] 31 | self.grid_path = None 32 | for file in all_files: 33 | if file.split('.')[1] == 'geo': 34 | self.geo_file.append(file) 35 | if file.split('.')[1] == 'dyna': 36 | self.dyna_file.append(file) 37 | if file.split('.')[1] == 'grid': 38 | self.grid_file.append(file) 39 | 40 | assert len(self.geo_file) == 1 41 | 42 | # reserved columns 43 | self.geo_reserved_lst = ['type', 'coordinates'] 44 | self.dyna_reserved_lst = ['dyna_id', 'type', 'time', 'entity_id', 'traj_id', 'coordinates'] 45 | self.grid_reserved_lst = ['dyna_id', 'type', 'time', 'row_id', 'column_id'] 46 | 47 | def visualize(self): 48 | if self.type == 'trajectory': 49 | # geo 50 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0] 51 | self._visualize_geo() 52 | 53 | # dyna 54 | for dyna_file in self.dyna_file: 55 | self.dyna_path = self.raw_path + self.dataset + '/' + dyna_file 56 | self._visualize_dyna() 57 | 58 | elif self.type == 'state': 59 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0] 60 | for dyna_file in self.dyna_file: 61 | self.dyna_path = self.raw_path + self.dataset + '/' + dyna_file 62 | self._visualize_state() 63 | elif self.type == 'grid': 64 | self.geo_path = self.raw_path + self.dataset + '/' + self.geo_file[0] 65 | for grid_file in self.grid_file: 66 | self.grid_path = self.raw_path + self.dataset + '/' + grid_file 67 | self._visualize_grid() 68 | 69 | def _visualize_state(self): 70 | geo_file = pd.read_csv(self.geo_path, index_col=None) 71 | dyna_file = pd.read_csv(self.dyna_path, index_col=None) 72 | geojson_obj = {'type': "FeatureCollection", 'features': []} 73 | 74 | # get feature_lst 75 | geo_feature_lst = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst] 76 | dyna_feature_lst = [_ for _ in list(dyna_file.columns) if _ not in self.dyna_reserved_lst] 77 | 78 | for _, row in geo_file.iterrows(): 79 | 80 | # get feature dictionary 81 | geo_id = row['geo_id'] 82 | feature_dct = row[geo_feature_lst].to_dict() 83 | dyna_i = dyna_file[dyna_file['entity_id'] == geo_id] 84 | for f in dyna_feature_lst: 85 | feature_dct[f] = float(dyna_i[f].mean()) 86 | 87 | # form a feature 88 | feature_i = dict() 89 | feature_i['type'] = 'Feature' 90 | feature_i['properties'] = feature_dct 91 | feature_i['geometry'] = {} 92 | 
feature_i['geometry']['type'] = row['type'] 93 | feature_i['geometry']['coordinates'] = eval(row['coordinates']) 94 | geojson_obj['features'].append(feature_i) 95 | 96 | ensure_dir(self.save_path) 97 | save_name = "_".join(self.dyna_path.split('/')[-1].split('.')) + '.json' 98 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w', 99 | encoding='utf-8'), 100 | ensure_ascii=False, indent=4) 101 | 102 | def _visualize_grid(self): 103 | geo_file = pd.read_csv(self.geo_path, index_col=None) 104 | grid_file = pd.read_csv(self.grid_path, index_col=None) 105 | geojson_obj = {'type': "FeatureCollection", 'features': []} 106 | 107 | # get feature_lst 108 | geo_feature_lst = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst] 109 | grid_feature_lst = [_ for _ in list(grid_file.columns) if _ not in self.grid_reserved_lst] 110 | 111 | for _, row in geo_file.iterrows(): 112 | 113 | # get feature dictionary 114 | row_id, column_id = row['row_id'], row['column_id'] 115 | feature_dct = row[geo_feature_lst].to_dict() 116 | dyna_i = grid_file[(grid_file['row_id'] == row_id) & (grid_file['column_id'] == column_id)] 117 | for f in grid_feature_lst: 118 | feature_dct[f] = float(dyna_i[f].mean()) 119 | 120 | # form a feature 121 | feature_i = dict() 122 | feature_i['type'] = 'Feature' 123 | feature_i['properties'] = feature_dct 124 | feature_i['geometry'] = {} 125 | feature_i['geometry']['type'] = row['type'] 126 | feature_i['geometry']['coordinates'] = eval(row['coordinates']) 127 | geojson_obj['features'].append(feature_i) 128 | 129 | ensure_dir(self.save_path) 130 | save_name = "_".join(self.grid_path.split('/')[-1].split('.')) + '.json' 131 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w', 132 | encoding='utf-8'), 133 | ensure_ascii=False, indent=4) 134 | 135 | def _visualize_geo(self): 136 | geo_file = pd.read_csv(self.geo_path, index_col=None) 137 | geojson_obj = {'type': "FeatureCollection", 'features': []} 138 | extra_feature = [_ for _ in list(geo_file.columns) if _ not in self.geo_reserved_lst] 139 | for _, row in geo_file.iterrows(): 140 | feature_dct = row[extra_feature].to_dict() 141 | feature_i = dict() 142 | feature_i['type'] = 'Feature' 143 | feature_i['properties'] = feature_dct 144 | feature_i['geometry'] = {} 145 | feature_i['geometry']['type'] = row['type'] 146 | feature_i['geometry']['coordinates'] = eval(row['coordinates']) 147 | geojson_obj['features'].append(feature_i) 148 | 149 | ensure_dir(self.save_path) 150 | save_name = "_".join(self.geo_path.split('/')[-1].split('.')) + '.json' 151 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w', 152 | encoding='utf-8'), 153 | ensure_ascii=False, indent=4) 154 | 155 | def _visualize_dyna(self): 156 | dyna_file = pd.read_csv(self.dyna_path, index_col=None) 157 | dyna_feature_lst = [_ for _ in list(dyna_file.columns) if _ not in self.dyna_reserved_lst] 158 | geojson_obj = {'type': "FeatureCollection", 'features': []} 159 | trajectory = {} 160 | GPS_traj = "coordinates" in dyna_file.columns 161 | if not GPS_traj: 162 | geo_file = pd.read_csv(self.geo_path, index_col=None) 163 | 164 | a = dyna_file.groupby("entity_id") 165 | for entity_id, entity_value in a: 166 | if "traj_id" in dyna_file.columns: 167 | trajectory[entity_id] = {} 168 | entity_value = entity_value.groupby("traj_id") 169 | for traj_id, traj_value in entity_value: 170 | feature_dct = {"usr_id": entity_id, "traj_id": traj_id} 171 | for f in dyna_feature_lst: 172 | feature_dct[f] = float(traj_value[f].mean()) 173 | 
feature_i = dict() 174 | feature_i['type'] = 'Feature' 175 | feature_i['properties'] = feature_dct 176 | feature_i['geometry'] = {} 177 | feature_i['geometry']['type'] = "LineString" 178 | feature_i['geometry']['coordinates'] = [] 179 | if GPS_traj: 180 | for _, row in traj_value.iterrows(): 181 | feature_i['geometry']['coordinates'].append(eval(row['coordinates'])) 182 | else: 183 | for _, row in traj_value.iterrows(): 184 | coor = eval(geo_file.loc[row['location']]['coordinates']) 185 | if _ == 0: 186 | feature_i['geometry']['coordinates'].append(coor[0]) 187 | feature_i['geometry']['coordinates'].append(coor[1]) 188 | geojson_obj['features'].append(feature_i) 189 | 190 | else: 191 | feature_dct = {"usr_id": entity_id} 192 | feature_i = dict() 193 | feature_i['type'] = 'Feature' 194 | feature_i['properties'] = feature_dct 195 | feature_i['geometry'] = {} 196 | feature_i['geometry']['type'] = "LineString" 197 | feature_i['geometry']['coordinates'] = [] 198 | if GPS_traj: 199 | for _, row in entity_value.iterrows(): 200 | feature_i['geometry']['coordinates'].append(eval(row['coordinates'])) 201 | else: 202 | for _, row in entity_value.iterrows(): 203 | coor = eval(geo_file.loc[row['location']]['coordinates']) 204 | if _ == 0: 205 | feature_i['geometry']['coordinates'].append(coor[0]) 206 | feature_i['geometry']['coordinates'].append(coor[1]) 207 | geojson_obj['features'].append(feature_i) 208 | 209 | ensure_dir(self.save_path) 210 | save_name = "_".join(self.dyna_path.split('/')[-1].split('.')) + '.json' 211 | json.dump(geojson_obj, open(self.save_path + '/' + save_name, 'w', 212 | encoding='utf-8'), 213 | ensure_ascii=False, indent=4) 214 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## STFGNN-Pytorch 2 | 3 | This is a PyTorch re-implementation of the STFGNN model described in https://arxiv.org/abs/2012.09641. 4 | 5 | ### Quick start 6 | 7 | Put your data in **STFGNN/raw_data**. 8 | 9 | For example, to run the model on the PEMS03 dataset, put **adj_mx.pkl** and **PEMS03.npz** in **STFGNN/raw_data/PEMS03/**. 10 | 11 | Set appropriate parameter values in **STFGNN/config/*.json**. 12 | 13 | Run **python main.py** to train the model. 14 | 15 | ### Discussion 16 | Contact me at lwm568@buaa.edu.cn. 17 | Issues and discussion are always welcome. 18 | --------------------------------------------------------------------------------
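Before launching a run, it can help to confirm that the raw files match the config. Below is a small sanity-check sketch (illustration only, not part of the repository): the paths follow the PEMS03 layout from the quick start, the npz is read the same way `_load_origin_data` reads it, and it assumes the adjacency pickle holds the `(sensor_ids, id_to_index, adj_mx)` triple that `data/dataset/multi_step_dataset.py` unpacks (the dataset code itself goes through the project's `load_pickle` helper in `data/utils.py`).

```python
# Sanity check for the raw PEMS03 inputs (illustrative sketch only).
import pickle
import numpy as np

data = np.load("raw_data/PEMS03/PEMS03.npz")
flow = data[data.files[0]]              # same access pattern as _load_origin_data
print("raw series shape:", flow.shape)  # should agree with seq_len=26208, num_nodes=358

# Assumption: the pickle stores the (sensor_ids, id_to_index, adj_mx) triple
# expected by data/dataset/multi_step_dataset.py.
with open("raw_data/PEMS03/adj_mx.pkl", "rb") as f:
    sensor_ids, id_to_index, adj_mx = pickle.load(f)
print("adjacency shape:", adj_mx.shape)  # should be (num_nodes, num_nodes)
```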