├── README.md
├── conf
│   └── PECPM.json
├── main.py
├── src
│   ├── model
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── STKEC_model.cpython-37.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── detect.cpython-37.pyc
│   │   │   ├── detect2.cpython-37.pyc
│   │   │   ├── ewc.cpython-37.pyc
│   │   │   ├── ewc4.cpython-37.pyc
│   │   │   ├── gcn_conv.cpython-37.pyc
│   │   │   ├── gwnet.cpython-37.pyc
│   │   │   ├── mode24423.cpython-37.pyc
│   │   │   ├── model.cpython-37.pyc
│   │   │   ├── model2.cpython-37.pyc
│   │   │   ├── model5.cpython-37.pyc
│   │   │   ├── replay.cpython-37.pyc
│   │   │   └── stkec_ewc.cpython-37.pyc
│   │   ├── detect.py
│   │   ├── ewc.py
│   │   ├── gcn_conv.py
│   │   ├── model.py
│   │   └── replay.py
│   └── trafficDataset.py
└── utils
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-37.pyc
    │   ├── common_tools.cpython-37.pyc
    │   ├── data_convert.cpython-37.pyc
    │   └── my_math.cpython-37.pyc
    ├── common_tools.py
    ├── data_convert.py
    └── my_math.py
/README.md:
--------------------------------------------------------------------------------
1 | # PECPM
2 | 
3 | Code for **Pattern Expansion and Consolidation on Evolving Graphs for Continual Traffic Prediction** (KDD 2023). PECPM (Pattern Expansion and Consolidation based on Pattern Matching) is an efficient and effective continual learning framework for traffic flow forecasting that achieves accurate predictions without access to historical graph data. Specifically, we design a bank module based on pattern matching to store representative patterns of the road network. As the road network expands, a model equipped with this bank achieves continual traffic prediction by effectively managing the stored patterns: new patterns are continuously expanded while learned ones are consolidated.
4 | 
5 | ### Requirements
6 | 
7 | * python = 3.8.5
8 | * pytorch = 1.7.1
9 | * torch-geometric = 1.6.3
10 | 
11 | ### Data
12 | 
13 | Download the raw data from [this link](https://drive.google.com/file/d/1P5wowSaNSWBNCK3mQwESp-G2zsutXc5S/view?usp=sharing), unzip the file, and put it in the `data` folder.
14 | 
15 | ### Usage
16 | 
17 | * Data Process
18 | ```
19 | Download the attention data from [this link](https://pan.baidu.com/s/1JRuYBT0RsRaF11-QI8soKg) (extraction code: mm7w), unzip the file, and put it in the `data` folder.
20 | ```
21 | 
22 | * PECPM
23 | ```
24 | python main.py --conf conf/PECPM.json --gpuid 1
25 | ```
26 | 
27 | ### Expand and consolidate performance evaluation
28 | 
29 | Select conflicting and stable nodes to evaluate expansion and consolidation performance; a sketch of one possible selection procedure is shown below.
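
A minimal sketch of one way to pick such nodes (an illustration, not necessarily the exact routine used in the paper): align the nodes shared by two consecutive years, measure how much each node's pattern (attention) vector drifts via cosine similarity, and treat the least similar nodes as conflicting and the most similar as stable. `prev_feat`, `cur_feat`, and `select_conflict_and_stable_nodes` are illustrative names; both inputs are assumed to be (N, D) per-node feature arrays.

```python
import numpy as np

def select_conflict_and_stable_nodes(prev_feat, cur_feat, k):
    # Per-node cosine similarity between the year t-1 and year t pattern vectors.
    prev_norm = prev_feat / (np.linalg.norm(prev_feat, axis=1, keepdims=True) + 1e-8)
    cur_norm = cur_feat / (np.linalg.norm(cur_feat, axis=1, keepdims=True) + 1e-8)
    similarity = np.sum(prev_norm * cur_norm, axis=1)
    order = np.argsort(similarity)          # ascending: most drifted patterns first
    conflict_nodes = order[:k]              # patterns changed the most across years
    stable_nodes = order[-k:]               # patterns changed the least across years
    return conflict_nodes, stable_nodes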
30 | ``` 31 | 32 | 33 | -------------------------------------------------------------------------------- /conf/PECPM.json: -------------------------------------------------------------------------------- 1 | { 2 | "begin_year": 2011, 3 | "end_year": 2017, 4 | "dropout": [0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.01, 0.015], 5 | "lr": [0.01, 0.01, 0.01, 0.005, 0.005, 0.005, 0.005, 0.015], 6 | "batch_size": [64,128,128,128,128,128,128], 7 | "epoch": 100, 8 | "gpuid": 1, 9 | "loss": "mse", 10 | "activation": "relu", 11 | "scheduler": "epo", 12 | "train": 1, 13 | "y_len": 12, 14 | "x_len": 12, 15 | "data_process": 1, 16 | "auto_test": 1, 17 | "influen_size":0.02, 18 | "beita":[5, 5, 1, 1, 0, 1, 1, 10, 10], 19 | "cluster":64, 20 | "attention_weight":[5,5,5,5,5,5,5,5], 21 | "raw_data_path": "/home/wbw/kdd/data/district3F11T17/finaldata/", 22 | "save_data_path": "/home/wbw/kdd/TrafficStream-main/TrafficStream-main2/data", 23 | "graph_path": "/home/wbw/kdd/data/district3F11T17/graph/", 24 | "model_path": "res/district3F11T17/", 25 | "detect_strategy":false, 26 | "gcn":{ 27 | "in_channel": 12, 28 | "out_channel": 12, 29 | "hidden_channel": 64 30 | }, 31 | 32 | "tcn":{ 33 | "in_channel": 1, 34 | "out_channel": 1, 35 | "kernel_size": 3, 36 | "dilation": 1 37 | }, 38 | 39 | 40 | "logname": "trafficStream", 41 | 42 | "strategy": "incremental", 43 | "init": true, 44 | "increase": true, 45 | "num_hops": 2, 46 | 47 | "detect": true, 48 | "detect_strategy": "feature", 49 | 50 | "adp_adj":false, 51 | "skip_dim": 64, 52 | "end_dim": 64, 53 | "hidden_channels": 32, 54 | "dilation_channels": 64, 55 | "residual_channels":64, 56 | "input_dim":1, 57 | "output_dim":1, 58 | 59 | "ewc": true, 60 | "ewc_strategy": "ewc", 61 | "ewc_lambda":[0.001, 0.001, 0.0001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.005], 62 | 63 | "replay": true, 64 | "replay_strategy": "random", 65 | "repaly_num_samples": 100 66 | } 67 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import sys, json, argparse, random, re, os, shutil 2 | sys.path.append("src/") 3 | import numpy as np 4 | import pandas as pd 5 | import logging 6 | from datetime import datetime 7 | from pathlib import Path 8 | import math 9 | import os.path as osp 10 | import networkx as nx 11 | import pdb 12 | 13 | from torch.optim.lr_scheduler import ReduceLROnPlateau, OneCycleLR 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.functional as func 17 | from Bio.Cluster import kcluster,clustercentroids 18 | from scipy.spatial.distance import cosine 19 | from torch import optim 20 | import torch.multiprocessing as mp 21 | from torch_geometric.data import Data, Batch, DataLoader 22 | from torch_geometric.utils import to_dense_batch, k_hop_subgraph 23 | 24 | from utils import common_tools as ct 25 | from utils.my_math import masked_mae_np, masked_mape_np, masked_mse_np,masked_mae,masked_mape,masked_mse 26 | from utils.data_convert import generate_samples, get_idx 27 | from src.model.model import Basic_Model 28 | from src.model.ewc import EWC 29 | from src.trafficDataset import TrafficDataset 30 | from src.model import detect 31 | from src.model import replay 32 | 33 | from sklearn.cluster import KMeans 34 | from sklearn.metrics.pairwise import cosine_similarity 35 | 36 | result = {3:{"mae":{}, "mape":{}, "rmse":{}}, 6:{"mae":{}, "mape":{}, "rmse":{}}, 12:{"mae":{}, "mape":{}, "rmse":{}}} 37 | pin_memory = True 38 | n_work = 16 39 | 40 | def update(src, 
tmp): 41 | for key in tmp: 42 | if key!= "gpuid": 43 | src[key] = tmp[key] 44 | 45 | def load_best_model(args): 46 | if (args.load_first_year and args.year <= args.begin_year+1) or args.train == 0: 47 | load_path = args.first_year_model_path 48 | loss = load_path.split("/")[-1].replace(".pkl", "") 49 | else: 50 | loss = [] 51 | for filename in os.listdir(osp.join(args.model_path, args.logname+args.time, str(args.year-1))): 52 | loss.append(filename[0:-4]) 53 | loss = sorted(loss) 54 | load_path = osp.join(args.model_path, args.logname+args.time, str(args.year-1), loss[0]+".pkl") 55 | 56 | args.logger.info("[*] load from {}".format(load_path)) 57 | state_dict = torch.load(load_path, map_location=args.device)["model_state_dict"] 58 | model = Basic_Model(args) 59 | model.load_state_dict(state_dict) 60 | model = model.to(args.device) 61 | return model, loss[0] 62 | 63 | def init(args): 64 | conf_path = osp.join(args.conf) 65 | info = ct.load_json_file(conf_path) 66 | info["time"] = datetime.now().strftime("%Y-%m-%d-%H:%M:%S.%f") 67 | update(vars(args), info) 68 | vars(args)["path"] = osp.join(args.model_path, args.logname+args.time) 69 | ct.mkdirs(args.path) 70 | del info 71 | 72 | 73 | def init_log(args): 74 | log_dir, log_filename = args.path, args.logname 75 | logger = logging.getLogger(__name__) 76 | ct.mkdirs(log_dir) 77 | logger.setLevel(logging.INFO) 78 | fh = logging.FileHandler(osp.join(log_dir, log_filename+".log")) 79 | fh.setLevel(logging.INFO) 80 | ch = logging.StreamHandler(sys.stdout) 81 | ch.setLevel(logging.INFO) 82 | formatter = logging.Formatter("%(asctime)s - %(message)s") 83 | fh.setFormatter(formatter) 84 | ch.setFormatter(formatter) 85 | logger.addHandler(fh) 86 | logger.addHandler(ch) 87 | logger.info("logger name:%s", osp.join(log_dir, log_filename+".log")) 88 | vars(args)["logger"] = logger 89 | return logger 90 | 91 | 92 | def seed_set(seed=0): 93 | max_seed = (1 << 32) - 1 94 | random.seed(seed) 95 | np.random.seed(random.randint(0, max_seed)) 96 | torch.manual_seed(random.randint(0, max_seed)) 97 | torch.cuda.manual_seed(random.randint(0, max_seed)) 98 | torch.cuda.manual_seed_all(random.randint(0, max_seed)) 99 | torch.backends.cudnn.benchmark = False # if benchmark=True, deterministic will be False 100 | torch.backends.cudnn.deterministic = True 101 | def cosine_distance(matrix_a, matrix_c): 102 | a, b = matrix_a.shape 103 | c, _ = matrix_c.shape 104 | 105 | # 初始化注意力矩阵 106 | attention_matrix = np.zeros((a, c)) 107 | 108 | # 计算注意力矩阵 109 | for i in range(a): 110 | for j in range(c): 111 | distance = cosine(matrix_a[i], matrix_c[j]) 112 | attention_matrix[i, j] = 1 - distance 113 | return attention_matrix 114 | def cosine_distance(A, B): 115 | norm_A = np.linalg.norm(A, axis=1, keepdims=True) # (m, 1) 116 | norm_B = np.linalg.norm(B, axis=1, keepdims=True) # (p, 1) 117 | 118 | # 计算 A 和 B 的点积 119 | dot_product = np.dot(A, B.T) # (m, p) 120 | 121 | # 计算余弦相似度 122 | similarity = dot_product / (norm_A * norm_B.T) # (m, p) 123 | 124 | return similarity 125 | 126 | def keep_top_k(matrix, k): 127 | # 对每行进行排序,返回排序后的索引 128 | sorted_indices = np.argsort(matrix, axis=1) 129 | 130 | # 生成一个与matrix形状相同的全零矩阵 131 | result = np.zeros_like(matrix) 132 | 133 | # 将前K个最大值设置为原始数值,其他设置为0 134 | rows = np.arange(matrix.shape[0])[:, np.newaxis] 135 | top_k_indices = sorted_indices[:, -k:] 136 | result[rows, top_k_indices] = matrix[rows, top_k_indices] 137 | 138 | return result 139 | def match_attention(data,args): 140 | attention=cosine_distance(data,args.last_clusterc) 141 | return 
keep_top_k(attention,args.attention_weight[args.year-args.begin_year]) 142 | 143 | def train(inputs, args): 144 | # Model Setting 145 | global result 146 | path = osp.join(args.path, str(args.year)) 147 | ct.mkdirs(path) 148 | 149 | if args.loss == "mse": lossfunc1 = func.mse_loss 150 | elif args.loss == "huber": lossfunc = func.smooth_l1_loss 151 | lossfunc= masked_mae 152 | cluster_lossf=masked_mse 153 | #train_idx,val_idx,test_idx= get_idx(inputs) 154 | # Dataset Definition 155 | N=inputs['train_x'].shape[1] 156 | pathatt='data/attetion/'+str(args.year)+'_attention.npy' 157 | attention=np.load(pathatt) 158 | C=attention.shape[-1] 159 | attention=attention.reshape(-1,N,C) 160 | 161 | if args.strategy == 'incremental' and args.year > args.begin_year: 162 | train_loader =DataLoader(TrafficDataset("", "", x=inputs["train_x"][:, args.subgraph.numpy(),:], y=inputs["train_y"][:, args.subgraph.numpy(),:],\ 163 | att=attention[:, args.subgraph.numpy(),:],edge_index="", mode="subgraph"), batch_size=args.batch_size[args.year-args.begin_year], shuffle=True, pin_memory=pin_memory, num_workers=n_work) 164 | val_loader = DataLoader(TrafficDataset("", "", x=inputs["val_x"][:, args.subgraph.numpy(),:], y=inputs["val_y"][:, args.subgraph.numpy(),:], \ 165 | att=attention[:, args.subgraph.numpy(),:],edge_index="", mode="subgraph"), batch_size=args.batch_size[args.year-args.begin_year], shuffle=False, pin_memory=pin_memory, num_workers=n_work) 166 | graph = nx.Graph() 167 | graph.add_nodes_from(range(args.subgraph.size(0))) 168 | graph.add_edges_from(args.subgraph_edge_index.numpy().T) 169 | adj = nx.to_numpy_array(graph) 170 | adj = adj / (np.sum(adj, 1, keepdims=True) + 1e-6) 171 | vars(args)["sub_adj"] = torch.from_numpy(adj).to(torch.float).to(args.device) 172 | path = osp.join(args.path, str(args.year)) 173 | else: 174 | train_loader = DataLoader(TrafficDataset(inputs, "train",att=attention), batch_size=args.batch_size[args.year-args.begin_year], shuffle=True, pin_memory=pin_memory, num_workers=n_work) 175 | val_loader = DataLoader(TrafficDataset(inputs, "val",att=attention), batch_size=args.batch_size[args.year-args.begin_year], shuffle=False, pin_memory=pin_memory, num_workers=n_work) 176 | vars(args)["sub_adj"] = vars(args)["adj"] 177 | test_loader = DataLoader(TrafficDataset(inputs, "test",att=attention), batch_size=args.batch_size[args.year-args.begin_year], shuffle=False, pin_memory=pin_memory, num_workers=n_work) 178 | 179 | args.logger.info("[*] Year " + str(args.year) + " Dataset load!") 180 | 181 | # Model Definition 182 | if args.init == True and args.year > args.begin_year: 183 | gnn_model, _ = load_best_model(args) 184 | if args.ewc: 185 | #args.logger.info("[*] EWC! 
lambda {:.6f}".format(args.ewc_lambda)) 186 | model = EWC(gnn_model, args.adj, args.ewc_lambda[args.year-args.begin_year], args.ewc_strategy) 187 | ewc_loader = DataLoader(TrafficDataset(inputs, "train",att=attention), batch_size=args.batch_size[args.year-args.begin_year], shuffle=False, pin_memory=pin_memory, num_workers=n_work) 188 | model.register_ewc_params(ewc_loader, lossfunc, device) 189 | else: 190 | model = gnn_model 191 | else: 192 | gnn_model = Basic_Model(args).to(args.device) 193 | model = gnn_model 194 | 195 | # Model Optimizer 196 | optimizer = optim.AdamW(model.parameters(), lr=args.lr[args.year-args.begin_year]) 197 | 198 | args.logger.info("[*] Year " + str(args.year) + " Training start") 199 | 200 | 201 | iters = len(train_loader) 202 | lowest_validation_loss = 1e7 203 | counter = 0 204 | patience = 10 205 | model.train() 206 | use_time = [] 207 | 208 | for epoch in range(100): 209 | training_loss = 0.0 210 | start_time = datetime.now() 211 | 212 | # Train Model 213 | cn = 0 214 | for batch_idx, data in enumerate(train_loader): 215 | if epoch == 0 and batch_idx == 0: 216 | args.logger.info("node number {}".format(data.x.shape)) 217 | data = data.to(device, non_blocking=pin_memory) 218 | optimizer.zero_grad() 219 | pred,attention = model(data, args.sub_adj) 220 | batch_att=pred.shape[0]//args.sub_adj.shape[0] 221 | loss_cluster=0 222 | 223 | attention_label=data.att.to(args.device) 224 | loss_cluster = func.mse_loss(attention,attention_label) 225 | if args.strategy == "incremental" and args.year > args.begin_year: 226 | pred, _ = to_dense_batch(pred, batch=data.batch) 227 | data.y, _ = to_dense_batch(data.y, batch=data.batch) 228 | pred = pred[:, args.mapping, :] 229 | data.y = data.y[:, args.mapping, :] 230 | mask_value = torch.tensor(0.0) 231 | if data.y.min() < 1: 232 | mask_value = data.y.min() 233 | loss = lossfunc(data.y,pred, mask_value)+loss_cluster*args.beita[args.year-args.begin_year] 234 | if args.ewc and args.year > args.begin_year: 235 | loss += model.compute_consolidation_loss() 236 | training_loss += float(loss) 237 | loss.backward() 238 | optimizer.step() 239 | 240 | cn += 1 241 | 242 | if epoch == 0: 243 | total_time = (datetime.now() - start_time).total_seconds() 244 | else: 245 | total_time += (datetime.now() - start_time).total_seconds() 246 | use_time.append((datetime.now() - start_time).total_seconds()) 247 | training_loss = training_loss/cn 248 | 249 | # Validate Model 250 | validation_loss = 0.0 251 | cn = 0 252 | with torch.no_grad(): 253 | for batch_idx, data in enumerate(val_loader): 254 | data = data.to(device,non_blocking=pin_memory) 255 | pred,_ = model(data, args.sub_adj) 256 | if args.strategy == "incremental" and args.year > args.begin_year: 257 | pred, _ = to_dense_batch(pred, batch=data.batch) 258 | data.y, _ = to_dense_batch(data.y, batch=data.batch) 259 | pred = pred[:, args.mapping, :] 260 | data.y = data.y[:, args.mapping, :] 261 | mask_value = torch.tensor(0) 262 | if data.y.min() < 1: 263 | mask_value = data.y.min() 264 | loss = lossfunc(data.y,pred, mask_value) 265 | validation_loss += float(loss) 266 | cn += 1 267 | validation_loss = float(validation_loss/cn) 268 | 269 | 270 | args.logger.info(f"epoch:{epoch}, training loss:{training_loss:.4f} validation loss:{validation_loss:.4f}") 271 | 272 | # Early Stop 273 | if validation_loss <= lowest_validation_loss: 274 | counter = 0 275 | lowest_validation_loss = round(validation_loss, 4) 276 | torch.save({'model_state_dict': gnn_model.state_dict()}, osp.join(path, 
str(round(validation_loss,4))+".pkl")) 277 | else: 278 | counter += 1 279 | if counter > patience: 280 | break 281 | 282 | best_model_path = osp.join(path, str(lowest_validation_loss)+".pkl") 283 | best_model = Basic_Model(args) 284 | best_model.load_state_dict(torch.load(best_model_path, args.device)["model_state_dict"]) 285 | best_model = best_model.to(args.device) 286 | 287 | # Test Model 288 | test_model2(best_model, args, test_loader, pin_memory) 289 | result[args.year] = {"total_time": total_time, "average_time": sum(use_time)/len(use_time), "epoch_num": epoch+1} 290 | args.logger.info("Finished optimization, total time:{:.2f} s, best model:{}".format(total_time, best_model_path)) 291 | #args.logger.info("Finished optimization, total time:{:.2f} s, best model:{}".format(total_time, best_model_path)) 292 | 293 | def test_model(model, args, testset, pin_memory): 294 | model.eval() 295 | pred_ = [] 296 | truth_ = [] 297 | loss = 0.0 298 | with torch.no_grad(): 299 | cn = 0 300 | for data in testset: 301 | data = data.to(args.device, non_blocking=pin_memory) 302 | pred = model(data, args.adj) 303 | loss += func.mse_loss(data.y, pred, reduction="mean") 304 | pred_.append(pred) 305 | truth_.append(data) 306 | cn += 1 307 | loss = loss/cn 308 | args.logger.info("[*] loss:{:.4f}".format(loss)) 309 | pred_ =torch.cat(pred_, 0) 310 | truth_ = torch.cat(truth_, 0) 311 | mask_value = 0.0 312 | if truth_.min() < 1: 313 | mask_value = truth_.min() 314 | mae =all_metric(truth_, pred_, args,mask_value) 315 | return loss 316 | 317 | 318 | 319 | def test_model2(model, args, testset, pin_memory): 320 | model.eval() 321 | pred_ = [] 322 | truth_ = [] 323 | loss = 0.0 324 | with torch.no_grad(): 325 | cn = 0 326 | for data in testset: 327 | data = data.to(args.device, non_blocking=pin_memory) 328 | pred,_ = model(data, args.adj) 329 | loss += func.mse_loss(data.y, pred, reduction="mean") 330 | pred, _ = to_dense_batch(pred, batch=data.batch) 331 | data.y, _ = to_dense_batch(data.y, batch=data.batch) 332 | pred_.append(pred.cpu().data.numpy()) 333 | truth_.append(data.y.cpu().data.numpy()) 334 | cn += 1 335 | loss = loss/cn 336 | args.logger.info("[*] loss:{:.4f}".format(loss)) 337 | pred_ = np.concatenate(pred_, 0) 338 | truth_ = np.concatenate(truth_, 0) 339 | mask_value = torch.tensor(0) 340 | if truth_.min() < 1: 341 | mask_value = truth_.min() 342 | mae = metric(truth_, pred_, args,mask_value) 343 | return loss 344 | 345 | def metric(ground_truth, prediction, args,mask_value): 346 | global result 347 | pred_time = [3,6,12] 348 | args.logger.info("[*] year {}, testing".format(args.year)) 349 | for i in pred_time: 350 | mae = masked_mae_np(ground_truth[:, :, :i], prediction[:, :, :i], 0) 351 | rmse = masked_mse_np(ground_truth[:, :, :i], prediction[:, :, :i], 0) ** 0.5 352 | mape = masked_mape_np(ground_truth[:, :, :i], prediction[:, :, :i], 0) 353 | args.logger.info("T:{:d}\tMAE\t{:.4f}\tRMSE\t{:.4f}\tMAPE\t{:.4f}".format(i,mae,rmse,mape)) 354 | result[i]["mae"][args.year] = mae 355 | result[i]["mape"][args.year] = mape 356 | result[i]["rmse"][args.year] = rmse 357 | return mae 358 | 359 | def all_metric(ground_truth, prediction, args,mask_value): 360 | global result 361 | pred_time = [3,6,12] 362 | args.logger.info("[*] year {}, testing".format(args.year)) 363 | for i in pred_time: 364 | mae = masked_mae(ground_truth[:, :, :i], prediction[:, :, :i], mask_value).item() 365 | rmse = masked_mse(ground_truth[:, :, :i], prediction[:, :, :i], mask_value).item() 366 | mape = masked_mape(ground_truth[:, :, 
:i], prediction[:, :, :i], mask_value).item() 367 | args.logger.info("T:{:d}\tMAE\t{:.4f}\tRMSE\t{:.4f}\tMAPE\t{:.4f}".format(i,mae,rmse,mape)) 368 | result[i]["mae"][args.year] = mae 369 | result[i]["mape"][args.year] = mape 370 | result[i]["rmse"][args.year] = rmse 371 | return mae 372 | 373 | 374 | def main(args): 375 | logger = init_log(args) 376 | logger.info("params : %s", vars(args)) 377 | ct.mkdirs(args.save_data_path) 378 | 379 | for year in range(args.begin_year, args.end_year+1): 380 | # Load Data 381 | graph = nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"]) 382 | vars(args)["graph_size"] = graph.number_of_nodes() 383 | vars(args)["year"] = year 384 | inputs = generate_samples(31, osp.join(args.save_data_path, str(year)+'_30day'), np.load(osp.join(args.raw_data_path, str(year)+".npz"))["x"], graph, val_test_mix=True) \ 385 | if args.data_process else np.load(osp.join(args.save_data_path, str(year)+"_30day.npz"), allow_pickle=True) 386 | args.logger.info("[*] Year {} load from {}_30day.npz".format(args.year, osp.join(args.save_data_path, str(year)))) 387 | 388 | adj = np.load(osp.join(args.graph_path, str(args.year)+"_adj.npz"))["x"] 389 | adj = adj / (np.sum(adj, 1, keepdims=True) + 1e-6) 390 | vars(args)["adj"] = torch.from_numpy(adj).to(torch.float).to(args.device) 391 | if year == args.begin_year and args.load_first_year: 392 | # Skip the first year, model has been trained and retrain is not needed 393 | model, _ = load_best_model(args) 394 | test_loader = DataLoader(TrafficDataset(inputs, "test"), batch_size=args.batch_size, shuffle=False, pin_memory=pin_memory, num_workers=n_work) 395 | test_model2(model, args, test_loader, pin_memory=True) 396 | continue 397 | 398 | 399 | if year > args.begin_year and args.strategy == "incremental": 400 | # Load the best model 401 | model, _ = load_best_model(args) 402 | 403 | node_list = list() 404 | 405 | cur_node_size = np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"].shape[0] 406 | pre_node_size = np.load(osp.join(args.graph_path, str(year-1)+"_adj.npz"))["x"].shape[0] 407 | node_list.extend(list(range(pre_node_size, cur_node_size))) 408 | 409 | 410 | 411 | pre_data = np.load(osp.join(args.raw_data_path, str(year-1)+".npz"))["x"] 412 | cur_data = np.load(osp.join(args.raw_data_path, str(year)+".npz"))["x"] 413 | pre_graph = np.array(list(nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year-1)+"_adj.npz"))["x"]).edges)).T 414 | cur_graph = np.array(list(nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"]).edges)).T 415 | 416 | evo_num= int(0.01*args.graph_size) 417 | replay_num=int(0.09*args.graph_size) 418 | replay,evo_node = detect.get_eveloved_nodes(args,replay_num,evo_num) 419 | node_list.extend(list(evo_node )) 420 | node_list.extend(list(replay)) 421 | 422 | 423 | node_list = list(set(node_list)) 424 | if len(node_list) > int(0.2*args.graph_size): 425 | node_list = random.sample(node_list, int(0.15*args.graph_size)) 426 | 427 | # Obtain subgraph of node list 428 | cur_graph = torch.LongTensor(np.array(list(nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"]).edges)).T) 429 | edge_list = list(nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"]).edges) 430 | graph_node_from_edge = set() 431 | for (u,v) in edge_list: 432 | graph_node_from_edge.add(u) 433 | graph_node_from_edge.add(v) 434 | node_list = list(set(node_list) & graph_node_from_edge) 435 | 436 | 437 | if len(node_list) != 
0 : 438 | subgraph, subgraph_edge_index, mapping, _ = k_hop_subgraph(node_list, num_hops=args.num_hops, edge_index=cur_graph, relabel_nodes=True) 439 | vars(args)["subgraph"] = subgraph 440 | vars(args)["subgraph_edge_index"] = subgraph_edge_index 441 | vars(args)["mapping"] = mapping 442 | logger.info("number of increase nodes:{}, nodes after {} hop:{}, total nodes this year {}".format\ 443 | (len(node_list), args.num_hops, args.subgraph.size(), args.graph_size)) 444 | vars(args)["node_list"] = np.asarray(node_list) 445 | 446 | 447 | # Skip the year when no nodes needed to be trained incrementally 448 | if args.strategy != "retrain" and year > args.begin_year and len(args.node_list) == 0: 449 | model, loss = load_best_model(args) 450 | ct.mkdirs(osp.join(args.model_path, args.logname+args.time, str(args.year))) 451 | torch.save({'model_state_dict': model.state_dict()}, osp.join(args.model_path, args.logname+args.time, str(args.year), loss+".pkl")) 452 | test_loader = DataLoader(TrafficDataset(inputs, "test"), batch_size=args.batch_size, shuffle=False, pin_memory=pin_memory, num_workers=n_work) 453 | test_model(model, args, test_loader, pin_memory=True) 454 | logger.warning("[*] No increasing nodes at year " + str(args.year) + ", store model of the last year.") 455 | continue 456 | 457 | 458 | if args.train: 459 | train(inputs, args) 460 | else: 461 | if args.auto_test: 462 | model, _ = load_best_model(args) 463 | test_loader = DataLoader(TrafficDataset(inputs, "test"), batch_size=args.batch_size, shuffle=False, pin_memory=pin_memory, num_workers=n_work) 464 | test_model(model, args, test_loader, pin_memory=True) 465 | 466 | 467 | for i in [3, 6, 12]: 468 | for j in ['mae', 'rmse', 'mape']: 469 | info = "" 470 | all12=0 471 | for year in range(args.begin_year, args.end_year+1): 472 | if i in result: 473 | if j in result[i]: 474 | if year in result[i][j]: 475 | info+="{:.2f}\t".format(result[i][j][year]) 476 | logger.info("{}\t{}\t".format(i,j) + info) 477 | 478 | 479 | if __name__ == "__main__": 480 | parser = argparse.ArgumentParser(formatter_class = argparse.RawTextHelpFormatter) 481 | parser.add_argument("--conf", type = str, default = "conf/test.json") 482 | parser.add_argument("--paral", type = int, default = 0) 483 | parser.add_argument("--gpuid", type = int, default = 2) 484 | parser.add_argument("--seed", type = int, default = 3208) 485 | parser.add_argument("--logname", type = str, default = "info") 486 | parser.add_argument("--load_first_year", type = int, default = 0, help="0: training first year, 1: load from model path of first year") 487 | parser.add_argument("--first_year_model_path", type = str, default = "data-path", help='specify a pretrained model root') 488 | args = parser.parse_args() 489 | init(args) 490 | seed_set(args.seed) 491 | 492 | device = torch.device("cuda:{}".format(args.gpuid)) if torch.cuda.is_available() and args.gpuid != -1 else "cpu" 493 | vars(args)["device"] = device 494 | main(args) 495 | -------------------------------------------------------------------------------- /src/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__init__.py -------------------------------------------------------------------------------- /src/model/__pycache__/STKEC_model.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/STKEC_model.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/detect.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/detect.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/detect2.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/detect2.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/ewc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/ewc.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/ewc4.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/ewc4.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/gcn_conv.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/gcn_conv.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/gwnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/gwnet.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/mode24423.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/mode24423.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/model2.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/model2.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/model5.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/model5.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/replay.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/replay.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/stkec_ewc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/stkec_ewc.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/detect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('src/') 3 | import numpy as np 4 | from scipy.stats import entropy as kldiv 5 | from datetime import datetime 6 | from torch_geometric.utils import to_dense_batch 7 | from src.trafficDataset import continue_learning_Dataset 8 | from torch_geometric.data import Data, Batch, DataLoader 9 | import torch 10 | from scipy.spatial import distance 11 | from scipy.stats import wasserstein_distance as WD 12 | import os.path as osp 13 | # scipy.stats.entropy(x, y) 14 | 15 | 16 | def get_feature(data, graph, args, model, adj): 17 | node_size = data.shape[1] 18 | data = np.reshape(data[-288*7-1:-1,:], (-1, args.x_len, node_size)) 19 | dataloader = DataLoader(continue_learning_Dataset(data), batch_size=data.shape[0], shuffle=False, pin_memory=True, num_workers=3) 20 | # feature shape [T', feature_dim, N] 21 | for data in dataloader: 22 | data = data.to(args.device, non_blocking=True) 23 | feature, _ = to_dense_batch(model.feature(data, adj), batch=data.batch) 24 | node_size = feature.size()[1] 25 | # print("before permute:", feature.size()) 26 | feature = feature.permute(1,0,2) 27 | 28 | # [N, T', feature_dim] 29 | return feature.cpu().detach().numpy() 30 | 31 | 32 | def get_adj(year, args): 33 | adj = np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"] 34 | adj = adj / (np.sum(adj, 1, keepdims=True) + 1e-6) 35 | return torch.from_numpy(adj).to(torch.float).to(args.device) 36 | 37 | 38 | def score_func(pre_data, cur_data, args): 39 | # shape: [T, N] 40 | node_size = pre_data.shape[1] 41 | score = [] 42 | for node in range(node_size): 43 | max_val = max(max(pre_data[:,node]), max(cur_data[:,node])) 44 | min_val = min(min(pre_data[:,node]), min(cur_data[:,node])) 45 | pre_prob, _ = np.histogram(pre_data[:,node], bins=10, range=(min_val, max_val)) 46 | pre_prob = pre_prob *1.0 / sum(pre_prob) 47 | cur_prob, _ = np.histogram(cur_data[:,node], bins=10, range=(min_val, max_val)) 48 | cur_prob = cur_prob * 1.0 /sum(cur_prob) 49 | score.append(kldiv(pre_prob, cur_prob)) 50 | # return staiton_id of topk max score, station with larger KL score needs more training 51 | return np.argpartition(np.asarray(score), 
-args.topk)[-args.topk:] 52 | def sort_with_index(lst): 53 | sorted_index = sorted(range(len(lst)), key=lambda i: lst[i], reverse=True) 54 | return sorted_index 55 | def random_sampling(data_size, num_samples): 56 | return np.random.choice(data_size, num_samples) 57 | def get_eveloved_nodes(args,replay_num,evo_num): 58 | # should be N*T 59 | past_path=args.daily_node+'/'+str(args.year-1)+'.npy' 60 | daily_node_past=np.load(past_path) 61 | cuettern_path=args.daily_node+'/'+str(args.year)+'.npy' 62 | daily_node_cur=np.load(cuettern_path) 63 | if daily_node_past.shape[0]=3: 12 | col0_mins = np.min(arr[..., 0], axis=0, keepdims=True) 13 | col0_maxs = np.max(arr[..., 0], axis=0, keepdims=True) 14 | normalized_col0 = (arr[..., 0] - col0_mins) / (col0_maxs - col0_mins) 15 | normalized_arr = arr.copy() 16 | normalized_arr[...,0] = normalized_col0 17 | 18 | return normalized_arr 19 | else: 20 | return (data - np.mean(data)) / np.std(data) 21 | def get_temporal_feature(data): 22 | 23 | data=np.expand_dims(data, axis=-1) 24 | feature_list = [data] 25 | n=data.shape[1] 26 | steps_per_day=288 27 | tod = [i % steps_per_day /steps_per_day for i in range(data.shape[0])] 28 | tod = np.array(tod) 29 | tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0)) 30 | feature_list.append(tod_tiled) 31 | dow = [(i // steps_per_day) % 7 for i in range(data.shape[0])] 32 | dow = np.array(dow) 33 | dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0)) 34 | feature_list.append(dow_tiled) 35 | processed_data = np.concatenate(feature_list, axis=-1) 36 | return processed_data 37 | 38 | def generate_dataset(data, idx, x_len=12, y_len=12,temporal_feature=False): 39 | res = data[idx] 40 | node_size = data.shape[1] 41 | C=data.shape[-1] 42 | t = len(idx)-1 43 | idic = 0 44 | x_index, y_index = [], [] 45 | 46 | for i in tqdm.tqdm(range(t,0,-1)): 47 | if i-x_len-y_len>=0: 48 | x_index.extend(list(range(i-x_len-y_len, i-y_len))) 49 | y_index.extend(list(range(i-y_len, i))) 50 | 51 | x_index = np.asarray(x_index) 52 | y_index = np.asarray(y_index) 53 | if temporal_feature: 54 | x = res[x_index].reshape((-1, x_len, node_size,C)) 55 | y = res[y_index].reshape((-1, y_len, node_size,C)) 56 | x =x.transpose(0,2,1,3).reshape(-1,node_size,x_len*C) 57 | y =y.transpose(0,2,1,3) 58 | return x, y[...,0] #L,N,T,C 59 | else: 60 | x = res[x_index].reshape((-1, x_len, node_size)) 61 | y = res[y_index].reshape((-1, y_len, node_size)) 62 | return x, y 63 | 64 | def generate_samples122(days, savepath, data, graph, train_rate=0.6, val_rate=0.2, test_rate=0.2, val_test_mix=False,temporal_feature=False): 65 | edge_index = np.array(list(graph.edges)).T 66 | del graph 67 | data = data[0:days*288, :] 68 | t, n = data.shape[0], data.shape[1] 69 | if temporal_feature: 70 | train_idx = [i for i in range(int(t*train_rate))] 71 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 72 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 73 | 74 | 75 | train_x, train_y = generate_dataset(data, train_idx) 76 | val_x, val_y = generate_dataset(data, val_idx) 77 | test_x, test_y = generate_dataset(data, test_idx) 78 | if val_test_mix: 79 | val_test_x = np.concatenate((val_x, test_x), 0) 80 | val_test_y = np.concatenate((val_y, test_y), 0) 81 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 82 | np.random.shuffle(val_test_idx) 83 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 84 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], 
val_test_y[val_test_idx[int(t*val_rate):]] 85 | 86 | train_x = z_score(train_x) 87 | val_x = z_score(val_x) 88 | test_x = z_score(test_x) 89 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 90 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 91 | else: 92 | data=get_temporal_feature(data) 93 | 94 | train_idx = [i for i in range(int(t*train_rate))] 95 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 96 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 97 | 98 | train_x, train_y = generate_dataset(data, train_idx,temporal_feature) 99 | val_x, val_y = generate_dataset(data, val_idx,temporal_feature) 100 | test_x, test_y = generate_dataset(data, test_idx,temporal_feature) 101 | if val_test_mix: 102 | val_test_x = np.concatenate((val_x, test_x), 0) 103 | val_test_y = np.concatenate((val_y, test_y), 0) 104 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 105 | np.random.shuffle(val_test_idx) 106 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 107 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 108 | 109 | train_x = z_score(train_x) 110 | val_x = z_score(val_x) 111 | test_x = z_score(test_x) 112 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 113 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 114 | return data 115 | 116 | 117 | 118 | class StandardScaler(): 119 | def __init__(self, mean, std): 120 | self.mean = mean 121 | self.std = std 122 | 123 | def transform(self, data): 124 | return (data - self.mean) / self.std 125 | 126 | def inverse_transform(self, data): 127 | return (data * self.std) + self.mean 128 | 129 | 130 | def generate_samples(days, savepath, data, graph, train_rate=0.6, val_rate=0.2, test_rate=0.2, val_test_mix=False,temporal_feature=True): 131 | edge_index = np.array(list(graph.edges)).T 132 | del graph 133 | data = data[0:days*288, :] 134 | t, n = data.shape[0], data.shape[1] 135 | if temporal_feature==False: 136 | train_idx = [i for i in range(int(t*train_rate))] 137 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 138 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 139 | 140 | 141 | train_x, train_y = generate_dataset(data, train_idx) 142 | val_x, val_y = generate_dataset(data, val_idx) 143 | test_x, test_y = generate_dataset(data, test_idx) 144 | if val_test_mix: 145 | val_test_x = np.concatenate((val_x, test_x), 0) 146 | val_test_y = np.concatenate((val_y, test_y), 0) 147 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 148 | np.random.shuffle(val_test_idx) 149 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 150 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 151 | 152 | train_x = z_score(train_x) 153 | val_x = z_score(val_x) 154 | test_x = z_score(test_x) 155 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 156 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, 
"test_y":test_y, "edge_index":edge_index} 157 | else: 158 | data=get_temporal_feature(data) 159 | train_idx = [i for i in range(int(t*train_rate))] 160 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 161 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 162 | 163 | train_x, train_y = generate_dataset(data, train_idx,temporal_feature=temporal_feature) 164 | val_x, val_y = generate_dataset(data, val_idx,temporal_feature=temporal_feature) 165 | test_x, test_y = generate_dataset(data, test_idx,temporal_feature=temporal_feature) 166 | if val_test_mix: 167 | val_test_x = np.concatenate((val_x, test_x), 0) 168 | val_test_y = np.concatenate((val_y, test_y), 0) 169 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 170 | np.random.shuffle(val_test_idx) 171 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 172 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 173 | 174 | train_x = z_score(train_x) 175 | val_x = z_score(val_x) 176 | test_x = z_score(test_x) 177 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 178 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 179 | return data 180 | 181 | def generate_samples(days, savepath, data, graph, train_rate=0.6, val_rate=0.2, test_rate=0.2, val_test_mix=False,temporal_feature=True): 182 | edge_index = np.array(list(graph.edges)).T 183 | del graph 184 | data = data[0:days*288, :] 185 | t, n = data.shape[0], data.shape[1] 186 | if temporal_feature==False: 187 | train_idx = [i for i in range(int(t*train_rate))] 188 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 189 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 190 | 191 | 192 | train_x, train_y = generate_dataset(data, train_idx) 193 | val_x, val_y = generate_dataset(data, val_idx) 194 | test_x, test_y = generate_dataset(data, test_idx) 195 | if val_test_mix: 196 | val_test_x = np.concatenate((val_x, test_x), 0) 197 | val_test_y = np.concatenate((val_y, test_y), 0) 198 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 199 | np.random.shuffle(val_test_idx) 200 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 201 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 202 | 203 | train_x = z_score(train_x) 204 | val_x = z_score(val_x) 205 | test_x = z_score(test_x) 206 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 207 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 208 | else: 209 | data=get_temporal_feature(data) #L,N,C 210 | 211 | train_idx = [i for i in range(int(t*train_rate))] 212 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 213 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 214 | 215 | train_x, train_y = generate_dataset(data, train_idx,temporal_feature=temporal_feature) 216 | val_x, val_y = generate_dataset(data, val_idx,temporal_feature=temporal_feature) 217 | test_x, test_y = generate_dataset(data, test_idx,temporal_feature=temporal_feature) 218 | if val_test_mix: 219 | val_test_x 
= np.concatenate((val_x, test_x), 0) 220 | val_test_y = np.concatenate((val_y, test_y), 0) 221 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 222 | np.random.shuffle(val_test_idx) 223 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 224 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 225 | train_x = z_score(train_x) #(5332, 655, 12, 3) 226 | val_x = z_score(val_x) 227 | test_x = z_score(test_x) 228 | 229 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 230 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 231 | return data 232 | def get_idx(days, savepath, data, train_rate=0.6, val_rate=0.2, test_rate=0.2, val_test_mix=False,temporal_feature=True): 233 | del graph 234 | data = data[0:days*288, :] 235 | t, n = data.shape[0], data.shape[1] 236 | 237 | train_idx = [i for i in range(int(t*train_rate))] 238 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 239 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 240 | return train_idx,val_idx,test_idx 241 | if __name__ == "__main__": 242 | for year in range(2011,2018): 243 | data_path=osp.join('/home/wbw/ijcai/data/district3F11T17/finaldata',str(year)+'.npz') 244 | data=np.load(data_path)['x'] 245 | 246 | edge_path=osp.join('/home/wbw/ijcai/data/district3F11T17/FastData',str(year)+'_30day.npz') 247 | edge_index=np.load(edge_path)['edge_index'] 248 | generate_samples1(data,edge_index=edge_index,year=year) -------------------------------------------------------------------------------- /utils/my_math.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | def mask_np(array, null_val): 4 | if np.isnan(null_val): 5 | return (~np.isnan(null_val)).astype('float32') 6 | else: 7 | return np.not_equal(array, null_val).astype('float32') 8 | 9 | def masked_mse(preds, labels, null_val): 10 | if torch.isnan(null_val): 11 | mask = ~torch.isnan(labels) 12 | else: 13 | mask = (labels != null_val) 14 | mask = mask.float() 15 | mask /= torch.mean((mask)) 16 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 17 | loss = (preds - labels)**2 18 | loss = loss * mask 19 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 20 | return torch.mean(loss) 21 | 22 | 23 | def masked_rmse(preds, labels, null_val): 24 | return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val)) 25 | 26 | 27 | def masked_mae(preds, labels, null_val): 28 | if torch.isnan(null_val): 29 | mask = ~torch.isnan(labels) 30 | else: 31 | mask = (labels != null_val) 32 | mask = mask.float() 33 | mask /= torch.mean((mask)) 34 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 35 | loss = torch.abs(preds - labels) 36 | loss = loss * mask 37 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 38 | return torch.mean(loss) 39 | 40 | 41 | def masked_mape(preds, labels, null_val): 42 | if torch.isnan(null_val): 43 | mask = ~torch.isnan(labels) 44 | else: 45 | mask = (labels != null_val) 46 | mask = mask.float() 47 | mask /= torch.mean((mask)) 48 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 49 | loss = torch.abs(preds - labels) / labels 50 | loss = loss * mask 51 | loss = 
torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 52 | return torch.mean(loss) 53 | def masked_mape_np(y_true, y_pred, null_val=np.nan): 54 | with np.errstate(divide='ignore', invalid='ignore'): 55 | mask = mask_np(y_true, null_val) 56 | mask /= mask.mean() 57 | mape = np.abs((y_pred - y_true) / y_true) 58 | mape = np.nan_to_num(mask * mape) 59 | return np.mean(mape) * 100 60 | 61 | def compute_all_metrics(preds, labels, null_val): 62 | mae = masked_mae(preds, labels, null_val).item() 63 | mape = masked_mape(preds, labels, null_val).item() 64 | rmse = masked_rmse(preds, labels, null_val).item() 65 | return mae, mape, rmse 66 | def masked_mse_np(y_true, y_pred, null_val=np.nan): 67 | mask = mask_np(y_true, null_val) 68 | mask /= mask.mean() 69 | mse = (y_true - y_pred) ** 2 70 | return np.mean(np.nan_to_num(mask * mse)) 71 | 72 | 73 | def masked_mae_np(y_true, y_pred, null_val=np.nan): 74 | mask = mask_np(y_true, null_val) 75 | mask /= mask.mean() 76 | mae = np.abs(y_true - y_pred) 77 | return np.mean(np.nan_to_num(mask * mae)) 78 | --------------------------------------------------------------------------------
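
For reference, a minimal usage sketch of the masked metrics defined in `utils/my_math.py` above (the toy arrays are illustrative). Ground-truth entries equal to `null_val` receive zero weight, and the mask is renormalized by its mean, so each metric is effectively an average over the valid entries only.

```python
import numpy as np
from utils.my_math import masked_mae_np, masked_mape_np, masked_mse_np

# Toy ground truth / prediction with two missing readings encoded as 0.
y_true = np.array([[10.0, 0.0, 30.0], [0.0, 50.0, 60.0]])
y_pred = np.array([[12.0, 5.0, 27.0], [8.0, 55.0, 63.0]])

# With null_val=0 the two zero entries are masked out, so the MAE is the mean
# absolute error over the four valid entries: (2 + 3 + 5 + 3) / 4 = 3.25.
mae = masked_mae_np(y_true, y_pred, null_val=0)
rmse = masked_mse_np(y_true, y_pred, null_val=0) ** 0.5
mape = masked_mape_np(y_true, y_pred, null_val=0)   # reported in percent
print(mae, rmse, mape)
```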