├── README.md
├── conf
│   └── PECPM.json
├── main.py
├── src
│   ├── model
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── STKEC_model.cpython-37.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── detect.cpython-37.pyc
│   │   │   ├── detect2.cpython-37.pyc
│   │   │   ├── ewc.cpython-37.pyc
│   │   │   ├── ewc4.cpython-37.pyc
│   │   │   ├── gcn_conv.cpython-37.pyc
│   │   │   ├── gwnet.cpython-37.pyc
│   │   │   ├── mode24423.cpython-37.pyc
│   │   │   ├── model.cpython-37.pyc
│   │   │   ├── model2.cpython-37.pyc
│   │   │   ├── model5.cpython-37.pyc
│   │   │   ├── replay.cpython-37.pyc
│   │   │   └── stkec_ewc.cpython-37.pyc
│   │   ├── detect.py
│   │   ├── ewc.py
│   │   ├── gcn_conv.py
│   │   ├── model.py
│   │   └── replay.py
│   └── trafficDataset.py
└── utils
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-37.pyc
    │   ├── common_tools.cpython-37.pyc
    │   ├── data_convert.cpython-37.pyc
    │   └── my_math.cpython-37.pyc
    ├── common_tools.py
    ├── data_convert.py
    └── my_math.py
/README.md:
--------------------------------------------------------------------------------
1 | # PECPM
2 | 
3 | Code for **Pattern Expansion and Consolidation on Evolving Graphs for Continual Traffic Prediction** (KDD 2023). PECPM (Pattern Expansion and Consolidation based on Pattern Matching) is an efficient and effective continual learning framework for traffic flow forecasting that achieves accurate predictions without access to historical graph data. Specifically, we design a bank module based on pattern matching to store representative patterns of the road network. As the road network expands, a model equipped with this bank achieves continual traffic prediction by effectively managing the stored patterns: new patterns are continuously expanded while learned ones are consolidated.
4 | 
5 | ### Requirements
6 | 
7 | * python = 3.8.5
8 | * pytorch = 1.7.1
9 | * torch-geometric = 1.6.3
10 | 
11 | ### Data
12 | 
13 | Download the raw data from [this link](https://drive.google.com/file/d/1P5wowSaNSWBNCK3mQwESp-G2zsutXc5S/view?usp=sharing), unzip the file, and put it in the `data` folder.
14 | 
15 | ### Usage
16 | 
17 | * Data Process
18 | ```
19 | Download the attention data from [this link](https://pan.baidu.com/s/1JRuYBT0RsRaF11-QI8soKg) (extraction code: mm7w), unzip the file, and put it in the `data` folder.
20 | ```
21 | 
22 | * PECPM
23 | ```
24 | python main.py --conf conf/PECPM.json --gpuid 1
25 | ```
26 | 
27 | ### Expand and consolidate performance evaluation
28 | 
29 | Select conflicting and stable nodes to evaluate expansion and consolidation performance; a sketch of one possible selection procedure is shown below.
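
A minimal sketch of one way to pick such nodes (an illustration, not necessarily the exact routine used in the paper): align the nodes shared by two consecutive years, measure how much each node's pattern (attention) vector drifts via cosine similarity, and treat the least similar nodes as conflicting and the most similar as stable. `prev_feat`, `cur_feat`, and `select_conflict_and_stable_nodes` are illustrative names; both inputs are assumed to be (N, D) per-node feature arrays.

```python
import numpy as np

def select_conflict_and_stable_nodes(prev_feat, cur_feat, k):
    # Per-node cosine similarity between the year t-1 and year t pattern vectors.
    prev_norm = prev_feat / (np.linalg.norm(prev_feat, axis=1, keepdims=True) + 1e-8)
    cur_norm = cur_feat / (np.linalg.norm(cur_feat, axis=1, keepdims=True) + 1e-8)
    similarity = np.sum(prev_norm * cur_norm, axis=1)
    order = np.argsort(similarity)          # ascending: most drifted patterns first
    conflict_nodes = order[:k]              # patterns changed the most across years
    stable_nodes = order[-k:]               # patterns changed the least across years
    return conflict_nodes, stable_nodes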
30 | ``` 31 | 32 | 33 | -------------------------------------------------------------------------------- /conf/PECPM.json: -------------------------------------------------------------------------------- 1 | { 2 | "begin_year": 2011, 3 | "end_year": 2017, 4 | "dropout": [0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.01, 0.015], 5 | "lr": [0.01, 0.01, 0.01, 0.005, 0.005, 0.005, 0.005, 0.015], 6 | "batch_size": [64,128,128,128,128,128,128], 7 | "epoch": 100, 8 | "gpuid": 1, 9 | "loss": "mse", 10 | "activation": "relu", 11 | "scheduler": "epo", 12 | "train": 1, 13 | "y_len": 12, 14 | "x_len": 12, 15 | "data_process": 1, 16 | "auto_test": 1, 17 | "influen_size":0.02, 18 | "beita":[5, 5, 1, 1, 0, 1, 1, 10, 10], 19 | "cluster":64, 20 | "attention_weight":[5,5,5,5,5,5,5,5], 21 | "raw_data_path": "/home/wbw/kdd/data/district3F11T17/finaldata/", 22 | "save_data_path": "/home/wbw/kdd/TrafficStream-main/TrafficStream-main2/data", 23 | "graph_path": "/home/wbw/kdd/data/district3F11T17/graph/", 24 | "model_path": "res/district3F11T17/", 25 | "detect_strategy":false, 26 | "gcn":{ 27 | "in_channel": 12, 28 | "out_channel": 12, 29 | "hidden_channel": 64 30 | }, 31 | 32 | "tcn":{ 33 | "in_channel": 1, 34 | "out_channel": 1, 35 | "kernel_size": 3, 36 | "dilation": 1 37 | }, 38 | 39 | 40 | "logname": "trafficStream", 41 | 42 | "strategy": "incremental", 43 | "init": true, 44 | "increase": true, 45 | "num_hops": 2, 46 | 47 | "detect": true, 48 | "detect_strategy": "feature", 49 | 50 | "adp_adj":false, 51 | "skip_dim": 64, 52 | "end_dim": 64, 53 | "hidden_channels": 32, 54 | "dilation_channels": 64, 55 | "residual_channels":64, 56 | "input_dim":1, 57 | "output_dim":1, 58 | 59 | "ewc": true, 60 | "ewc_strategy": "ewc", 61 | "ewc_lambda":[0.001, 0.001, 0.0001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.005], 62 | 63 | "replay": true, 64 | "replay_strategy": "random", 65 | "repaly_num_samples": 100 66 | } 67 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import sys, json, argparse, random, re, os, shutil 2 | sys.path.append("src/") 3 | import numpy as np 4 | import pandas as pd 5 | import logging 6 | from datetime import datetime 7 | from pathlib import Path 8 | import math 9 | import os.path as osp 10 | import networkx as nx 11 | import pdb 12 | 13 | from torch.optim.lr_scheduler import ReduceLROnPlateau, OneCycleLR 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.functional as func 17 | from Bio.Cluster import kcluster,clustercentroids 18 | from scipy.spatial.distance import cosine 19 | from torch import optim 20 | import torch.multiprocessing as mp 21 | from torch_geometric.data import Data, Batch, DataLoader 22 | from torch_geometric.utils import to_dense_batch, k_hop_subgraph 23 | 24 | from utils import common_tools as ct 25 | from utils.my_math import masked_mae_np, masked_mape_np, masked_mse_np,masked_mae,masked_mape,masked_mse 26 | from utils.data_convert import generate_samples, get_idx 27 | from src.model.model import Basic_Model 28 | from src.model.ewc import EWC 29 | from src.trafficDataset import TrafficDataset 30 | from src.model import detect 31 | from src.model import replay 32 | 33 | from sklearn.cluster import KMeans 34 | from sklearn.metrics.pairwise import cosine_similarity 35 | 36 | result = {3:{"mae":{}, "mape":{}, "rmse":{}}, 6:{"mae":{}, "mape":{}, "rmse":{}}, 12:{"mae":{}, "mape":{}, "rmse":{}}} 37 | pin_memory = True 38 | n_work = 16 39 | 40 | def update(src, 
tmp): 41 | for key in tmp: 42 | if key!= "gpuid": 43 | src[key] = tmp[key] 44 | 45 | def load_best_model(args): 46 | if (args.load_first_year and args.year <= args.begin_year+1) or args.train == 0: 47 | load_path = args.first_year_model_path 48 | loss = load_path.split("/")[-1].replace(".pkl", "") 49 | else: 50 | loss = [] 51 | for filename in os.listdir(osp.join(args.model_path, args.logname+args.time, str(args.year-1))): 52 | loss.append(filename[0:-4]) 53 | loss = sorted(loss) 54 | load_path = osp.join(args.model_path, args.logname+args.time, str(args.year-1), loss[0]+".pkl") 55 | 56 | args.logger.info("[*] load from {}".format(load_path)) 57 | state_dict = torch.load(load_path, map_location=args.device)["model_state_dict"] 58 | model = Basic_Model(args) 59 | model.load_state_dict(state_dict) 60 | model = model.to(args.device) 61 | return model, loss[0] 62 | 63 | def init(args): 64 | conf_path = osp.join(args.conf) 65 | info = ct.load_json_file(conf_path) 66 | info["time"] = datetime.now().strftime("%Y-%m-%d-%H:%M:%S.%f") 67 | update(vars(args), info) 68 | vars(args)["path"] = osp.join(args.model_path, args.logname+args.time) 69 | ct.mkdirs(args.path) 70 | del info 71 | 72 | 73 | def init_log(args): 74 | log_dir, log_filename = args.path, args.logname 75 | logger = logging.getLogger(__name__) 76 | ct.mkdirs(log_dir) 77 | logger.setLevel(logging.INFO) 78 | fh = logging.FileHandler(osp.join(log_dir, log_filename+".log")) 79 | fh.setLevel(logging.INFO) 80 | ch = logging.StreamHandler(sys.stdout) 81 | ch.setLevel(logging.INFO) 82 | formatter = logging.Formatter("%(asctime)s - %(message)s") 83 | fh.setFormatter(formatter) 84 | ch.setFormatter(formatter) 85 | logger.addHandler(fh) 86 | logger.addHandler(ch) 87 | logger.info("logger name:%s", osp.join(log_dir, log_filename+".log")) 88 | vars(args)["logger"] = logger 89 | return logger 90 | 91 | 92 | def seed_set(seed=0): 93 | max_seed = (1 << 32) - 1 94 | random.seed(seed) 95 | np.random.seed(random.randint(0, max_seed)) 96 | torch.manual_seed(random.randint(0, max_seed)) 97 | torch.cuda.manual_seed(random.randint(0, max_seed)) 98 | torch.cuda.manual_seed_all(random.randint(0, max_seed)) 99 | torch.backends.cudnn.benchmark = False # if benchmark=True, deterministic will be False 100 | torch.backends.cudnn.deterministic = True 101 | def cosine_distance(matrix_a, matrix_c): 102 | a, b = matrix_a.shape 103 | c, _ = matrix_c.shape 104 | 105 | # 初始化注意力矩阵 106 | attention_matrix = np.zeros((a, c)) 107 | 108 | # 计算注意力矩阵 109 | for i in range(a): 110 | for j in range(c): 111 | distance = cosine(matrix_a[i], matrix_c[j]) 112 | attention_matrix[i, j] = 1 - distance 113 | return attention_matrix 114 | def cosine_distance(A, B): 115 | norm_A = np.linalg.norm(A, axis=1, keepdims=True) # (m, 1) 116 | norm_B = np.linalg.norm(B, axis=1, keepdims=True) # (p, 1) 117 | 118 | # 计算 A 和 B 的点积 119 | dot_product = np.dot(A, B.T) # (m, p) 120 | 121 | # 计算余弦相似度 122 | similarity = dot_product / (norm_A * norm_B.T) # (m, p) 123 | 124 | return similarity 125 | 126 | def keep_top_k(matrix, k): 127 | # 对每行进行排序,返回排序后的索引 128 | sorted_indices = np.argsort(matrix, axis=1) 129 | 130 | # 生成一个与matrix形状相同的全零矩阵 131 | result = np.zeros_like(matrix) 132 | 133 | # 将前K个最大值设置为原始数值,其他设置为0 134 | rows = np.arange(matrix.shape[0])[:, np.newaxis] 135 | top_k_indices = sorted_indices[:, -k:] 136 | result[rows, top_k_indices] = matrix[rows, top_k_indices] 137 | 138 | return result 139 | def match_attention(data,args): 140 | attention=cosine_distance(data,args.last_clusterc) 141 | return 
keep_top_k(attention,args.attention_weight[args.year-args.begin_year]) 142 | 143 | def train(inputs, args): 144 | # Model Setting 145 | global result 146 | path = osp.join(args.path, str(args.year)) 147 | ct.mkdirs(path) 148 | 149 | if args.loss == "mse": lossfunc1 = func.mse_loss 150 | elif args.loss == "huber": lossfunc = func.smooth_l1_loss 151 | lossfunc= masked_mae 152 | cluster_lossf=masked_mse 153 | #train_idx,val_idx,test_idx= get_idx(inputs) 154 | # Dataset Definition 155 | N=inputs['train_x'].shape[1] 156 | pathatt='data/attetion/'+str(args.year)+'_attention.npy' 157 | attention=np.load(pathatt) 158 | C=attention.shape[-1] 159 | attention=attention.reshape(-1,N,C) 160 | 161 | if args.strategy == 'incremental' and args.year > args.begin_year: 162 | train_loader =DataLoader(TrafficDataset("", "", x=inputs["train_x"][:, args.subgraph.numpy(),:], y=inputs["train_y"][:, args.subgraph.numpy(),:],\ 163 | att=attention[:, args.subgraph.numpy(),:],edge_index="", mode="subgraph"), batch_size=args.batch_size[args.year-args.begin_year], shuffle=True, pin_memory=pin_memory, num_workers=n_work) 164 | val_loader = DataLoader(TrafficDataset("", "", x=inputs["val_x"][:, args.subgraph.numpy(),:], y=inputs["val_y"][:, args.subgraph.numpy(),:], \ 165 | att=attention[:, args.subgraph.numpy(),:],edge_index="", mode="subgraph"), batch_size=args.batch_size[args.year-args.begin_year], shuffle=False, pin_memory=pin_memory, num_workers=n_work) 166 | graph = nx.Graph() 167 | graph.add_nodes_from(range(args.subgraph.size(0))) 168 | graph.add_edges_from(args.subgraph_edge_index.numpy().T) 169 | adj = nx.to_numpy_array(graph) 170 | adj = adj / (np.sum(adj, 1, keepdims=True) + 1e-6) 171 | vars(args)["sub_adj"] = torch.from_numpy(adj).to(torch.float).to(args.device) 172 | path = osp.join(args.path, str(args.year)) 173 | else: 174 | train_loader = DataLoader(TrafficDataset(inputs, "train",att=attention), batch_size=args.batch_size[args.year-args.begin_year], shuffle=True, pin_memory=pin_memory, num_workers=n_work) 175 | val_loader = DataLoader(TrafficDataset(inputs, "val",att=attention), batch_size=args.batch_size[args.year-args.begin_year], shuffle=False, pin_memory=pin_memory, num_workers=n_work) 176 | vars(args)["sub_adj"] = vars(args)["adj"] 177 | test_loader = DataLoader(TrafficDataset(inputs, "test",att=attention), batch_size=args.batch_size[args.year-args.begin_year], shuffle=False, pin_memory=pin_memory, num_workers=n_work) 178 | 179 | args.logger.info("[*] Year " + str(args.year) + " Dataset load!") 180 | 181 | # Model Definition 182 | if args.init == True and args.year > args.begin_year: 183 | gnn_model, _ = load_best_model(args) 184 | if args.ewc: 185 | #args.logger.info("[*] EWC! 
lambda {:.6f}".format(args.ewc_lambda)) 186 | model = EWC(gnn_model, args.adj, args.ewc_lambda[args.year-args.begin_year], args.ewc_strategy) 187 | ewc_loader = DataLoader(TrafficDataset(inputs, "train",att=attention), batch_size=args.batch_size[args.year-args.begin_year], shuffle=False, pin_memory=pin_memory, num_workers=n_work) 188 | model.register_ewc_params(ewc_loader, lossfunc, device) 189 | else: 190 | model = gnn_model 191 | else: 192 | gnn_model = Basic_Model(args).to(args.device) 193 | model = gnn_model 194 | 195 | # Model Optimizer 196 | optimizer = optim.AdamW(model.parameters(), lr=args.lr[args.year-args.begin_year]) 197 | 198 | args.logger.info("[*] Year " + str(args.year) + " Training start") 199 | 200 | 201 | iters = len(train_loader) 202 | lowest_validation_loss = 1e7 203 | counter = 0 204 | patience = 10 205 | model.train() 206 | use_time = [] 207 | 208 | for epoch in range(100): 209 | training_loss = 0.0 210 | start_time = datetime.now() 211 | 212 | # Train Model 213 | cn = 0 214 | for batch_idx, data in enumerate(train_loader): 215 | if epoch == 0 and batch_idx == 0: 216 | args.logger.info("node number {}".format(data.x.shape)) 217 | data = data.to(device, non_blocking=pin_memory) 218 | optimizer.zero_grad() 219 | pred,attention = model(data, args.sub_adj) 220 | batch_att=pred.shape[0]//args.sub_adj.shape[0] 221 | loss_cluster=0 222 | 223 | attention_label=data.att.to(args.device) 224 | loss_cluster = func.mse_loss(attention,attention_label) 225 | if args.strategy == "incremental" and args.year > args.begin_year: 226 | pred, _ = to_dense_batch(pred, batch=data.batch) 227 | data.y, _ = to_dense_batch(data.y, batch=data.batch) 228 | pred = pred[:, args.mapping, :] 229 | data.y = data.y[:, args.mapping, :] 230 | mask_value = torch.tensor(0.0) 231 | if data.y.min() < 1: 232 | mask_value = data.y.min() 233 | loss = lossfunc(data.y,pred, mask_value)+loss_cluster*args.beita[args.year-args.begin_year] 234 | if args.ewc and args.year > args.begin_year: 235 | loss += model.compute_consolidation_loss() 236 | training_loss += float(loss) 237 | loss.backward() 238 | optimizer.step() 239 | 240 | cn += 1 241 | 242 | if epoch == 0: 243 | total_time = (datetime.now() - start_time).total_seconds() 244 | else: 245 | total_time += (datetime.now() - start_time).total_seconds() 246 | use_time.append((datetime.now() - start_time).total_seconds()) 247 | training_loss = training_loss/cn 248 | 249 | # Validate Model 250 | validation_loss = 0.0 251 | cn = 0 252 | with torch.no_grad(): 253 | for batch_idx, data in enumerate(val_loader): 254 | data = data.to(device,non_blocking=pin_memory) 255 | pred,_ = model(data, args.sub_adj) 256 | if args.strategy == "incremental" and args.year > args.begin_year: 257 | pred, _ = to_dense_batch(pred, batch=data.batch) 258 | data.y, _ = to_dense_batch(data.y, batch=data.batch) 259 | pred = pred[:, args.mapping, :] 260 | data.y = data.y[:, args.mapping, :] 261 | mask_value = torch.tensor(0) 262 | if data.y.min() < 1: 263 | mask_value = data.y.min() 264 | loss = lossfunc(data.y,pred, mask_value) 265 | validation_loss += float(loss) 266 | cn += 1 267 | validation_loss = float(validation_loss/cn) 268 | 269 | 270 | args.logger.info(f"epoch:{epoch}, training loss:{training_loss:.4f} validation loss:{validation_loss:.4f}") 271 | 272 | # Early Stop 273 | if validation_loss <= lowest_validation_loss: 274 | counter = 0 275 | lowest_validation_loss = round(validation_loss, 4) 276 | torch.save({'model_state_dict': gnn_model.state_dict()}, osp.join(path, 
str(round(validation_loss,4))+".pkl")) 277 | else: 278 | counter += 1 279 | if counter > patience: 280 | break 281 | 282 | best_model_path = osp.join(path, str(lowest_validation_loss)+".pkl") 283 | best_model = Basic_Model(args) 284 | best_model.load_state_dict(torch.load(best_model_path, args.device)["model_state_dict"]) 285 | best_model = best_model.to(args.device) 286 | 287 | # Test Model 288 | test_model2(best_model, args, test_loader, pin_memory) 289 | result[args.year] = {"total_time": total_time, "average_time": sum(use_time)/len(use_time), "epoch_num": epoch+1} 290 | args.logger.info("Finished optimization, total time:{:.2f} s, best model:{}".format(total_time, best_model_path)) 291 | #args.logger.info("Finished optimization, total time:{:.2f} s, best model:{}".format(total_time, best_model_path)) 292 | 293 | def test_model(model, args, testset, pin_memory): 294 | model.eval() 295 | pred_ = [] 296 | truth_ = [] 297 | loss = 0.0 298 | with torch.no_grad(): 299 | cn = 0 300 | for data in testset: 301 | data = data.to(args.device, non_blocking=pin_memory) 302 | pred = model(data, args.adj) 303 | loss += func.mse_loss(data.y, pred, reduction="mean") 304 | pred_.append(pred) 305 | truth_.append(data) 306 | cn += 1 307 | loss = loss/cn 308 | args.logger.info("[*] loss:{:.4f}".format(loss)) 309 | pred_ =torch.cat(pred_, 0) 310 | truth_ = torch.cat(truth_, 0) 311 | mask_value = 0.0 312 | if truth_.min() < 1: 313 | mask_value = truth_.min() 314 | mae =all_metric(truth_, pred_, args,mask_value) 315 | return loss 316 | 317 | 318 | 319 | def test_model2(model, args, testset, pin_memory): 320 | model.eval() 321 | pred_ = [] 322 | truth_ = [] 323 | loss = 0.0 324 | with torch.no_grad(): 325 | cn = 0 326 | for data in testset: 327 | data = data.to(args.device, non_blocking=pin_memory) 328 | pred,_ = model(data, args.adj) 329 | loss += func.mse_loss(data.y, pred, reduction="mean") 330 | pred, _ = to_dense_batch(pred, batch=data.batch) 331 | data.y, _ = to_dense_batch(data.y, batch=data.batch) 332 | pred_.append(pred.cpu().data.numpy()) 333 | truth_.append(data.y.cpu().data.numpy()) 334 | cn += 1 335 | loss = loss/cn 336 | args.logger.info("[*] loss:{:.4f}".format(loss)) 337 | pred_ = np.concatenate(pred_, 0) 338 | truth_ = np.concatenate(truth_, 0) 339 | mask_value = torch.tensor(0) 340 | if truth_.min() < 1: 341 | mask_value = truth_.min() 342 | mae = metric(truth_, pred_, args,mask_value) 343 | return loss 344 | 345 | def metric(ground_truth, prediction, args,mask_value): 346 | global result 347 | pred_time = [3,6,12] 348 | args.logger.info("[*] year {}, testing".format(args.year)) 349 | for i in pred_time: 350 | mae = masked_mae_np(ground_truth[:, :, :i], prediction[:, :, :i], 0) 351 | rmse = masked_mse_np(ground_truth[:, :, :i], prediction[:, :, :i], 0) ** 0.5 352 | mape = masked_mape_np(ground_truth[:, :, :i], prediction[:, :, :i], 0) 353 | args.logger.info("T:{:d}\tMAE\t{:.4f}\tRMSE\t{:.4f}\tMAPE\t{:.4f}".format(i,mae,rmse,mape)) 354 | result[i]["mae"][args.year] = mae 355 | result[i]["mape"][args.year] = mape 356 | result[i]["rmse"][args.year] = rmse 357 | return mae 358 | 359 | def all_metric(ground_truth, prediction, args,mask_value): 360 | global result 361 | pred_time = [3,6,12] 362 | args.logger.info("[*] year {}, testing".format(args.year)) 363 | for i in pred_time: 364 | mae = masked_mae(ground_truth[:, :, :i], prediction[:, :, :i], mask_value).item() 365 | rmse = masked_mse(ground_truth[:, :, :i], prediction[:, :, :i], mask_value).item() 366 | mape = masked_mape(ground_truth[:, :, 
:i], prediction[:, :, :i], mask_value).item() 367 | args.logger.info("T:{:d}\tMAE\t{:.4f}\tRMSE\t{:.4f}\tMAPE\t{:.4f}".format(i,mae,rmse,mape)) 368 | result[i]["mae"][args.year] = mae 369 | result[i]["mape"][args.year] = mape 370 | result[i]["rmse"][args.year] = rmse 371 | return mae 372 | 373 | 374 | def main(args): 375 | logger = init_log(args) 376 | logger.info("params : %s", vars(args)) 377 | ct.mkdirs(args.save_data_path) 378 | 379 | for year in range(args.begin_year, args.end_year+1): 380 | # Load Data 381 | graph = nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"]) 382 | vars(args)["graph_size"] = graph.number_of_nodes() 383 | vars(args)["year"] = year 384 | inputs = generate_samples(31, osp.join(args.save_data_path, str(year)+'_30day'), np.load(osp.join(args.raw_data_path, str(year)+".npz"))["x"], graph, val_test_mix=True) \ 385 | if args.data_process else np.load(osp.join(args.save_data_path, str(year)+"_30day.npz"), allow_pickle=True) 386 | args.logger.info("[*] Year {} load from {}_30day.npz".format(args.year, osp.join(args.save_data_path, str(year)))) 387 | 388 | adj = np.load(osp.join(args.graph_path, str(args.year)+"_adj.npz"))["x"] 389 | adj = adj / (np.sum(adj, 1, keepdims=True) + 1e-6) 390 | vars(args)["adj"] = torch.from_numpy(adj).to(torch.float).to(args.device) 391 | if year == args.begin_year and args.load_first_year: 392 | # Skip the first year, model has been trained and retrain is not needed 393 | model, _ = load_best_model(args) 394 | test_loader = DataLoader(TrafficDataset(inputs, "test"), batch_size=args.batch_size, shuffle=False, pin_memory=pin_memory, num_workers=n_work) 395 | test_model2(model, args, test_loader, pin_memory=True) 396 | continue 397 | 398 | 399 | if year > args.begin_year and args.strategy == "incremental": 400 | # Load the best model 401 | model, _ = load_best_model(args) 402 | 403 | node_list = list() 404 | 405 | cur_node_size = np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"].shape[0] 406 | pre_node_size = np.load(osp.join(args.graph_path, str(year-1)+"_adj.npz"))["x"].shape[0] 407 | node_list.extend(list(range(pre_node_size, cur_node_size))) 408 | 409 | 410 | 411 | pre_data = np.load(osp.join(args.raw_data_path, str(year-1)+".npz"))["x"] 412 | cur_data = np.load(osp.join(args.raw_data_path, str(year)+".npz"))["x"] 413 | pre_graph = np.array(list(nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year-1)+"_adj.npz"))["x"]).edges)).T 414 | cur_graph = np.array(list(nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"]).edges)).T 415 | 416 | evo_num= int(0.01*args.graph_size) 417 | replay_num=int(0.09*args.graph_size) 418 | replay,evo_node = detect.get_eveloved_nodes(args,replay_num,evo_num) 419 | node_list.extend(list(evo_node )) 420 | node_list.extend(list(replay)) 421 | 422 | 423 | node_list = list(set(node_list)) 424 | if len(node_list) > int(0.2*args.graph_size): 425 | node_list = random.sample(node_list, int(0.15*args.graph_size)) 426 | 427 | # Obtain subgraph of node list 428 | cur_graph = torch.LongTensor(np.array(list(nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"]).edges)).T) 429 | edge_list = list(nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"]).edges) 430 | graph_node_from_edge = set() 431 | for (u,v) in edge_list: 432 | graph_node_from_edge.add(u) 433 | graph_node_from_edge.add(v) 434 | node_list = list(set(node_list) & graph_node_from_edge) 435 | 436 | 437 | if len(node_list) != 
0 : 438 | subgraph, subgraph_edge_index, mapping, _ = k_hop_subgraph(node_list, num_hops=args.num_hops, edge_index=cur_graph, relabel_nodes=True) 439 | vars(args)["subgraph"] = subgraph 440 | vars(args)["subgraph_edge_index"] = subgraph_edge_index 441 | vars(args)["mapping"] = mapping 442 | logger.info("number of increase nodes:{}, nodes after {} hop:{}, total nodes this year {}".format\ 443 | (len(node_list), args.num_hops, args.subgraph.size(), args.graph_size)) 444 | vars(args)["node_list"] = np.asarray(node_list) 445 | 446 | 447 | # Skip the year when no nodes needed to be trained incrementally 448 | if args.strategy != "retrain" and year > args.begin_year and len(args.node_list) == 0: 449 | model, loss = load_best_model(args) 450 | ct.mkdirs(osp.join(args.model_path, args.logname+args.time, str(args.year))) 451 | torch.save({'model_state_dict': model.state_dict()}, osp.join(args.model_path, args.logname+args.time, str(args.year), loss+".pkl")) 452 | test_loader = DataLoader(TrafficDataset(inputs, "test"), batch_size=args.batch_size, shuffle=False, pin_memory=pin_memory, num_workers=n_work) 453 | test_model(model, args, test_loader, pin_memory=True) 454 | logger.warning("[*] No increasing nodes at year " + str(args.year) + ", store model of the last year.") 455 | continue 456 | 457 | 458 | if args.train: 459 | train(inputs, args) 460 | else: 461 | if args.auto_test: 462 | model, _ = load_best_model(args) 463 | test_loader = DataLoader(TrafficDataset(inputs, "test"), batch_size=args.batch_size, shuffle=False, pin_memory=pin_memory, num_workers=n_work) 464 | test_model(model, args, test_loader, pin_memory=True) 465 | 466 | 467 | for i in [3, 6, 12]: 468 | for j in ['mae', 'rmse', 'mape']: 469 | info = "" 470 | all12=0 471 | for year in range(args.begin_year, args.end_year+1): 472 | if i in result: 473 | if j in result[i]: 474 | if year in result[i][j]: 475 | info+="{:.2f}\t".format(result[i][j][year]) 476 | logger.info("{}\t{}\t".format(i,j) + info) 477 | 478 | 479 | if __name__ == "__main__": 480 | parser = argparse.ArgumentParser(formatter_class = argparse.RawTextHelpFormatter) 481 | parser.add_argument("--conf", type = str, default = "conf/test.json") 482 | parser.add_argument("--paral", type = int, default = 0) 483 | parser.add_argument("--gpuid", type = int, default = 2) 484 | parser.add_argument("--seed", type = int, default = 3208) 485 | parser.add_argument("--logname", type = str, default = "info") 486 | parser.add_argument("--load_first_year", type = int, default = 0, help="0: training first year, 1: load from model path of first year") 487 | parser.add_argument("--first_year_model_path", type = str, default = "data-path", help='specify a pretrained model root') 488 | args = parser.parse_args() 489 | init(args) 490 | seed_set(args.seed) 491 | 492 | device = torch.device("cuda:{}".format(args.gpuid)) if torch.cuda.is_available() and args.gpuid != -1 else "cpu" 493 | vars(args)["device"] = device 494 | main(args) 495 | -------------------------------------------------------------------------------- /src/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__init__.py -------------------------------------------------------------------------------- /src/model/__pycache__/STKEC_model.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/STKEC_model.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/detect.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/detect.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/detect2.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/detect2.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/ewc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/ewc.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/ewc4.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/ewc4.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/gcn_conv.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/gcn_conv.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/gwnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/gwnet.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/mode24423.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/mode24423.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/model2.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/model2.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/model5.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/model5.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/replay.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/replay.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/__pycache__/stkec_ewc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UnderReview24/PECPM/580992984406a741c64a83000a542087de68d904/src/model/__pycache__/stkec_ewc.cpython-37.pyc -------------------------------------------------------------------------------- /src/model/detect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('src/') 3 | import numpy as np 4 | from scipy.stats import entropy as kldiv 5 | from datetime import datetime 6 | from torch_geometric.utils import to_dense_batch 7 | from src.trafficDataset import continue_learning_Dataset 8 | from torch_geometric.data import Data, Batch, DataLoader 9 | import torch 10 | from scipy.spatial import distance 11 | from scipy.stats import wasserstein_distance as WD 12 | import os.path as osp 13 | # scipy.stats.entropy(x, y) 14 | 15 | 16 | def get_feature(data, graph, args, model, adj): 17 | node_size = data.shape[1] 18 | data = np.reshape(data[-288*7-1:-1,:], (-1, args.x_len, node_size)) 19 | dataloader = DataLoader(continue_learning_Dataset(data), batch_size=data.shape[0], shuffle=False, pin_memory=True, num_workers=3) 20 | # feature shape [T', feature_dim, N] 21 | for data in dataloader: 22 | data = data.to(args.device, non_blocking=True) 23 | feature, _ = to_dense_batch(model.feature(data, adj), batch=data.batch) 24 | node_size = feature.size()[1] 25 | # print("before permute:", feature.size()) 26 | feature = feature.permute(1,0,2) 27 | 28 | # [N, T', feature_dim] 29 | return feature.cpu().detach().numpy() 30 | 31 | 32 | def get_adj(year, args): 33 | adj = np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"] 34 | adj = adj / (np.sum(adj, 1, keepdims=True) + 1e-6) 35 | return torch.from_numpy(adj).to(torch.float).to(args.device) 36 | 37 | 38 | def score_func(pre_data, cur_data, args): 39 | # shape: [T, N] 40 | node_size = pre_data.shape[1] 41 | score = [] 42 | for node in range(node_size): 43 | max_val = max(max(pre_data[:,node]), max(cur_data[:,node])) 44 | min_val = min(min(pre_data[:,node]), min(cur_data[:,node])) 45 | pre_prob, _ = np.histogram(pre_data[:,node], bins=10, range=(min_val, max_val)) 46 | pre_prob = pre_prob *1.0 / sum(pre_prob) 47 | cur_prob, _ = np.histogram(cur_data[:,node], bins=10, range=(min_val, max_val)) 48 | cur_prob = cur_prob * 1.0 /sum(cur_prob) 49 | score.append(kldiv(pre_prob, cur_prob)) 50 | # return staiton_id of topk max score, station with larger KL score needs more training 51 | return np.argpartition(np.asarray(score), 
-args.topk)[-args.topk:] 52 | def sort_with_index(lst): 53 | sorted_index = sorted(range(len(lst)), key=lambda i: lst[i], reverse=True) 54 | return sorted_index 55 | def random_sampling(data_size, num_samples): 56 | return np.random.choice(data_size, num_samples) 57 | def get_eveloved_nodes(args,replay_num,evo_num): 58 | # should be N*T 59 | past_path=args.daily_node+'/'+str(args.year-1)+'.npy' 60 | daily_node_past=np.load(past_path) 61 | cuettern_path=args.daily_node+'/'+str(args.year)+'.npy' 62 | daily_node_cur=np.load(cuettern_path) 63 | if daily_node_past.shape[0]=3: 12 | col0_mins = np.min(arr[..., 0], axis=0, keepdims=True) 13 | col0_maxs = np.max(arr[..., 0], axis=0, keepdims=True) 14 | normalized_col0 = (arr[..., 0] - col0_mins) / (col0_maxs - col0_mins) 15 | normalized_arr = arr.copy() 16 | normalized_arr[...,0] = normalized_col0 17 | 18 | return normalized_arr 19 | else: 20 | return (data - np.mean(data)) / np.std(data) 21 | def get_temporal_feature(data): 22 | 23 | data=np.expand_dims(data, axis=-1) 24 | feature_list = [data] 25 | n=data.shape[1] 26 | steps_per_day=288 27 | tod = [i % steps_per_day /steps_per_day for i in range(data.shape[0])] 28 | tod = np.array(tod) 29 | tod_tiled = np.tile(tod, [1, n, 1]).transpose((2, 1, 0)) 30 | feature_list.append(tod_tiled) 31 | dow = [(i // steps_per_day) % 7 for i in range(data.shape[0])] 32 | dow = np.array(dow) 33 | dow_tiled = np.tile(dow, [1, n, 1]).transpose((2, 1, 0)) 34 | feature_list.append(dow_tiled) 35 | processed_data = np.concatenate(feature_list, axis=-1) 36 | return processed_data 37 | 38 | def generate_dataset(data, idx, x_len=12, y_len=12,temporal_feature=False): 39 | res = data[idx] 40 | node_size = data.shape[1] 41 | C=data.shape[-1] 42 | t = len(idx)-1 43 | idic = 0 44 | x_index, y_index = [], [] 45 | 46 | for i in tqdm.tqdm(range(t,0,-1)): 47 | if i-x_len-y_len>=0: 48 | x_index.extend(list(range(i-x_len-y_len, i-y_len))) 49 | y_index.extend(list(range(i-y_len, i))) 50 | 51 | x_index = np.asarray(x_index) 52 | y_index = np.asarray(y_index) 53 | if temporal_feature: 54 | x = res[x_index].reshape((-1, x_len, node_size,C)) 55 | y = res[y_index].reshape((-1, y_len, node_size,C)) 56 | x =x.transpose(0,2,1,3).reshape(-1,node_size,x_len*C) 57 | y =y.transpose(0,2,1,3) 58 | return x, y[...,0] #L,N,T,C 59 | else: 60 | x = res[x_index].reshape((-1, x_len, node_size)) 61 | y = res[y_index].reshape((-1, y_len, node_size)) 62 | return x, y 63 | 64 | def generate_samples122(days, savepath, data, graph, train_rate=0.6, val_rate=0.2, test_rate=0.2, val_test_mix=False,temporal_feature=False): 65 | edge_index = np.array(list(graph.edges)).T 66 | del graph 67 | data = data[0:days*288, :] 68 | t, n = data.shape[0], data.shape[1] 69 | if temporal_feature: 70 | train_idx = [i for i in range(int(t*train_rate))] 71 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 72 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 73 | 74 | 75 | train_x, train_y = generate_dataset(data, train_idx) 76 | val_x, val_y = generate_dataset(data, val_idx) 77 | test_x, test_y = generate_dataset(data, test_idx) 78 | if val_test_mix: 79 | val_test_x = np.concatenate((val_x, test_x), 0) 80 | val_test_y = np.concatenate((val_y, test_y), 0) 81 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 82 | np.random.shuffle(val_test_idx) 83 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 84 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], 
val_test_y[val_test_idx[int(t*val_rate):]] 85 | 86 | train_x = z_score(train_x) 87 | val_x = z_score(val_x) 88 | test_x = z_score(test_x) 89 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 90 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 91 | else: 92 | data=get_temporal_feature(data) 93 | 94 | train_idx = [i for i in range(int(t*train_rate))] 95 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 96 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 97 | 98 | train_x, train_y = generate_dataset(data, train_idx,temporal_feature) 99 | val_x, val_y = generate_dataset(data, val_idx,temporal_feature) 100 | test_x, test_y = generate_dataset(data, test_idx,temporal_feature) 101 | if val_test_mix: 102 | val_test_x = np.concatenate((val_x, test_x), 0) 103 | val_test_y = np.concatenate((val_y, test_y), 0) 104 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 105 | np.random.shuffle(val_test_idx) 106 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 107 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 108 | 109 | train_x = z_score(train_x) 110 | val_x = z_score(val_x) 111 | test_x = z_score(test_x) 112 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 113 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 114 | return data 115 | 116 | 117 | 118 | class StandardScaler(): 119 | def __init__(self, mean, std): 120 | self.mean = mean 121 | self.std = std 122 | 123 | def transform(self, data): 124 | return (data - self.mean) / self.std 125 | 126 | def inverse_transform(self, data): 127 | return (data * self.std) + self.mean 128 | 129 | 130 | def generate_samples(days, savepath, data, graph, train_rate=0.6, val_rate=0.2, test_rate=0.2, val_test_mix=False,temporal_feature=True): 131 | edge_index = np.array(list(graph.edges)).T 132 | del graph 133 | data = data[0:days*288, :] 134 | t, n = data.shape[0], data.shape[1] 135 | if temporal_feature==False: 136 | train_idx = [i for i in range(int(t*train_rate))] 137 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 138 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 139 | 140 | 141 | train_x, train_y = generate_dataset(data, train_idx) 142 | val_x, val_y = generate_dataset(data, val_idx) 143 | test_x, test_y = generate_dataset(data, test_idx) 144 | if val_test_mix: 145 | val_test_x = np.concatenate((val_x, test_x), 0) 146 | val_test_y = np.concatenate((val_y, test_y), 0) 147 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 148 | np.random.shuffle(val_test_idx) 149 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 150 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 151 | 152 | train_x = z_score(train_x) 153 | val_x = z_score(val_x) 154 | test_x = z_score(test_x) 155 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 156 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, 
"test_y":test_y, "edge_index":edge_index} 157 | else: 158 | data=get_temporal_feature(data) 159 | train_idx = [i for i in range(int(t*train_rate))] 160 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 161 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 162 | 163 | train_x, train_y = generate_dataset(data, train_idx,temporal_feature=temporal_feature) 164 | val_x, val_y = generate_dataset(data, val_idx,temporal_feature=temporal_feature) 165 | test_x, test_y = generate_dataset(data, test_idx,temporal_feature=temporal_feature) 166 | if val_test_mix: 167 | val_test_x = np.concatenate((val_x, test_x), 0) 168 | val_test_y = np.concatenate((val_y, test_y), 0) 169 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 170 | np.random.shuffle(val_test_idx) 171 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 172 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 173 | 174 | train_x = z_score(train_x) 175 | val_x = z_score(val_x) 176 | test_x = z_score(test_x) 177 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 178 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 179 | return data 180 | 181 | def generate_samples(days, savepath, data, graph, train_rate=0.6, val_rate=0.2, test_rate=0.2, val_test_mix=False,temporal_feature=True): 182 | edge_index = np.array(list(graph.edges)).T 183 | del graph 184 | data = data[0:days*288, :] 185 | t, n = data.shape[0], data.shape[1] 186 | if temporal_feature==False: 187 | train_idx = [i for i in range(int(t*train_rate))] 188 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 189 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 190 | 191 | 192 | train_x, train_y = generate_dataset(data, train_idx) 193 | val_x, val_y = generate_dataset(data, val_idx) 194 | test_x, test_y = generate_dataset(data, test_idx) 195 | if val_test_mix: 196 | val_test_x = np.concatenate((val_x, test_x), 0) 197 | val_test_y = np.concatenate((val_y, test_y), 0) 198 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 199 | np.random.shuffle(val_test_idx) 200 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 201 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 202 | 203 | train_x = z_score(train_x) 204 | val_x = z_score(val_x) 205 | test_x = z_score(test_x) 206 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 207 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 208 | else: 209 | data=get_temporal_feature(data) #L,N,C 210 | 211 | train_idx = [i for i in range(int(t*train_rate))] 212 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 213 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 214 | 215 | train_x, train_y = generate_dataset(data, train_idx,temporal_feature=temporal_feature) 216 | val_x, val_y = generate_dataset(data, val_idx,temporal_feature=temporal_feature) 217 | test_x, test_y = generate_dataset(data, test_idx,temporal_feature=temporal_feature) 218 | if val_test_mix: 219 | val_test_x 
= np.concatenate((val_x, test_x), 0) 220 | val_test_y = np.concatenate((val_y, test_y), 0) 221 | val_test_idx = np.arange(val_x.shape[0]+test_x.shape[0]) 222 | np.random.shuffle(val_test_idx) 223 | val_x, val_y = val_test_x[val_test_idx[:int(t*val_rate)]], val_test_y[val_test_idx[:int(t*val_rate)]] 224 | test_x, test_y = val_test_x[val_test_idx[int(t*val_rate):]], val_test_y[val_test_idx[int(t*val_rate):]] 225 | train_x = z_score(train_x) #(5332, 655, 12, 3) 226 | val_x = z_score(val_x) 227 | test_x = z_score(test_x) 228 | 229 | #np.savez(savepath, train_x=train_x, train_y=train_y, val_x=val_x, val_y=val_y, test_x=test_x, test_y=test_y, edge_index=edge_index) 230 | data = {"train_x":train_x, "train_y":train_y, "val_x":val_x, "val_y":val_y, "test_x":test_x, "test_y":test_y, "edge_index":edge_index} 231 | return data 232 | def get_idx(days, savepath, data, train_rate=0.6, val_rate=0.2, test_rate=0.2, val_test_mix=False,temporal_feature=True): 233 | del graph 234 | data = data[0:days*288, :] 235 | t, n = data.shape[0], data.shape[1] 236 | 237 | train_idx = [i for i in range(int(t*train_rate))] 238 | val_idx = [i for i in range(int(t*train_rate), int(t*(train_rate+val_rate)))] 239 | test_idx = [i for i in range(int(t*(train_rate+val_rate)), t)] 240 | return train_idx,val_idx,test_idx 241 | if __name__ == "__main__": 242 | for year in range(2011,2018): 243 | data_path=osp.join('/home/wbw/ijcai/data/district3F11T17/finaldata',str(year)+'.npz') 244 | data=np.load(data_path)['x'] 245 | 246 | edge_path=osp.join('/home/wbw/ijcai/data/district3F11T17/FastData',str(year)+'_30day.npz') 247 | edge_index=np.load(edge_path)['edge_index'] 248 | generate_samples1(data,edge_index=edge_index,year=year) -------------------------------------------------------------------------------- /utils/my_math.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | def mask_np(array, null_val): 4 | if np.isnan(null_val): 5 | return (~np.isnan(null_val)).astype('float32') 6 | else: 7 | return np.not_equal(array, null_val).astype('float32') 8 | 9 | def masked_mse(preds, labels, null_val): 10 | if torch.isnan(null_val): 11 | mask = ~torch.isnan(labels) 12 | else: 13 | mask = (labels != null_val) 14 | mask = mask.float() 15 | mask /= torch.mean((mask)) 16 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 17 | loss = (preds - labels)**2 18 | loss = loss * mask 19 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 20 | return torch.mean(loss) 21 | 22 | 23 | def masked_rmse(preds, labels, null_val): 24 | return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val)) 25 | 26 | 27 | def masked_mae(preds, labels, null_val): 28 | if torch.isnan(null_val): 29 | mask = ~torch.isnan(labels) 30 | else: 31 | mask = (labels != null_val) 32 | mask = mask.float() 33 | mask /= torch.mean((mask)) 34 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 35 | loss = torch.abs(preds - labels) 36 | loss = loss * mask 37 | loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 38 | return torch.mean(loss) 39 | 40 | 41 | def masked_mape(preds, labels, null_val): 42 | if torch.isnan(null_val): 43 | mask = ~torch.isnan(labels) 44 | else: 45 | mask = (labels != null_val) 46 | mask = mask.float() 47 | mask /= torch.mean((mask)) 48 | mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) 49 | loss = torch.abs(preds - labels) / labels 50 | loss = loss * mask 51 | loss = 
torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) 52 | return torch.mean(loss) 53 | def masked_mape_np(y_true, y_pred, null_val=np.nan): 54 | with np.errstate(divide='ignore', invalid='ignore'): 55 | mask = mask_np(y_true, null_val) 56 | mask /= mask.mean() 57 | mape = np.abs((y_pred - y_true) / y_true) 58 | mape = np.nan_to_num(mask * mape) 59 | return np.mean(mape) * 100 60 | 61 | def compute_all_metrics(preds, labels, null_val): 62 | mae = masked_mae(preds, labels, null_val).item() 63 | mape = masked_mape(preds, labels, null_val).item() 64 | rmse = masked_rmse(preds, labels, null_val).item() 65 | return mae, mape, rmse 66 | def masked_mse_np(y_true, y_pred, null_val=np.nan): 67 | mask = mask_np(y_true, null_val) 68 | mask /= mask.mean() 69 | mse = (y_true - y_pred) ** 2 70 | return np.mean(np.nan_to_num(mask * mse)) 71 | 72 | 73 | def masked_mae_np(y_true, y_pred, null_val=np.nan): 74 | mask = mask_np(y_true, null_val) 75 | mask /= mask.mean() 76 | mae = np.abs(y_true - y_pred) 77 | return np.mean(np.nan_to_num(mask * mae)) 78 | --------------------------------------------------------------------------------
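
For reference, a minimal usage sketch of the masked metrics defined in `utils/my_math.py` above (the toy arrays are illustrative). Ground-truth entries equal to `null_val` receive zero weight, and the mask is renormalized by its mean, so each metric is effectively an average over the valid entries only.

```python
import numpy as np
from utils.my_math import masked_mae_np, masked_mape_np, masked_mse_np

# Toy ground truth / prediction with two missing readings encoded as 0.
y_true = np.array([[10.0, 0.0, 30.0], [0.0, 50.0, 60.0]])
y_pred = np.array([[12.0, 5.0, 27.0], [8.0, 55.0, 63.0]])

# With null_val=0 the two zero entries are masked out, so the MAE is the mean
# absolute error over the four valid entries: (2 + 3 + 5 + 3) / 4 = 3.25.
mae = masked_mae_np(y_true, y_pred, null_val=0)
rmse = masked_mse_np(y_true, y_pred, null_val=0) ** 0.5
mape = masked_mape_np(y_true, y_pred, null_val=0)   # reported in percent
print(mae, rmse, mape)
```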