├── code ├── init ├── init.py ├── data │ ├── init │ ├── init.py │ ├── data.pt │ ├── poi_matrix.pickle │ ├── poi_dict_new.pickle │ ├── poi_skip_vec.pickle │ ├── region_back.pickle │ ├── reg_vector_dict.pickle │ ├── reg_incld_poi_new.pickle │ └── region_attr_graph.pickle ├── data_pre │ ├── init.py │ ├── pre_s2.py │ ├── pre_s4.py │ ├── pre_s1.py │ ├── pre_spatial_graph.py │ ├── pre_s5.py │ ├── pre_s6_dataloader.py │ ├── pre_s3.py │ └── pre_poi_transformer.py ├── distance_dict.pickle ├── view_split.py ├── layers.py ├── utils.py ├── batch_train.py ├── test_data.py ├── eval.py ├── config.yaml ├── vector_transform.py ├── attack.py ├── pre_dataloader.py ├── model.py ├── train.py ├── model_gcn.py ├── train_edit.py └── train_edit_auto.py ├── house ├── init.py ├── pre_s9.py └── pre_s7.py ├── pictures ├── init.py ├── result.png ├── framework.png └── case_study.png ├── exptract_regions.py ├── pre_s2.py ├── pre_s4.py ├── region_spatial.py ├── pre_poifrom_osm.py ├── pre_spatial_graph.py ├── pre_s10.py ├── pre_poi_transformer.py ├── pre_s5.py ├── README.md ├── pre_s1.py ├── pre_s14_poi_skip.py ├── pre_s6_dataloader.py └── pre_s3.py /code/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/init.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /house/init.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/data/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/data/init.py: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /pictures/init.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/data_pre/init.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/data/data.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/data.pt -------------------------------------------------------------------------------- /pictures/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/pictures/result.png -------------------------------------------------------------------------------- /pictures/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/pictures/framework.png -------------------------------------------------------------------------------- /pictures/case_study.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/pictures/case_study.png -------------------------------------------------------------------------------- /code/data/poi_matrix.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/poi_matrix.pickle -------------------------------------------------------------------------------- /code/distance_dict.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/distance_dict.pickle 
-------------------------------------------------------------------------------- /code/data/poi_dict_new.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/poi_dict_new.pickle -------------------------------------------------------------------------------- /code/data/poi_skip_vec.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/poi_skip_vec.pickle -------------------------------------------------------------------------------- /code/data/region_back.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/region_back.pickle -------------------------------------------------------------------------------- /code/data/reg_vector_dict.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/reg_vector_dict.pickle -------------------------------------------------------------------------------- /code/data/reg_incld_poi_new.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/reg_incld_poi_new.pickle -------------------------------------------------------------------------------- /code/data/region_attr_graph.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/region_attr_graph.pickle -------------------------------------------------------------------------------- /code/view_split.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pickle 4 | import numpy as np 5 | import pandas as pd 6 | 7 | def load_data(file): 8 | 
data_load_file = [] 9 | file_1 = open(file, "rb") 10 | data_load_file = pickle.load(file_1) 11 | return data_load_file 12 | hy = load_data("./data/hy_new_aaai_2.pickle") 13 | 14 | node_list = list(hy.nodes) 15 | poi_view = [] 16 | spatial_view = [] 17 | flow_view = [] 18 | for item in node_list: 19 | if item.endswith("s"): 20 | spatial_view.append(node_list.index(item)) 21 | elif item.endswith("p"): 22 | poi_view.append(node_list.index(item)) 23 | else: 24 | flow_view.append(node_list.index(item)) 25 | print(len(poi_view)) 26 | print(len(spatial_view)) 27 | print(len(flow_view)) 28 | print(len(poi_view)+len(spatial_view)+len(flow_view)) 29 | # print(poi_view) 30 | # print(flow_view) 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /code/layers.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | 5 | from torch.nn.parameter import Parameter 6 | from torch.nn.modules.module import Module 7 | 8 | 9 | class GCNConv(Module): 10 | """ 11 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 12 | """ 13 | 14 | def __init__(self, in_features, out_features, bias=True): 15 | super(GCNConv, self).__init__() 16 | self.in_features = in_features 17 | self.out_features = out_features 18 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 19 | if bias: 20 | self.bias = Parameter(torch.FloatTensor(out_features)) 21 | else: 22 | self.register_parameter('bias', None) 23 | self.reset_parameters() 24 | 25 | def reset_parameters(self): 26 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 27 | self.weight.data.uniform_(-stdv, stdv) 28 | if self.bias is not None: 29 | self.bias.data.uniform_(-stdv, stdv) 30 | 31 | def forward(self, input, adj): 32 | support = torch.mm(input, self.weight) 33 | output = torch.spmm(adj, support) 34 | if self.bias is not None: 35 | return output + self.bias 36 | else: 37 | return output 38 | 39 | def __repr__(self): 40 | return self.__class__.__name__ + ' (' \ 41 | + str(self.in_features) + ' -> ' \ 42 | + str(self.out_features) + ')' -------------------------------------------------------------------------------- /code/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_scatter import scatter_add 3 | from torch_geometric.utils import get_laplacian, add_self_loops 4 | 5 | 6 | def normalize_adj_tensor(adj): 7 | """Symmetrically normalize adjacency tensor.""" 8 | rowsum = torch.sum(adj,1) 9 | d_inv_sqrt = torch.pow(rowsum, -0.5) 10 | d_inv_sqrt[d_inv_sqrt == float("Inf")] = 0. 11 | d_mat_inv_sqrt = torch.diag(d_inv_sqrt) 12 | return torch.mm(torch.mm(adj,d_mat_inv_sqrt).transpose(0,1),d_mat_inv_sqrt) 13 | 14 | def normalize_adj_tensor_sp(adj): 15 | """Symmetrically normalize sparse adjacency tensor.""" 16 | device = adj.device 17 | adj = adj.to("cpu") 18 | rowsum = torch.spmm(adj, torch.ones((adj.size(0),1))).reshape(-1) 19 | d_inv_sqrt = torch.pow(rowsum, -0.5) 20 | d_inv_sqrt[d_inv_sqrt == float("Inf")] = 0. 
21 | d_mat_inv_sqrt = torch.diag(d_inv_sqrt) 22 | adj = torch.mm(torch.smm(adj.transpose(0,1),d_mat_inv_sqrt.transpose(0,1)),d_mat_inv_sqrt) 23 | return adj.to(device) 24 | 25 | def edge2adj(x, edge_index): 26 | """Convert edge index to adjacency matrix""" 27 | num_nodes = x.shape[0] 28 | tmp, _ = add_self_loops(edge_index, num_nodes=num_nodes) 29 | edge_weight = torch.ones(tmp.size(1), dtype=None, 30 | device=edge_index.device) 31 | 32 | row, col = tmp[0], tmp[1] 33 | deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes) 34 | deg_inv_sqrt = deg.pow_(-0.5) 35 | deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0) 36 | edge_weight = deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col] 37 | return torch.sparse.FloatTensor(tmp, edge_weight,torch.Size((num_nodes, num_nodes))) -------------------------------------------------------------------------------- /exptract_regions.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | from shapely.geometry import Point, LineString 4 | from shapely.geometry import Polygon,MultiPoint,MultiPolygon 5 | import numpy as np 6 | import json 7 | import geopandas 8 | import shapefile 9 | 10 | 11 | m_region =[] 12 | shp_df = geopandas.GeoDataFrame.from_file("../data/2010 Census Blocks/geo_export_c80540b5-38fc-4bb4-81cd-ae8082c49f02.shp",encoding = 'gb18030').values.tolist() 13 | for item in shp_df: 14 | if item[2] == "Manhattan": 15 | m_region.append(item) 16 | 17 | # print(len(m_region)) 18 | q_index = [] 19 | for hh in m_region: 20 | if hh[3] not in q_index: 21 | q_index.append(hh[3]) 22 | print(len(q_index)) 23 | region_dict = {} 24 | for item in m_region: 25 | if item[3] not in region_dict.keys(): 26 | region_dict[item[3]] = item 27 | else: 28 | if item[5] > region_dict[item[3]][5]: 29 | region_dict[item[3]] = item 30 | # print(m_region) 31 | # print(len(region_dict)) 32 | region_trans = {} 33 | for key,value in region_dict.items(): 34 | 
region_trans[int(key)] = value[-1] 35 | # print(region_trans[1051]) 36 | # print(len(region_trans)) 37 | region_s = {} 38 | for idx,im in enumerate(region_trans.items()): 39 | region_s[idx] = im[1] 40 | # print(len(region_s)) 41 | # print(region_s[0]) 42 | import pickle 43 | file=open(r"../data/region_back.pickle","wb") 44 | pickle.dump(region_s,file) #storing_list 45 | file.close() 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /code/batch_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess as sp 3 | 4 | 5 | datasets = ["Cora", "CiteSeer", "AmazonP", "AmazonC", "CoauthorC", "CoauthorP"] 6 | """ 7 | seeds = [0,1,2,3,4,39788] 8 | epses = [0.5 ,1, 1.5, 2] 9 | alphas = [50, 200, 600] 10 | betas = [0.001, 0.01] 11 | lambs = [0, 0.5, 1, 1.5, 2] 12 | 13 | jobs = [] 14 | for dataset in datasets: 15 | for seed in seeds: 16 | for eps in epses: 17 | for alpha in alphas: 18 | for beta in betas: 19 | for lamb in lambs: 20 | log = "results/%s_%d_%g_%g_%g_%g"%(dataset, seed, eps, alpha, beta, lamb) 21 | jobs.append({'dataset':dataset, 'seed': seed, 'eps': eps, 'alpha': alpha, 'beta': beta, 'lamb':lamb, 'log': log}) 22 | 23 | for job in jobs: 24 | print(job) 25 | 26 | for job in jobs: 27 | path = job['log'] 28 | if not os.path.exists(path): 29 | sp.call(['mkdir', path]) 30 | print("Starting: ", job) 31 | sp.call(['python', 'train.py', 32 | '--dataset', job['dataset'], 33 | '--seed', str(job['seed']), 34 | '--eps', str(job['eps']), 35 | '--alpha', str(job['alpha']), 36 | '--beta', str(job['beta']), 37 | '--lamb', str(job['lamb']), 38 | '--log', path 39 | ]) 40 | """ 41 | 42 | jobs = [] 43 | for dataset in datasets: 44 | log = "results/%s"%dataset 45 | jobs.append({'dataset':dataset,'log': log}) 46 | 47 | for job in jobs: 48 | path = job['log'] 49 | if not 
os.path.exists(path): 50 | sp.call(['mkdir', path]) 51 | print("Starting: ", job) 52 | sp.call(['python', 'train.py', 53 | '--dataset', job['dataset'], 54 | '--log', path 55 | ]) -------------------------------------------------------------------------------- /code/data_pre/pre_s2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from shapely.geometry import Point, LineString 6 | from shapely.geometry import Polygon,MultiPoint #多边形 7 | import matplotlib.pyplot as plt 8 | import json 9 | from urllib.request import urlopen, quote 10 | import requests 11 | import geopy 12 | from geopy.geocoders import Nominatim 13 | import copy 14 | import pickle 15 | from datetime import datetime 16 | taxi = pd.read_csv("../data/2016_Green_Taxi_Trip_Data.csv", sep = ',') 17 | # print(taxi[:100]) 18 | print(taxi.columns.values.tolist()) #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID'] 19 | 20 | def load_data(file): 21 | data_load_file = [] 22 | file_1 = open(file, "rb") 23 | data_load_file = pickle.load(file_1) 24 | return data_load_file 25 | 26 | # region = load_data("../data/NY_region.pickle") 27 | # selection_dataset['year'] = selection_dataset['Trip Start Timestamp'].map(lambda x: x.split('-')[0]) 28 | taxi['date'] = taxi['lpep_pickup_datetime'].map(lambda x:x.split(' ')[0]) 29 | taxi['day'] = taxi['date'].map(lambda x:x.split('/')[0]).apply(int) 30 | taxi["date"] = pd.to_datetime(taxi["date"]).dt.date 31 | s_date = datetime.strptime('20160101', '%Y%m%d').date() 32 | e_date = datetime.strptime('20160103', '%Y%m%d').date() 33 
| week_df = taxi[(taxi['date'] >= s_date) & (taxi['date'] <= e_date)] 34 | month_traffic = week_df.drop(['date'], axis=1) 35 | # println() 36 | # month_traffic = y_traffic.loc[[y_traffic['month'] == MONTH]] 37 | #a whole year include 77 regions and a month inlucde 70 regions 38 | #month_traffic = year_traffic 39 | print("one week data:", len(month_traffic)) 40 | 41 | 42 | month_traffic = month_traffic.values.tolist() 43 | file=open(r"../data/NY_traffic.pickle","wb") 44 | pickle.dump(month_traffic,file) #storing_list 45 | file.close() 46 | 47 | -------------------------------------------------------------------------------- /pre_s2.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pandas as pd 4 | from shapely.geometry import Point, LineString 5 | from shapely.geometry import Polygon,MultiPoint #多边形 6 | import matplotlib.pyplot as plt 7 | import json 8 | from urllib.request import urlopen, quote 9 | import requests 10 | import geopy 11 | from geopy.geocoders import Nominatim 12 | import copy 13 | import pickle 14 | from datetime import datetime 15 | taxi = pd.read_csv("../data/2016_Green_Taxi_Trip_Data.csv", sep = ',') 16 | # taxi = pd.read_csv("../data/2015_Green_Taxi_Trip_Data.csv", sep = ',') 17 | # print(taxi[:100]) 18 | print(taxi.columns.values.tolist()) #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID'] 19 | 20 | def load_data(file): 21 | data_load_file = [] 22 | file_1 = open(file, "rb") 23 | data_load_file = pickle.load(file_1) 24 | return data_load_file 25 | 26 | # region = load_data("../data/NY_region.pickle") 27 | # selection_dataset['year'] = 
selection_dataset['Trip Start Timestamp'].map(lambda x: x.split('-')[0]) 28 | taxi['date'] = taxi['lpep_pickup_datetime'].map(lambda x:x.split(' ')[0]) 29 | # taxi['date'] = taxi['pickup_datetime'].map(lambda x:x.split(' ')[0]) 30 | taxi['day'] = taxi['date'].map(lambda x:x.split('/')[1]).apply(int) 31 | taxi["date"] = pd.to_datetime(taxi["date"]).dt.date 32 | s_date = datetime.strptime('20160101', '%Y%m%d').date() 33 | e_date = datetime.strptime('20160101', '%Y%m%d').date() 34 | week_df = taxi[(taxi['date'] >= s_date) & (taxi['date'] <= e_date)] 35 | month_traffic = week_df.drop(['date'], axis=1) 36 | # println() 37 | # month_traffic = y_traffic.loc[[y_traffic['month'] == MONTH]] 38 | #a whole year include 77 regions and a month inlucde 70 regions 39 | #month_traffic = year_traffic 40 | print("one week data:", len(month_traffic)) 41 | print(month_traffic['day']) 42 | 43 | # pritnln() 44 | month_traffic = month_traffic.values.tolist() 45 | file=open(r"../data/NY_traffic_1_.pickle","wb") 46 | pickle.dump(month_traffic,file) #storing_list 47 | file.close() 48 | 49 | -------------------------------------------------------------------------------- /pre_s4.py: -------------------------------------------------------------------------------- 1 | 2 | import pickle 3 | import pandas as pd 4 | import numpy as np 5 | import copy 6 | from shapely.geometry import Point, LineString 7 | from shapely.geometry import Polygon,MultiPoint #多边形 8 | import torch 9 | import networkx as nx 10 | import matplotlib.pyplot as pl 11 | 12 | 13 | 14 | 15 | def load_data(file): 16 | data_load_file = [] 17 | file_1 = open(file, "rb") 18 | data_load_file = pickle.load(file_1) 19 | return data_load_file 20 | 21 | 22 | reg_vec_sort = load_data("../data/reg_poi_vec.pickle") 23 | region_que = load_data("../data/reg_poi_idx.pickle") 24 | region_attr_edges=[] 25 | 26 | 27 | for idx in region_que: 28 | for idt in range(idx+1, len(reg_vec_sort)): 29 | # print("^^:",reg_vec_sort[idx].size()) 30 | # 
print("**:",reg_vec_sort[idx+1].size()) 31 | # pritnln() 32 | output = torch.cosine_similarity(torch.unsqueeze(reg_vec_sort[idx],0), torch.unsqueeze(reg_vec_sort[idt],0), eps=1e-08).mean() 33 | # print("output:", output.item()) 34 | # pritnln() 35 | #0.87 36 | if output.item()>=0.9: 37 | tmp_1 = "r" + '_' + str(idx)+"_"+"p" 38 | tmp_2 = "r" + '_' + str(idt)+"_"+"p" 39 | # sim_dict[key] = [tmp_1, tmp_2, value] 40 | region_attr_edges.append([tmp_1, tmp_2, output.item()]) 41 | print(len(region_attr_edges)) 42 | # println() 43 | G = nx.Graph() 44 | # for edge in edges: 45 | # G.add_edge(edge[0],edge[1],weight= edge[2]) 46 | 47 | [G.add_edge(edge[0],edge[1],weight= edge[2], date = "1", start = edge[0], end = edge[1] ) for edge in region_attr_edges] 48 | # print(len(G.adj)) 49 | # nx.draw(G, with_labels=True) 50 | # plt.show() 51 | 52 | 53 | file=open(r"../data/region_attr_graph_test.pickle","wb") 54 | pickle.dump(G,file) #storing_list 55 | file.close() 56 | 57 | print("attr_region:", G) 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /code/test_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pandas as pd 4 | import pickle 5 | from shapely.geometry import Point, LineString 6 | from shapely.geometry import Polygon,MultiPoint #多边形 7 | import torch 8 | from torch import nn 9 | import numpy as np 10 | 11 | def load_data(file): 12 | data_load_file = [] 13 | file_1 = open(file, "rb") 14 | data_load_file = pickle.load(file_1) 15 | return data_load_file 16 | 17 | view_graph = load_data("./data/hy_new_aaai_2.pickle") 18 | region_lab = 
load_data("./data/region_label.pickle") 19 | # print(region_lab) 20 | # print(region_lab.keys()) 21 | # print(type(region_lab.keys())) 22 | # pritnln() 23 | nodes_list = view_graph.nodes() 24 | nodes_lab = [] 25 | for item in nodes_list: 26 | # print("item:", item) 27 | idx = int(item.split("_")[1]) 28 | if idx in region_lab.keys(): 29 | tmp_lab = region_lab[idx] 30 | nodes_lab.append(tmp_lab) 31 | else: 32 | nodes_lab.append(5) 33 | # print("nodes_lab:",nodes_lab) 34 | data_num = 1388 35 | file=open(r"./data/nodes_lab.pickle","wb") 36 | pickle.dump(nodes_lab,file) #storing_list 37 | file.close() 38 | '''train mask''' 39 | l1 = [True]*int(0.051698670605613*data_num) 40 | l2 = [False]*(data_num-int(0.051698670605613*data_num)) 41 | l1.extend(l2) 42 | tmp_len = int(0.051698670605613*data_num) 43 | train_mask = l1 44 | # print(tmp_len) 45 | file=open(r"./data/train_mask.pickle","wb") 46 | pickle.dump(train_mask,file) #storing_list 47 | file.close() 48 | '''val mask''' 49 | l3 = [True]*int(0.18463810930576072*data_num) 50 | l4 = [False]*(data_num-int(0.18463810930576072*data_num)-tmp_len) 51 | l5 = [False]* tmp_len 52 | l5.extend(l3) 53 | l5.extend(l4) 54 | tmp_len_val = len(l3) 55 | val_mask = l5 56 | file=open(r"./data/val_mask.pickle","wb") 57 | pickle.dump(val_mask,file) #storing_list 58 | file.close() 59 | # print(len(val_mask)) 60 | '''test mask''' 61 | l6 = [True]*int(0.36927621861152143*data_num) 62 | l7 = [False]*(data_num-int(0.36927621861152143*data_num)-tmp_len-tmp_len_val) 63 | l8 = [False]* (tmp_len+tmp_len_val) 64 | l8.extend(l6) 65 | l8.extend(l7) 66 | test_mask = l8 67 | file=open(r"./data/test_mask.pickle","wb") 68 | pickle.dump(test_mask,file) #storing_list 69 | file.close() 70 | 71 | print("---Done---") 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /region_spatial.py: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | import pickle 4 | import numpy as np 5 | # import matplotlib.pyplot as plt 6 | from sklearn import metrics 7 | from sklearn.cluster import KMeans 8 | from sklearn.metrics import adjusted_mutual_info_score 9 | import json 10 | import numpy as np 11 | import pandas as pd 12 | from sklearn import linear_model 13 | from sklearn.model_selection import KFold 14 | from sklearn.metrics import adjusted_rand_score 15 | from sklearn.metrics import normalized_mutual_info_score 16 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error 17 | from shapely.geometry import Polygon 18 | from shapely import wkt 19 | import geopandas as gpd 20 | import math 21 | from math import cos 22 | def load_data(file): 23 | data_load_file = [] 24 | file_1 = open(file, "rb") 25 | data_load_file = pickle.load(file_1) 26 | return data_load_file 27 | 28 | para= 3000 29 | region_pos = load_data("../data/region_back.pickle") 30 | reg_sm = {} 31 | all_pos = [] 32 | for key,value in region_pos.items(): 33 | reg_sm[key] = list(value.centroid.coords)[0] 34 | all_pos.append(list(value.centroid.coords)[0]) 35 | max_lon = max([item[0] for item in all_pos]) 36 | min_lon = min([item[0] for item in all_pos]) 37 | max_lat = max([item[1] for item in all_pos]) 38 | min_lat = min([item[1] for item in all_pos]) 39 | # print(max_lon,min_lon,max_lat,min_lat) 40 | dis_lon = (max_lon-min_lon)*111100 41 | lon_num = math.ceil(dis_lon/para) 42 | 43 | 44 | dis_lat = (max_lat-min_lat)*111100*cos(max_lat-min_lat) 45 | # print(dis_lat) 46 | lat_num = math.ceil(dis_lat/para) 47 | # print(lon_num,lat_num) 48 | 49 | reg_token = {} 50 | li=[] 51 | for idx,pos in enumerate(all_pos): 52 | lon = pos[0]-min_lon 53 | lat = pos[1]-min_lat 54 | x,y = int(lon*111100/para),int(lat*111100*cos(lat)/para) 55 | tok= x*21+y 56 | if tok not in li: 57 | li.append(tok) 58 | reg_token[idx] = tok 59 | # print("cor_token:", 
idx,x,y,tok) 60 | # print(reg_token) 61 | print(len(li)) 62 | li_map = {} 63 | for idx,uu in enumerate(li): 64 | li_map[uu] = idx 65 | reg_t_con ={} 66 | ton=[] 67 | for key,value in reg_token.items(): 68 | reg_t_con[key] = li_map[value] 69 | if li_map[value] not in ton: 70 | ton.append(li_map[value]) 71 | print(reg_t_con) 72 | print(max(ton)) 73 | 74 | file=open(r"../data/region_spatial.pickle","wb") 75 | pickle.dump(reg_t_con,file) #storing_list 76 | file.close() 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /code/eval.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import functools 3 | 4 | from sklearn.metrics import f1_score, roc_auc_score 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.svm import SVC 7 | from sklearn.model_selection import train_test_split, GridSearchCV 8 | from sklearn.multiclass import OneVsRestClassifier 9 | from sklearn.preprocessing import normalize, OneHotEncoder 10 | 11 | 12 | def repeat(n_times): 13 | def decorator(f): 14 | @functools.wraps(f) 15 | def wrapper(*args, **kwargs): 16 | results = [f(*args, **kwargs) for _ in range(n_times)] 17 | statistics = {} 18 | for key in results[0].keys(): 19 | values = [r[key] for r in results] 20 | statistics[key] = { 21 | 'mean': np.mean(values), 22 | 'std': np.std(values)} 23 | print_statistics(statistics, f.__name__) 24 | return statistics 25 | return wrapper 26 | return decorator 27 | 28 | 29 | def prob_to_one_hot(y_pred): 30 | ret = np.zeros(y_pred.shape, np.bool) 31 | indices = np.argmax(y_pred, axis=1) 32 | for i in range(y_pred.shape[0]): 33 | ret[i][indices[i]] = True 34 | return ret 35 | 36 | 37 | def print_statistics(statistics, function_name): 38 | print(f'(E) | {function_name}:', end=' ') 39 | for i, key in enumerate(statistics.keys()): 40 | mean = statistics[key]['mean'] 41 | std = statistics[key]['std'] 42 | 
print(f'{key}={mean:.4f}+-{std:.4f}', end='') 43 | if i != len(statistics.keys()) - 1: 44 | print(',', end=' ') 45 | else: 46 | print() 47 | 48 | 49 | @repeat(20) 50 | def label_classification(embeddings, y, ratio): 51 | X = embeddings.detach().cpu().numpy() 52 | Y = y.detach().cpu().numpy() 53 | Y = Y.reshape(-1, 1) 54 | onehot_encoder = OneHotEncoder(categories='auto').fit(Y) 55 | Y = onehot_encoder.transform(Y).toarray().astype(np.bool) 56 | 57 | X = normalize(X, norm='l2') 58 | 59 | X_train, X_test, y_train, y_test = train_test_split(X, Y, 60 | test_size=1 - ratio) 61 | 62 | logreg = LogisticRegression(solver='liblinear') 63 | c = 2.0 ** np.arange(-10, 10) 64 | 65 | clf = GridSearchCV(estimator=OneVsRestClassifier(logreg), 66 | param_grid=dict(estimator__C=c), n_jobs=8, cv=5, 67 | verbose=0) 68 | clf.fit(X_train, y_train) 69 | 70 | y_pred = clf.predict_proba(X_test) 71 | y_pred = prob_to_one_hot(y_pred) 72 | 73 | acc = np.sum(np.where(y_test)[1]==np.where(y_pred)[1])/len(y_pred) 74 | 75 | return {"ACC": acc} 76 | -------------------------------------------------------------------------------- /code/config.yaml: -------------------------------------------------------------------------------- 1 | Cora: 2 | seed: 4 3 | learning_rate: 0.0005 4 | num_hidden: 128 5 | num_proj_hidden: 128 6 | activation: 'relu' 7 | base_model: 'GCNConv' 8 | num_layers: 2 9 | drop_edge_rate_1: 0.2 10 | drop_edge_rate_2: 0.4 11 | drop_feature_rate_1: 0.3 12 | drop_feature_rate_2: 0.4 13 | tau: 0.4 14 | num_epochs: 300 15 | weight_decay: 0.00001 16 | eps: 0.5 17 | alpha: 200 18 | beta: 0.01 19 | lamb: 0 20 | CiteSeer: 21 | seed: 2 22 | learning_rate: 0.001 23 | num_hidden: 256 24 | num_proj_hidden: 256 25 | activation: 'prelu' 26 | base_model: 'GCNConv' 27 | num_layers: 2 28 | drop_edge_rate_1: 0.2 29 | drop_edge_rate_2: 0.0 30 | drop_feature_rate_1: 0.3 31 | drop_feature_rate_2: 0.2 32 | tau: 0.9 33 | num_epochs: 200 34 | weight_decay: 0.00001 35 | eps: 1 36 | alpha: 50 37 | beta: 0.01 
38 | lamb: 0 39 | AmazonC: 40 | seed: 3 41 | learning_rate: 0.01 42 | num_hidden: 128 43 | num_proj_hidden: 128 44 | activation: "rrelu" 45 | base_model: 'GCNConv' 46 | num_layers: 2 47 | drop_edge_rate_1: 0.6 48 | drop_edge_rate_2: 0.3 49 | drop_feature_rate_1: 0.2 50 | drop_feature_rate_2: 0.3 51 | tau: 0.2 52 | num_epochs: 2000 53 | weight_decay: 0.00001 54 | eps: 1 55 | alpha: 50 56 | beta: 0.01 57 | lamb: 0 58 | AmazonP: 59 | seed: 3 60 | learning_rate: 0.1 61 | num_hidden: 256 62 | num_proj_hidden: 64 63 | activation: "relu" 64 | base_model: 'GCNConv' 65 | num_layers: 2 66 | drop_edge_rate_1: 0.3 67 | drop_edge_rate_2: 0.5 68 | drop_feature_rate_1: 0.1 69 | drop_feature_rate_2: 0.1 70 | tau: 0.3 71 | num_epochs: 2000 72 | weight_decay: 0.00001 73 | eps: 0.5 74 | alpha: 200 75 | beta: 0.01 76 | lamb: 0 77 | CoauthorC: 78 | seed: 1 79 | learning_rate: 0.0005 80 | num_hidden: 256 81 | num_proj_hidden: 256 82 | activation: "rrelu" 83 | base_model: 'GCNConv' 84 | num_layers: 2 85 | drop_edge_rate_1: 0.3 86 | drop_edge_rate_2: 0.2 87 | drop_feature_rate_1: 0.3 88 | drop_feature_rate_2: 0.4 89 | tau: 0.4 90 | num_epochs: 1000 91 | weight_decay: 0.00001 92 | eps: 2 93 | alpha: 50 94 | beta: 0.001 95 | lamb: 2 96 | CoauthorP: 97 | seed: 1 98 | learning_rate: 0.01 99 | num_hidden: 128 100 | num_proj_hidden: 64 101 | activation: "rrelu" 102 | base_model: 'GCNConv' 103 | num_layers: 2 104 | drop_edge_rate_1: 0.4 105 | drop_edge_rate_2: 0.1 106 | drop_feature_rate_1: 0.1 107 | drop_feature_rate_2: 0.4 108 | tau: 0.5 109 | num_epochs: 1500 110 | weight_decay: 0.00001 111 | eps: 1 112 | alpha: 50 113 | beta: 0.001 114 | lamb: 0 -------------------------------------------------------------------------------- /code/vector_transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import torch 4 | from torch_geometric.data import Data 5 | from itertools import product 6 | import numpy as np 7 | import 
pandas as pd 8 | from torch import nn 9 | import pickle 10 | 11 | import warnings 12 | warnings.filterwarnings("ignore") 13 | 14 | def load_data(file): 15 | data_load_file = [] 16 | file_1 = open(file, "rb") 17 | data_load_file = pickle.load(file_1) 18 | return data_load_file 19 | 20 | # tmp_vector = load_data("./data/tmp_vector.pickle") 21 | 22 | # tmp_vector = load_data("./data/tmp_vector_vgae_au.pickle") 23 | 24 | # tmp_vector = load_data("./data/tmp_vector_vgae_3.pickle") 25 | tmp_vector = load_data("./data/tmp_vector_chi_3.pickle") 26 | linear = nn.Linear(128, 16) 27 | ten = torch.tensor(tmp_vector.tolist()) 28 | # print(ten.size()) 29 | # final = [] 30 | # for item in ten: 31 | # tmp = linear(item).tolist() 32 | # final.append(tmp) 33 | # final_vector = torch.tensor(final) 34 | # print(final_vector.size()) 35 | 36 | # hy = load_data("./data/hy_new_aaai_2.pickle") 37 | 38 | hy = load_data("./data/hy_aaai_chi_1.pickle") 39 | region = ten 40 | 41 | nn_1 = nn.Linear(128,96) 42 | hy_nodes_dict={} 43 | for n,n_vec in zip(hy.nodes(),region): 44 | tp = n.split("_")[1] 45 | if tp not in hy_nodes_dict.keys(): 46 | hy_nodes_dict[int(tp)] = [] 47 | hy_nodes_dict[int(tp)].append(n_vec.tolist()) 48 | else: 49 | hy_nodes_dict[int(tp)].append(n_vec.tolist()) 50 | 51 | 52 | hy_com = {} 53 | for key,value in hy_nodes_dict.items(): 54 | tmp = np.mean(value, axis=0).tolist() 55 | tmp_ = torch.tensor(tmp).tolist() 56 | hy_com[int(key)] = tmp_ 57 | 58 | linear = nn.Linear(128, 16) 59 | hycom_vec = [] 60 | for key,value in hy_com.items(): 61 | hycom_vec.append(linear(torch.tensor(value)).tolist()) 62 | vec_final_ = np.reshape(np.tile(np.array(hycom_vec),(30,4)),(234,30,4,16)) 63 | 64 | print(np.array(hycom_vec).shape) 65 | print("vec_final_:",vec_final_.shape) 66 | file=open(r"./data/tmp_7.pickle","wb") 67 | pickle.dump(vec_final_,file) #storing_list 68 | file.close() 69 | file=open(r"./data/tmp_house.pickle","wb") 70 | pickle.dump(np.array(hycom_vec),file) #storing_list 71 | 
file.close() 72 | 73 | 74 | 75 | '''transform for traffi prediction--32 dimension vector''' 76 | trans_1 = np.reshape(np.array(hycom_vec), 234*16) 77 | linear_traffic = nn.Linear(234*16, 32) 78 | traffic_vec = linear_traffic(torch.tensor(trans_1).float()).detach().numpy() 79 | # print(traffic_vec.size()) 80 | file=open(r"./data/traff_vec.pickle","wb") 81 | pickle.dump(traffic_vec,file) #storing_list 82 | file.close() 83 | 84 | print("---finish---") 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /pre_poifrom_osm.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | import pickle 4 | from shapely.geometry import Point, LineString 5 | from shapely.geometry import Polygon,MultiPoint #多边形 6 | import torch 7 | from torch import nn 8 | 9 | 10 | def load_data(file): 11 | data_load_file = [] 12 | file_1 = open(file, "rb") 13 | data_load_file = pickle.load(file_1) 14 | return data_load_file 15 | poi = pd.read_csv("../data/poi_nyc.csv",sep=",").values.tolist() 16 | region_back = load_data("../data/region_back_merge.pickle") 17 | # print(poi.columns.values.tolist()) 18 | # pritnln() 19 | region_poi={} 20 | poi_list=[] 21 | for key,value in region_back.items(): 22 | region_poi[key] = [] 23 | for item in poi: 24 | # print(item[23], item[84], item[92]) 25 | for key,value in region_back.items(): 26 | tmp_point = Point(item[3],item[0]) 27 | if tmp_point.intersects(value): 28 | if item[23]!=" ": 29 | if item[23] not in region_poi[key]: 30 | region_poi[key].append(item[23]) 31 | if item[23] not in poi_list: 32 | poi_list.append(item[23]) 33 | elif item[84]!=" ": 34 | if item[84] not in region_poi[key]: 35 | region_poi[key].append(item[84]) 36 | if item[84] not in poi_list: 37 | poi_list.append(item[84]) 38 | print(region_poi) 39 | print(poi_list) 40 | # poi_list = ['drinking_water', 'toilets', 'school', 'hospital', 'arts_centre', 'fire_station', 
'police', 'bicycle_parking', 'fountain', 'ferry_terminal', 'bench', 'cinema', 'cafe', 'pub', 'waste_basket', 'parking_entrance', 'parking', 'fast_food', 'bank', 'restaurant', 'ice_cream', 'pharmacy', 'taxi', 'post_box', 'atm', 'nightclub', 'social_facility', 'bar', 'biergarten', 'clock', 'bicycle_rental', 'community_centre', 'watering_place', 'ranger_station', 'boat_rental', 'recycling', 'payment_terminal', 'bicycle_repair_station', 'place_of_worship', 'shelter', 'telephone', 'clinic', 'dentist', 'vending_machine', 'theatre', 'charging_station', 'public_bookcase', 'post_office', 'fuel', 'doctors'] 41 | poi_dict = {} 42 | for idx,item in enumerate(poi_list): 43 | poi_dict[item]=idx 44 | print("sum of the category of POI:", len(poi_dict)) 45 | reg_incld_poi={} 46 | for key,value in region_poi.items(): 47 | reg_incld_poi[key] = [] 48 | for uu in value: 49 | if uu in poi_dict.keys(): 50 | reg_incld_poi[key].append(poi_dict[uu]) 51 | print("reg_incld_poi:",reg_incld_poi) 52 | 53 | import pickle 54 | file=open(r"../data/reg_incld_poi_new.pickle","wb") 55 | pickle.dump(reg_incld_poi,file) #storing_list 56 | file.close() 57 | 58 | file=open(r"../data/poi_dict_new.pickle","wb") 59 | pickle.dump(poi_dict,file) #storing_list 60 | file.close() 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /pre_spatial_graph.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pandas as pd 4 | from shapely.geometry import Point, LineString 5 | from shapely.geometry import Polygon,MultiPoint #多边形 6 | import matplotlib.pyplot as plt 7 | import json 8 | from urllib.request import urlopen, quote 9 | import requests 10 | import geopy 11 | from geopy.geocoders import Nominatim 12 | import copy 13 | import pickle 14 | from datetime import datetime 15 | from itertools import chain 16 | import networkx as nx 17 | import numpy as np 18 | 
import matplotlib.pyplot as plt
from math import radians, cos, sin, asin, sqrt


def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in metres between two points.

    All four arguments are angles in decimal degrees, given as
    (longitude, latitude) of the first point followed by (longitude,
    latitude) of the second.  A sphere of radius 6371 km is assumed.
    """
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push sqrt(a) marginally above 1 for (near-)antipodal
    # points, which would make asin raise ValueError; clamp it.
    c = 2 * asin(min(1.0, sqrt(a)))
    r = 6371  # sphere radius in km
    return c * r * 1000


def load_data(file):
    """Return the object unpickled from the file at path ``file``.

    The file is closed before returning.  NOTE: unpickling can execute
    arbitrary code; only load trusted files.
    """
    with open(file, "rb") as handle:
        return pickle.load(handle)


region_back = load_data("../data/region_back_merge.pickle")
region_fea = load_data("../data/reg_fea.pickle")

NUM_REGIONS = 180       # regions are addressed by the integer keys 0..179
MAX_DISTANCE_M = 5600   # centroids at most this far apart (metres) get an edge

spatial_edges = []
# spatial_edges.extend(flow_edges) # add edges in flow graph
reg_spatial = {}
# Every region index takes part in at least one (ii, jj) pair below.
node = list(range(NUM_REGIONS))

# Centroids are computed once per region instead of once per pair.
# presumably coordinates are stored as (lon, lat) -- TODO confirm against region_back
centroids = [list(region_back[idx].centroid.coords)[0] for idx in range(NUM_REGIONS)]

for ii in range(NUM_REGIONS):
    for jj in range(ii + 1, NUM_REGIONS):
        t_1_pos = centroids[ii]
        t_2_pos = centroids[jj]
        value = haversine(t_1_pos[0], t_1_pos[1], t_2_pos[0], t_2_pos[1])
        if value <= MAX_DISTANCE_M:
            n1 = "r"+"_"+str(ii)
            n2 = "r"+"_"+str(jj)
            # Each unordered pair is visited exactly once, so no duplicate
            # check against the (growing) edge list is needed.
            spatial_edges.append((n1, n2, {"weight": value, "date": int(1), "start": n1, "end": n2}))

print(len(spatial_edges))

print("spatial_edges:",spatial_edges)
print(len(spatial_edges))
print("finish spatial graph")

# spatial graph
G_spatial = nx.Graph()
G_spatial.add_edges_from(spatial_edges[:]) 87 | # nx.draw(G_spatial, with_labels=True) 88 | # plt.show() 89 | print("G_spatial:",G_spatial) 90 | 91 | file=open(r"../data/spatial_graph_5600.pickle","wb") 92 | pickle.dump(G_spatial,file) #storing_list 93 | file.close() -------------------------------------------------------------------------------- /pre_s10.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | import pickle 4 | from shapely.geometry import Point, LineString 5 | from shapely.geometry import Polygon,MultiPoint #多边形 6 | import torch 7 | from torch import nn 8 | import networkx as nx 9 | import numpy as np 10 | 11 | 12 | def load_data(file): 13 | data_load_file = [] 14 | file_1 = open(file, "rb") 15 | data_load_file = pickle.load(file_1) 16 | return data_load_file 17 | 18 | region = load_data("../data/hy_vector_signal_trans_18.pickle") 19 | check_vector = load_data("../data/ck_poi.pickle") 20 | hy = load_data("../data/hy_6.pickle") 21 | 22 | 23 | print(region.size()) 24 | 25 | 26 | print(check_vector[0].size()) 27 | print(hy.nodes()) 28 | print(hy) 29 | 30 | hy_nodes_dict={} 31 | for n,n_vec in zip(hy.nodes(),region): 32 | tp = n.split("_")[1] 33 | if tp not in hy_nodes_dict.keys(): 34 | hy_nodes_dict[tp] = [] 35 | hy_nodes_dict[tp].append(n_vec.tolist()) 36 | else: 37 | hy_nodes_dict[tp].append(n_vec.tolist()) 38 | 39 | hy_com = {} 40 | for key,value in hy_nodes_dict.items(): 41 | tmp = np.mean(value, axis=0).tolist() 42 | tmp_ = torch.tensor(tmp).tolist() 43 | hy_com[int(key)] = tmp_ 44 | 45 | 46 | file=open(r"../data/hy_com_dict_trans.pickle","wb") 47 | pickle.dump(hy_com,file) #storing_list 48 | file.close() 49 | print("---finish---") 50 | 51 | 52 | 53 | println() 54 | from sklearn.linear_model import Lasso,Ridge 55 | from sklearn.model_selection import train_test_split 56 | import matplotlib.pyplot as plt 57 | import numpy as np 58 | import mglearn 59 | 60 | # 读取数据,并划分训练集和测试集 61 | X,y = 
mglearn.datasets.load_extended_boston() 62 | X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42) 63 | # 通过设置不同的alpha值建立三个lasso实例 64 | lasso = Lasso().fit(X_train,y_train) 65 | lasso001 =Lasso(alpha=0.01).fit(X_train,y_train) 66 | lasso00001 = Lasso(alpha=0.0001).fit(X_train,y_train) 67 | print('**********************************') 68 | print("Lasso alpha=1") 69 | print ("training set score:{:.2f}".format(lasso.score(X_train,y_train))) 70 | print ("test set score:{:.2f}".format(lasso.score(X_test,y_test))) 71 | print ("Number of features used:{}".format(np.sum(lasso.coef_!=0))) 72 | 73 | print('**********************************') 74 | print("Lasso alpha=0.01") 75 | print ("training set score:{:.2f}".format(lasso001.score(X_train,y_train))) 76 | print ("test set score:{:.2f}".format(lasso001.score(X_test,y_test))) 77 | print ("Number of features used:{}".format(np.sum(lasso001.coef_!=0))) 78 | 79 | print('**********************************') 80 | print("Lasso alpha=0.0001") 81 | print ("training set score:{:.2f}".format(lasso00001.score(X_train,y_train))) 82 | print ("test set score:{:.2f}".format(lasso00001.score(X_test,y_test))) 83 | print ("Number of features used:{}".format(np.sum(lasso00001.coef_!=0))) 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /pre_poi_transformer.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | import pickle 4 | from shapely.geometry import Point, LineString 5 | from shapely.geometry import Polygon,MultiPoint 6 | import torch 7 | from torch import nn 8 | import numpy as np 9 | 10 | 11 | def load_data(file): 12 | data_load_file = [] 13 | file_1 = open(file, "rb") 14 | data_load_file = pickle.load(file_1) 15 | return data_load_file 16 | region_back = load_data("../data/region_back_merge.pickle") 17 | reg_poi = 
load_data("../data/reg_incld_poi_new.pickle") 18 | # reg_spatial = load_data("../data/region_spatial.pickle") 19 | poi_max=[] 20 | for key,value in reg_poi.items(): 21 | poi_max.extend(value) 22 | print(max(poi_max)) #there are 120 fin-grained pois 23 | # println() 24 | reg_poi_={} 25 | s = 0 26 | emb = nn.Embedding(120, 512) 27 | embedding_spatial = torch.nn.Embedding(15, 512) # spatial 28 | for key,value in reg_poi.items(): 29 | # print("value:",value) 30 | if value!=[]: 31 | reg_poi_[key]=[] 32 | # print("value:",value) 33 | if len(value)>s: 34 | s = len(value) 35 | for item in value: 36 | reg_poi_[key].append(emb(torch.tensor(item)).tolist()) 37 | # spa_vec= embedding_spatial(torch.tensor(reg_spatial[idx])) 38 | # reg_poi_t = {} 39 | reg_poi_list = [] 40 | for iii in range(180): 41 | # for key,value in reg_poi_.items(): 42 | if iii not in reg_poi_.keys(): 43 | reg_poi_list.append(np.array([0.0]*512)) 44 | # reg_poi_list.append(ci) 45 | else: 46 | # print("value:",value) 47 | tp = np.mean(reg_poi_[key],axis=0) 48 | reg_poi_list.append(tp) 49 | reg_poi_list_ = torch.tensor(np.array(reg_poi_list)).float() 50 | reg_poi_list_tensor = torch.unsqueeze(reg_poi_list_,0) 51 | print(reg_poi_list_tensor.size()) 52 | 53 | reg_idx= [key for key in reg_poi_.keys()] 54 | from torch import nn 55 | encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8 ) 56 | transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6) 57 | # src = torch.rand(1, 172, 512) 58 | src = reg_poi_list_tensor 59 | out = transformer_encoder(src) 60 | # print(out.size()) 61 | out_ = torch.squeeze(out,0) 62 | print(out_.size()) 63 | print(reg_idx) 64 | print(len(reg_idx)) 65 | # reg_poi_vec = {} 66 | # for idx,vec in zip(reg_idx,out_): 67 | # reg_poi_vec[idx] = vec 68 | 69 | file=open(r"../data/reg_poi_vec.pickle","wb") 70 | pickle.dump(out_,file) #storing_list 71 | file.close() 72 | 73 | file=open(r"../data/reg_poi_idx.pickle","wb") 74 | pickle.dump(reg_idx,file) #storing_list 75 | 
file.close() 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /code/attack.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from utils import normalize_adj_tensor, normalize_adj_tensor_sp, edge2adj 4 | 5 | def bisection(a,eps,xi,ub=1): 6 | pa = torch.clamp(a, 0, ub) 7 | if torch.sum(pa) <= eps: 8 | upper_S_update = pa 9 | else: 10 | mu_l = torch.min(a-1) 11 | mu_u = torch.max(a) 12 | mu_a = (mu_u + mu_l)/2 13 | while torch.abs(mu_u - mu_l)>xi: 14 | mu_a = (mu_u + mu_l)/2 15 | gu = torch.sum(torch.clamp(a-mu_a, 0, ub)) - eps 16 | gu_l = torch.sum(torch.clamp(a-mu_l, 0, ub)) - eps 17 | if gu == 0: 18 | break 19 | if torch.sign(gu) == torch.sign(gu_l): 20 | mu_l = mu_a 21 | else: 22 | mu_u = mu_a 23 | upper_S_update = torch.clamp(a-mu_a, 0, ub) 24 | return upper_S_update 25 | 26 | 27 | def PGD_attack_graph(model, edge_index_1, edge_index_2, x_1, x_2, steps, node_ratio, alpha, beta): 28 | """ PGD attack on both features and edges""" 29 | for param in model.parameters(): 30 | param.requires_grad = False 31 | model.eval() 32 | device = x_1.device 33 | total_edges = edge_index_2.shape[1] 34 | n_node = x_2.shape[0] 35 | eps = total_edges * node_ratio/2 36 | xi = 1e-3 37 | 38 | A_ = torch.sparse.FloatTensor(edge_index_2, torch.ones(total_edges,device=device), torch.Size((n_node, n_node))).to_dense() 39 | C_ = torch.ones_like(A_) - 2 * A_ - torch.eye(A_.shape[0],device=device) 40 | S_ = torch.zeros_like(A_, requires_grad= True) 41 | mask = torch.ones_like(A_) 42 | mask = mask - torch.tril(mask) 43 | delta = torch.zeros_like(x_2, device=device, requires_grad=True) 44 | adj_1 = edge2adj(x_1, edge_index_1) 45 | model.to(device) 46 | for epoch in range(steps): 47 | S = (S_ * mask) 48 | S = S + S.T 
49 | A_prime = A_ + (S * C_) 50 | adj_hat = normalize_adj_tensor(A_prime + torch.eye(n_node,device=device)) 51 | z1 = model(x_1, adj_1) 52 | z2 = model(x_2 + delta, adj_hat) 53 | loss, _ = model.loss(z1, z2, batch_size=0) 54 | attack_loss = loss.mean() 55 | attack_loss.backward() 56 | S_.data = (S_.data + alpha/np.sqrt(epoch+1)*S_.grad.detach()) # annealing 57 | S_.data = bisection(S_.data, eps, xi) # clip S 58 | S_.grad.zero_() 59 | 60 | delta.data = (delta.data + beta*delta.grad.detach().sign()).clamp(-0.04,0.04) 61 | delta.grad.zero_() 62 | 63 | randm = torch.rand(n_node, n_node,device=device) 64 | discretized_S = torch.where(S_.detach() > randm, torch.ones(n_node, n_node,device=device), torch.zeros(n_node, n_node, device=device)) 65 | discretized_S = discretized_S + discretized_S.T 66 | A_hat = A_ + discretized_S * C_ + torch.eye(n_node,device=device) 67 | 68 | for param in model.parameters(): 69 | param.requires_grad = True 70 | model.train() 71 | x_hat = x_2 + delta.data.to(device) 72 | assert torch.equal(A_hat, A_hat.transpose(0,1)) 73 | return normalize_adj_tensor(A_hat), x_hat -------------------------------------------------------------------------------- /pre_s5.py: -------------------------------------------------------------------------------- 1 | 2 | import pickle 3 | import pandas as pd 4 | from itertools import chain 5 | import networkx as nx 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from math import radians, cos, sin, asin, sqrt 9 | 10 | 11 | def load_data(file): 12 | data_load_file = [] 13 | file_1 = open(file, "rb") 14 | data_load_file = pickle.load(file_1) 15 | return data_load_file 16 | from scipy.sparse import csr_matrix 17 | 18 | flow_g = load_data('../data/flow_graph_2.pickle') 19 | 20 | spatial_g = load_data('../data/spatial_graph_new_1.pickle') 21 | region_attr_g = load_data('../data/region_attr_graph_test.pickle') 22 | 23 | 24 | flow_nodes = list(flow_g.nodes) 25 | spatial_nodes = list(spatial_g.nodes) 26 | # 
flowsum_nodes = list(flow_sum_g.nodes) 27 | regat_nodes = list(region_attr_g.nodes) 28 | flow_edges = list(flow_g.edges(data=True)) 29 | # print("****:",len(flowsum_nodes)) 30 | # println() 31 | spatial_edges = list(spatial_g.edges(data=True)) 32 | # print(":**************",spatial_edges) 33 | # println() 34 | # flowsum_edges = list(flow_sum_g.edges(data=True)) 35 | regat_edges = list(region_attr_g.edges(data=True)) 36 | # print(regat_edges) 37 | 38 | 39 | part_f = flow_nodes 40 | part_s = spatial_nodes 41 | # part_flow = flowsum_nodes 42 | 43 | 44 | hy_edges = [] 45 | for sub in regat_nodes: 46 | for ss in flow_nodes: 47 | tmp_ss = ss.split("_") 48 | tmp_sub = sub.split("_") 49 | tmp_c = tmp_ss[0]+'_'+tmp_ss[1] 50 | tmp_s = tmp_sub[0]+'_'+tmp_sub[1] 51 | 52 | if tmp_s == tmp_c: 53 | # pair = (sub, ss,{"weight":1, "date": tmp[2], "start":sub, "end":ss}) 54 | pair = (sub, ss,{"weight":1, "date": tmp_ss[2], "start":sub, "end":ss}) 55 | # print("pair:", pair) 56 | # println() 57 | # if pair not in hy_edges: 58 | hy_edges.append(pair) 59 | print(len(hy_edges)) 60 | 61 | for ss in spatial_nodes: 62 | for ff in flow_nodes: 63 | tps = ss.split("_") 64 | # tps_c = tps[0]+'_'+tps[1] 65 | tpf = ff.split("_") 66 | # tpf_c = tpf[0]+'_'+tpf[1] 67 | # print("ff:",ff) 68 | # print("ss:",ss) 69 | # print(tpf) 70 | # println() 71 | # ss_=ss+"_"+"s" 72 | if tps[1] == tpf[1]: 73 | # pair = (ss, ff,{"weight":0, "date":tpf[2] , "start":ss, "end":ff}) 74 | pair = (ss, ff,{"weight":0, "date":1 , "start":ss, "end":ff}) 75 | # print("pair:", pair) 76 | # pritnln() 77 | hy_edges.append(pair) 78 | 79 | 80 | print("hy_edges:",len(hy_edges)) 81 | 82 | G_hy = nx.Graph() 83 | G_hy.add_edges_from(hy_edges) 84 | G_hy.add_edges_from(flow_edges) 85 | # G_hy.add_edges_from(flowsum_edges) 86 | G_hy.add_edges_from(spatial_edges) 87 | G_hy.add_edges_from(regat_edges) 88 | # nx.draw(G_hy) 89 | # plt.show() 90 | print("hyper_grapgh:", G_hy) 91 | 92 | print(G_hy) 93 | nodes_num = 3 94 | 
file=open(r"../data/hy_new_test_60.pickle","wb") 95 | pickle.dump(G_hy,file) #storing_list 96 | file.close() 97 | 98 | -------------------------------------------------------------------------------- /house/pre_s9.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu May 19 19:33:25 2022 4 | 5 | @author: User 6 | """ 7 | # this file is to predict hosue price 8 | import pandas as pd 9 | import pickle 10 | from shapely.geometry import Point, LineString 11 | from shapely.geometry import Polygon,MultiPoint #多边形 12 | import torch 13 | from torch import nn 14 | import networkx as nx 15 | import numpy as np 16 | from sklearn.linear_model import Lasso,Ridge 17 | from sklearn.model_selection import train_test_split 18 | import matplotlib.pyplot as plt 19 | import numpy as np 20 | # import mglearn 21 | from sklearn.metrics import mean_absolute_error 22 | from sklearn.metrics import r2_score 23 | from sklearn import svm 24 | from sklearn import metrics 25 | 26 | 27 | def load_data(file): 28 | data_load_file = [] 29 | file_1 = open(file, "rb") 30 | data_load_file = pickle.load(file_1) 31 | return data_load_file 32 | 33 | 34 | train_set = load_data("../data/train_house.pickle") 35 | test_set = load_data("../data/test_house.pickle") 36 | emb_dim = 16 37 | embedding_1 = nn.Embedding(1000000, emb_dim) 38 | linear = nn.Linear(144, 16) 39 | 40 | 41 | for name in ['ours']: 42 | tmp_vec = load_data("../data/baseline/{}_vector.pickle".format(name)) 43 | # print(tmp_vec.shape) 44 | # println() 45 | train_vec = [] 46 | train_y =[] 47 | for item in train_set: 48 | # print(item) 49 | # printl() 50 | tmp=[] 51 | # tmp.extend(linear(torch.tensor(tmp_vec[item[0]])).tolist()) 52 | tmp.extend(linear(torch.tensor(tmp_vec[item[0]]).float()).tolist()) 53 | tmp.extend(embedding_1(torch.tensor(item[1])).tolist()) 54 | train_y.append(item[-1]) 55 | train_vec.append(tmp) 56 | # print(train_vec) 57 | # print(train_y) 
58 | # println() 59 | test_vec= [] 60 | test_y =[] 61 | for item in test_set: 62 | tmp=[] 63 | tmp.extend(linear(torch.tensor(tmp_vec[item[0]]).float()).tolist()) 64 | tmp.extend(embedding_1(torch.tensor(item[1])).tolist()) 65 | # tmp.append(item[0]) 66 | # tmp.append(item[1]) 67 | test_y.append(item[-1]) 68 | test_vec.append(tmp) 69 | 70 | # lasso00001 = Lasso(alpha=0.00001).fit(test_vec,test_y) 71 | lasso01 = Lasso(alpha=0.00001).fit(test_vec,test_y) 72 | y_pred_lasso=lasso01.fit(train_vec,train_y).predict(test_vec) 73 | # print(len(y_pred_lasso)) 74 | # print(test_y) 75 | r2_score_lasso=r2_score(test_y,y_pred_lasso) 76 | # print(mean_absolute_error(test_y,y_pred_lasso)) 77 | # print(r2_score_lasso) 78 | test_y_ = [] 79 | y_pred_lasso_= [] 80 | for i,j in zip(test_y,y_pred_lasso): 81 | if i!=0: 82 | test_y_.append(i) 83 | y_pred_lasso_.append(j) 84 | 85 | # y = np.array([1,1]) 86 | # y_hat = np.array([2,3]) 87 | MSE = metrics.mean_squared_error(test_y,y_pred_lasso) 88 | RMSE = metrics.mean_squared_error(test_y,y_pred_lasso)**0.5 89 | MAE = metrics.mean_absolute_error(test_y,y_pred_lasso) 90 | MAPE = metrics.mean_absolute_percentage_error(test_y_,y_pred_lasso_) 91 | print("{}:".format(name), MSE,RMSE,MAE,MAPE,r2_score_lasso) 92 | # print(test_y) 93 | print('**********************************') 94 | println() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Framework](./pictures/framework.png) 2 | 3 | # Initial Data link 4 | https://drive.google.com/drive/folders/1heuy-28olym0Tn4bkMaJTABVF9kXMZNe?usp=sharing 5 | 6 | 7 | # Spatial-Temporal Graph Learning with Adversarial Contrastive Adaptation # 8 | This is the implementation of Spatial-Temporal Graph Learning with Adversarial Contrastive Adaptation (ICML'23) in the following paper: 9 | 10 | ## Requirements ## 11 | Pytorch = 1.7.0 and Tensorflow = 1.15.3 (crime prediction task (ST-SHN)) 

## Data ##
Mobility data, crime data, census block data, POI data, and house prices of Chicago ([https://data.cityofchicago.org/](https://data.cityofchicago.org/)) and New York ([https://opendata.cityofnewyork.us/](https://opendata.cityofnewyork.us/)).
We also provide the processed data and the code used for data preprocessing.

## Data Preprocessing ##
    cd data_pre
    pre_s1.py # collecting the positions of the 180 (234) regions in Manhattan (a certain district) in New York (Chicago).
    pre_s2.py # collecting traffic data
    pre_s3.py # preprocessing the traffic data on the corresponding regions
    pre_s4.py # preprocessing the POI data and obtaining the POI-aware Region Graph
    pre_poi_transformer.py # obtaining the features of nodes by Transformer and Skip-gram
    pre_spatial_graph.py # constructing the trajectory-based Region Graph and the Distance-based Region Graph
    pre_s5.py # constructing a hierarchical graph
    pre_s6_dataloader.py # obtaining the dataloader (dataset.pt file) used in the next step to get region embeddings
    cd house
    pre_s7.py # processing house data

## Hyperparameters ##
The dimensionality d of the region representation is set to 96. The depth of the convolutional layers in GCN is set to 2. The learning rate is initialized as 0.001 with a weight decay of 1e-5. The number of hidden units is 256. The number of projection hidden units is 256. The eps is set to 0.5, alpha is set to 0.1, beta is set to 0.1, and lamb is set to 0.05. For the crime prediction backbone model, ST-SHN is configured with a learning rate of 0.001 and a weight decay of 0.96. The depth of the spatial path aggregation layers is set to 2. For the traffic prediction backbone model ST-GCN, the historical time window of all tests is set to 60 minutes, with 12 observed data points that are used to forecast traffic conditions in the next 15, 30, and 45 minutes. The baselines are implemented with the source code released by their original papers.
We further apply the grid search strategy for parameter tuning in the baselines to achieve their best performance.

## Spatial-Temporal Adversarial Graph Model (STAG) Training and Obtaining Region Representations ##
    train_edit_auto.py # training for obtaining region representations

## TASK 1: Crime Prediction Task ##
The code of the ST-SHN is [https://github.com/akaxlh/ST-SHN](https://github.com/akaxlh/ST-SHN)

Replace the area embedding with the region representation vector and run `HG_ST_labcode.py`.


## TASK 2: Traffic Prediction Task ##
The code of the ST-GCN is [https://github.com/VeritasYin/STGCN_IJCAI-18](https://github.com/VeritasYin/STGCN_IJCAI-18)
ST-GCN predicts traffic in one stage, namely 15 minutes.
Run `main.py`.


## TASK 3: House Price Prediction Task ##
We use Lasso Regression to perform the house price evaluation task.

Just `cd house` and run `pre_s9.py` to get the regression results.
52 | 53 | ![Prediction Results](./pictures/result.png) 54 | 55 | ## Citing ## 56 | If our paper benefits to your research, please cite our paper using the bitex below: 57 | 58 | @inproceedings{zhang2023spatial, 59 | title={Spatial-Temporal Graph Learning with Adversarial Contrastive Adaptation}, 60 | author={Zhang, Qianru and Huang, Chao and Xia, Lianghao and Wang, Zheng and Yiu, Siu Ming and Han, Ruihua}, 61 | booktitle={International Conference on Machine Learning}, 62 | pages={41151--41163}, 63 | year={2023}, 64 | organization={PMLR} 65 | } 66 | -------------------------------------------------------------------------------- /pre_s1.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pandas as pd 4 | from shapely.geometry import Point, LineString 5 | from shapely.geometry import Polygon,MultiPoint #多边形 6 | import matplotlib.pyplot as plt 7 | import json 8 | from urllib.request import urlopen, quote 9 | import requests 10 | import geopy 11 | from geopy.geocoders import Nominatim 12 | import copy 13 | import pickle 14 | import time 15 | # taxi = pd.read_csv("../data/2016_Green_Taxi_Trip_Data.csv", sep = ',') 16 | # print(taxi[:2]) 17 | 18 | census_block = pd.read_excel("../data/rollingsales_manhattan.xlsx",skiprows = 4) 19 | # print(census_block[:2]) 20 | print(census_block.columns.values.tolist()) 21 | blocks = copy.deepcopy(census_block).values.tolist() 22 | 23 | 24 | # region = census_block["BUILDING CLASS CATEGORY"].values.tolist() 25 | region = census_block["BUILDING CLASS AT TIME OF SALE"].values.tolist() 26 | 27 | 28 | region_ = list(set(region)) 29 | reg_nyc_dict = {} ##113 region in manhattan 30 | for idx,sub in enumerate(region_): 31 | reg_nyc_dict[sub] = idx 32 | # print(reg_nyc_dict) 33 | # print(len(reg_nyc_dict)) 34 | # println() 35 | 36 | skip_num = 0 37 | region_f = {} 38 | add_pos = {} 39 | i= 0 40 | NYC_house_middle = [] 41 | for sline in blocks: 42 | start_t = time.time() 43 
| i+=1 44 | tmp = [] 45 | # print("sline:", sline[8],sline[18],sline[14], sline[19]) 46 | # print("address:",sline[8]) 47 | t = sline[8].split(",") 48 | ##collect lat,lon 49 | geolocater = Nominatim(user_agent='demo_of_gnss_help') 50 | try: 51 | if t[0] not in add_pos.keys(): 52 | # print("not in here") 53 | location = geolocater.geocode(t[0]) 54 | if hasattr(location,'latitude') and (location.latitude is not None) and hasattr(location,'longitude') and (location.longitude is not None): 55 | # print([location.latitude, location.longitude]) 56 | # println() 57 | # print("t:", t) 58 | # tmp.append([location.latitude, location.longitude]) 59 | # tmp.append(reg_nyc_dict[sline[18]]) 60 | add_pos[t[0]] = [location.latitude, location.longitude] 61 | tmp.append(reg_nyc_dict[sline[18]]) 62 | tmp.append(sline[14]) 63 | tmp.append(sline[19]) 64 | # print("--:",float(sline[19])/float(sline[14])) 65 | tmp.append(float(sline[19])) 66 | if reg_nyc_dict[sline[18]] not in region_f.keys(): 67 | region_f[reg_nyc_dict[sline[18]]] = [] 68 | region_f[reg_nyc_dict[sline[18]]].append([location.latitude, location.longitude]) 69 | else: 70 | region_f[reg_nyc_dict[sline[18]]].append([location.latitude, location.longitude]) 71 | NYC_house_middle.append(tmp) 72 | 73 | else: 74 | # print("---in here---") 75 | # print("add_pos[t[0]]:", add_pos[t[0]]) 76 | tmp.append(reg_nyc_dict[sline[18]]) 77 | tmp.append(sline[14]) 78 | tmp.append(sline[19]) 79 | # print("--:",float(sline[19])/float(sline[14])) 80 | # tmp.append(float(sline[19])) 81 | if reg_nyc_dict[sline[18]] not in region_f.keys(): 82 | region_f[reg_nyc_dict[sline[18]]] = [] 83 | region_f[reg_nyc_dict[sline[18]]].append(add_pos[t[0]]) 84 | else: 85 | region_f[reg_nyc_dict[sline[18]]].append(add_pos[t[0]]) 86 | NYC_house_middle.append(tmp) 87 | except IOError: 88 | add_pos[t[0]] = [] 89 | skip_num+=1 90 | # print('skip this row') 91 | print("i:", i) 92 | print(time.time()-start_t) 93 | 94 | print(region_f) 95 | print(NYC_house_middle[:3]) 96 
| print(len(NYC_house_middle)) 97 | print(len(region_f)) 98 | print(len(add_pos)) 99 | print("skip_num",skip_num) 100 | 101 | file=open(r"../data/NY_house.pickle","wb") 102 | pickle.dump(NYC_house_middle,file) #storing_list 103 | file.close() 104 | file=open(r"../data/NY_stree_pos.pickle","wb") 105 | pickle.dump(add_pos,file) #storing_list 106 | file.close() 107 | 108 | file=open(r"../data/NY_region.pickle","wb") 109 | pickle.dump(region_f,file) #storing_list 110 | file.close() 111 | 112 | -------------------------------------------------------------------------------- /house/pre_s7.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch 3 | # import networkx as nx 4 | import matplotlib.pyplot as pl 5 | import pickle 6 | import pandas as pd 7 | import numpy as np 8 | import math 9 | import numpy as np 10 | import pandas as pd 11 | from shapely.geometry import Point, LineString 12 | from shapely.geometry import Polygon,MultiPoint #多边形 13 | import matplotlib.pyplot as plt 14 | import json 15 | from urllib.request import urlopen, quote 16 | import requests 17 | import geopy 18 | from geopy.geocoders import Nominatim 19 | import copy 20 | import pickle 21 | import time 22 | 23 | 24 | def load_data(file): 25 | data_load_file = [] 26 | file_1 = open(file, "rb") 27 | data_load_file = pickle.load(file_1) 28 | return data_load_file 29 | house = load_data("../data/NY_house.pickle") 30 | region_old = load_data("../data/NY_region.pickle") 31 | region_new = load_data("../data/region_back_merge.pickle") 32 | 33 | region_map = {} 34 | for key,value in region_old.items(): 35 | tmp = [(item[1],item[0]) for item in value if item!=[]] 36 | if len(tmp)>=3: 37 | tmp_ = Polygon(tmp) 38 | for k,v in region_new.items(): 39 | if tmp_.intersects(v): 40 | region_map[key]=k 41 | break 42 | else: 43 | tmp_ = Point(tmp) 44 | for k,v in region_new.items(): 45 | if tmp_.intersects(v): 46 | region_map[key]=k 47 | break 48 | 49 | 50 | 
# Old-region keys that were matched to a new region (kept for reference).
left_region = list(region_map)

# Keep house rows whose region was mapped and whose ie[1] / ie[-1] values are
# usable (ie[1] not NaN and non-zero, ie[-1] non-zero); re-key them by the new
# region id.  Membership is tested on the dict instead of scanning a list.
house_refine = []
for ie in house:
    if (ie[0] in region_map
            and not np.isnan(float(ie[1]))
            and float(ie[-1]) != 0.0
            and float(ie[1]) != 0.0):
        house_refine.append([region_map[ie[0]], ie[1], ie[2]])

# Append the ratio item[2] / item[1] as a fourth column.
house_sum = [
    [item[0], item[1], item[2], float(item[2] / item[1])]
    for item in house_refine
]

house_array = np.array(house_sum)
house_unit = house_array[:, 1]
price_unit = house_array[:, 3]

unit_max, unit_min = max(house_unit), min(house_unit)
price_max, price_min = max(price_unit), min(price_unit)
print(price_max,price_min)

# Final feature rows: [new region id, int(unit), int(ratio)].
house_feature = [
    [item[0], int(unit), int(price)]
    for item, unit, price in zip(house_sum, house_unit, price_unit)
]

# Fixed split: the first 700 rows train, the remainder test.
train_house = house_feature[:700]
test_house = house_feature[700:]

with open(r"../data/train_house.pickle", "wb") as file:
    pickle.dump(train_house, file)
with open(r"../data/test_house.pickle", "wb") as file:
    pickle.dump(test_house, file)

# --------------------------------------------------------------------------------
# /code/data_pre/pre_s4.py
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import pickle
import pandas as pd
import numpy as np
import copy
from shapely.geometry import Point, LineString
from shapely.geometry import Polygon,MultiPoint  # polygon
import torch
import networkx as nx
import matplotlib.pyplot as pl


def load_data(file):
    """Return the object unpickled from the file at path ``file``.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only point this
    at trusted files.
    """
    with open(file, "rb") as handle:
        return pickle.load(handle)


reg_vec_sort = load_data("../data/reg_poi_vec_2.pickle")
region_que = load_data("../data/reg_poi_idx_1.pickle")

# Connect region ``idx`` to every later region ``idt`` whose vector has a
# cosine similarity of at least 0.850 with it.
region_attr_edges = []
for idx in region_que:
    # The left operand does not change in the inner loop, so build it once.
    left_vec = torch.unsqueeze(reg_vec_sort[idx], 0)
    for idt in range(idx + 1, len(reg_vec_sort)):
        output = torch.cosine_similarity(
            left_vec, torch.unsqueeze(reg_vec_sort[idt], 0), eps=1e-08
        ).mean()
        score = output.item()
        if score >= 0.850:
            tmp_1 = "r" + '_' + str(idx)
            tmp_2 = "r" + '_' + str(idt)
            region_attr_edges.append([tmp_1, tmp_2, score])

# Plain loop instead of a list comprehension that was used only for its
# side effects.
G = nx.Graph()
for edge in region_attr_edges:
    G.add_edge(edge[0], edge[1], weight=edge[2], date="1",
               start=edge[0], end=edge[1])

with open(r"../data/region_attr_graph.pickle", "wb") as file:
    pickle.dump(G, file)

print("attr_region:", G)
print(emb[jj].shape) 81 | # output = torch.cosine_similarity(emb[ii], emb[jj], eps=1e-08).mean() 82 | # # print("similarity:", output.item()) 83 | # # println() 84 | # similarity_list.append(output.item()) 85 | # tmp = 'r_{}_{}'.format(ii, jj) 86 | # similarity_dict[tmp] = output.item() 87 | # similarity_list.sort(reverse = True) 88 | # # print(similarity_list) 89 | # print(len(similarity_list)) 90 | # sum_1 = 0 91 | # for item in similarity_list: 92 | # if item>=1.0: 93 | # sum_1+=1 94 | # print(sum_1) 95 | # print(sum_1/len(similarity_list)) 96 | # print(similarity_dict) 97 | # sim_dict = {} 98 | # edges = [] 99 | # for key,value in similarity_dict.items(): 100 | # tmp = key.split('_') 101 | # # print("tmp:", tmp) 102 | # # print(tmp[0] + '_' + tmp[1]) 103 | # # print(tmp[0] + '_' + tmp[2]) 104 | # if value >=0.8: 105 | # tmp_1 = tmp[0] + '_' + tmp[1] 106 | # tmp_2 = tmp[0] + '_' + tmp[2] 107 | # sim_dict[key] = [tmp_1, tmp_2, value] 108 | # edges.append([tmp_1, tmp_2, value]) 109 | 110 | # print(len(edges)) 111 | # # println() 112 | # G = nx.Graph() 113 | # # for edge in edges: 114 | # # G.add_edge(edge[0],edge[1],weight= edge[2]) 115 | 116 | # [G.add_edge(edge[0],edge[1],weight= edge[2], date = "1", start = edge[0], end = edge[1] ) for edge in edges] 117 | # # print(len(G.adj)) 118 | # nx.draw(G, with_labels=True) 119 | # plt.show() 120 | 121 | 122 | # file=open(r"../data/region_attr_sim_graph_{}.pickle".format(resolution),"wb") 123 | # pickle.dump(G,file) #storing_list 124 | # file.close() 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- 
# --------------------------------------------------------------------------------
# /pre_s14_poi_skip.py
# --------------------------------------------------------------------------------

import torch
from torch import nn
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import pickle


def load_data(file):
    """Return the object unpickled from the file at path ``file``.

    The file is closed before returning (the previous version left the
    handle open).

    NOTE(review): ``pickle.load`` can execute arbitrary code; only point this
    at trusted files.
    """
    with open(file, "rb") as handle:
        return pickle.load(handle)


# The 50 POI category names; list position is the category id.
poi_list = [
    'drinking_water', 'toilets', 'school', 'hospital', 'arts_centre',
    'fire_station', 'police', 'bicycle_parking', 'fountain', 'ferry_terminal',
    'bench', 'cinema', 'cafe', 'pub', 'waste_basket',
    'parking_entrance', 'parking', 'fast_food', 'bank', 'restaurant',
    'ice_cream', 'pharmacy', 'taxi', 'post_box', 'atm',
    'nightclub', 'social_facility', 'bar', 'biergarten', 'clock',
    'bicycle_rental', 'community_centre', 'watering_place', 'ranger_station',
    'boat_rental', 'recycling', 'payment_terminal', 'bicycle_repair_station',
    'place_of_worship', 'shelter', 'telephone', 'clinic', 'dentist',
    'vending_machine', 'theatre', 'charging_station', 'public_bookcase',
    'post_office', 'fuel', 'doctors',
]
# Same sequence with its first two categories repeated at the end, derived
# from ``poi_list`` so the two can no longer drift apart.
poi_list_1 = poi_list + poi_list[:2]

region_back = load_data("../data/region_back.pickle")
reg_poi = load_data("../data/reg_incld_poi_new.pickle")
# Category name -> position in ``poi_list``.
poi_dict = {item: idx for idx, item in enumerate(poi_list)}

CONTEXT_SIZE = 2
EMBEDDING_DIM = 96  # dimension of the encoded vector

test_sentence = poi_list_1
# Training samples: ([first word, second word], prediction target).
trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])
            for i in range(len(test_sentence) - 2)]
# Vocabulary and word -> index lookup used by the embedding layer.
vocab = set(test_sentence)
word_to_ix = {word: i for i, word in enumerate(vocab)}


class NGramLanguageModeler(nn.Module):
    """Embed a fixed-size context and pass it through two linear layers."""

    def __init__(self, vocab_size, embedding_dim, context_size):
        """Create the embedding table and the two fully connected layers.

        Args:
            vocab_size: number of rows in the embedding table.
            embedding_dim: width of each embedding vector.
            context_size: number of context words fed to ``forward``.
        """
        super(NGramLanguageModeler, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        # NOTE(review): the output width comes from the module-level
        # EMBEDDING_DIM, not from ``embedding_dim`` or ``vocab_size`` --
        # confirm this is intended.
        self.linear2 = nn.Linear(128, EMBEDDING_DIM)

    def forward(self, inputs):
        """Return ``(log_probs, out)`` for one context.

        ``out`` is the raw output of the second linear layer and
        ``log_probs`` is its log-softmax over dim 1.
        """
        # Look up the embeddings and flatten them into a single row.
        embeds = self.embeddings(inputs).view((1, -1))
        out = F.relu(self.linear1(embeds))  # first fully connected layer
        out = self.linear2(out)             # second fully connected layer
        log_probs = F.log_softmax(out, dim=1)
        return log_probs,out


# Training.
losses = []
loss_function = nn.NLLLoss()
model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
optimizer = optim.SGD(model.parameters(), lr=0.0005)
emb_dict={}
for epoch in range(1500):
    total_loss = 0
    for context, target in trigrams:
        # Prepare the model input.
        context_idxs = torch.tensor([word_to_ix[w] for w in context], dtype=torch.long)
        model.zero_grad()  # clear the gradient buffers

        # Forward pass.
        log_probs,out = model(context_idxs)

        # Compute the loss.
        loss = loss_function(log_probs, torch.tensor([word_to_ix[target]], dtype=torch.long))

        # Back-propagate and update the weights.
        loss.backward()
        optimizer.step()

        total_loss += loss.item()  # accumulate the loss

        # Remember the latest second-layer output produced for this target.
        emb_dict[target] = out
    # Checkpoint once per epoch.
    # NOTE(review): the collapsed source does not show whether these saves
    # originally ran per step or per epoch; the files left on disk after
    # training are the same either way.
    torch.save(model.state_dict(), './model_skip/model_poi.pt')
    torch.save(model, './model_skip/model_poi.pth')
    losses.append(total_loss)
print(losses)

# Re-key the collected vectors by POI category id and drop the leading
# batch dimension.
poi_skip_vec = {}
for key,value in emb_dict.items():
    poi_skip_vec[poi_dict[key]] = torch.squeeze(value,0)

with open(r"../data/poi_skip_vec.pickle", "wb") as file:
    pickle.dump(poi_skip_vec, file)

# --------------------------------------------------------------------------------
# /code/data_pre/pre_s1.py
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
from shapely.geometry import Point, LineString
from shapely.geometry import Polygon,MultiPoint  # polygon
import matplotlib.pyplot as plt
import json
from urllib.request import urlopen, quote
import requests
import geopy
from geopy.geocoders import Nominatim
import copy
import pickle
import time

census_block = pd.read_excel("../data/rollingsales_manhattan.xlsx",skiprows = 4)
print(census_block.columns.values.tolist())
blocks = copy.deepcopy(census_block).values.tolist()

region = census_block["BUILDING CLASS AT TIME OF SALE"].values.tolist()

# Give every distinct building-class value an integer region id.
region_ = list(set(region))
reg_nyc_dict = {sub: idx for idx, sub in enumerate(region_)}

skip_num = 0
region_f = {}   # region id -> list of [latitude, longitude] entries
add_pos = {}    # street string -> [latitude, longitude] ([] after an IOError)
i= 0
NYC_house_middle = []
# One geocoder is enough; it used to be rebuilt for every row.
geolocater = Nominatim(user_agent='demo_of_gnss_help')
for sline in blocks:
    start_t = time.time()
    i+=1
    tmp = []
    t = sline[8].split(",")
    # Collect lat/lon for the part of sline[8] before the first comma.
    try:
        if t[0] not in add_pos:
            location = geolocater.geocode(t[0])
            if (hasattr(location,'latitude') and (location.latitude is not None)
                    and hasattr(location,'longitude') and (location.longitude is not None)):
                add_pos[t[0]] = [location.latitude, location.longitude]
                region_id = reg_nyc_dict[sline[18]]
                tmp = [region_id, sline[14], sline[19], float(sline[19])]
                region_f.setdefault(region_id, []).append(
                    [location.latitude, location.longitude])
                NYC_house_middle.append(tmp)
        else:
            # Street already geocoded: reuse the cached position.
            # NOTE(review): rows from this branch carry three fields while
            # freshly geocoded rows carry four -- confirm consumers expect it.
            region_id = reg_nyc_dict[sline[18]]
            tmp = [region_id, sline[14], sline[19]]
            region_f.setdefault(region_id, []).append(add_pos[t[0]])
            NYC_house_middle.append(tmp)
    except IOError:
        # NOTE(review): only IOError is handled here; confirm whether
        # geocoder-specific errors should be caught as well.
        add_pos[t[0]] = []
        skip_num+=1
    print("i:", i)
    print(time.time()-start_t)

print(region_f)
print(NYC_house_middle[:3])
print(len(NYC_house_middle))
print(len(region_f))
print(len(add_pos))
print("skip_num",skip_num)
# Persist the collected results; context managers close every file even if
# pickling raises.
with open(r"../data/NY_house.pickle", "wb") as file:
    pickle.dump(NYC_house_middle, file)
with open(r"../data/NY_stree_pos.pickle", "wb") as file:
    pickle.dump(add_pos, file)
with open(r"../data/NY_region.pickle", "wb") as file:
    pickle.dump(region_f, file)

# --------------------------------------------------------------------------------
# /code/data_pre/pre_spatial_graph.py
# --------------------------------------------------------------------------------
#!/usr/bin/env python3

import numpy as np
import pandas as pd
from shapely.geometry import Point, LineString
from shapely.geometry import Polygon,MultiPoint  # polygon
import matplotlib.pyplot as plt
import json
from urllib.request import urlopen, quote
import requests
import geopy
from geopy.geocoders import Nominatim
import copy
import pickle
from datetime import datetime
from itertools import chain
import networkx as nx
from math import radians, cos, sin, asin, sqrt
from sklearn.cluster import DBSCAN


def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in metres between two points.

    Args:
        lon1, lat1: longitude and latitude of the first point, in decimal
            degrees.
        lon2, lat2: longitude and latitude of the second point, in decimal
            degrees.
    """
    # Convert decimal degrees to radians.
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # Haversine formula.
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # mean earth radius in kilometres
    return c * r * 1000


def load_data(file):
    """Return the object unpickled from the file at path ``file``.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only point this
    at trusted files.
    """
    with open(file, "rb") as handle:
        return pickle.load(handle)


region_back = load_data("../data/region_back_merge.pickle")
region_fea = load_data("../data/region_fea.pickle")

# Centroid coordinates of every region (currently only used by experiments
# that are disabled).
X = [[list(value.centroid.coords)[0][0],list(value.centroid.coords)[0][1]] for key,value in region_back.items()]

NUM_REGIONS = 180               # regions are looked up by the keys 0..179
MAX_NEIGHBOUR_DISTANCE_M = 2900 # metres; centroids this close are linked

node = list(range(NUM_REGIONS))
reg_spatial={}

# Compute each centroid once instead of once per pair.
centroids = [list(region_back[idx].centroid.coords)[0] for idx in range(NUM_REGIONS)]

# Link every pair of regions whose centroids are within the threshold.
# Each (ii, jj) pair is visited exactly once, so no duplicate check is needed.
spatial_edges = []
for ii in range(NUM_REGIONS):
    for jj in range(ii + 1, NUM_REGIONS):
        value = haversine(centroids[ii][0], centroids[ii][1],
                          centroids[jj][0], centroids[jj][1])
        if value <= MAX_NEIGHBOUR_DISTANCE_M:
            n1 = "r"+"_"+str(ii)
            n2 = "r"+"_"+str(jj)
            pair = (n1,n2, {"weight":1, "date":int(1), "start":n1, "end":n2})
            spatial_edges.append(pair)

print("spatial_edges:",spatial_edges)
print(len(spatial_edges))
print("finish spatial graph")

# Spatial graph.
G_spatial = nx.Graph()
G_spatial.add_edges_from(spatial_edges[:])
print("G_spatial:",G_spatial)

with open(r"../data/spatial_graph_baseline.pickle", "wb") as file:
    pickle.dump(G_spatial, file)
# --------------------------------------------------------------------------------
# /code/pre_dataloader.py
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import torch
from torch_geometric.data import Data
from itertools import product
import numpy as np
import pandas as pd
from torch import nn
import pickle


def load_data(file):
    """Return the object unpickled from the file at path ``file``.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only point this
    at trusted files.
    """
    with open(file, "rb") as handle:
        return pickle.load(handle)


resolution = 500
# Freshly initialised linear layer from width 512 to width 96.
linear = nn.Linear(512, 96)
region_poi_vec = load_data("./data/reg_poi_vec.pickle")
region_trans = linear(region_poi_vec)


def nx_to_graph_data_obj(g):
    """Convert the networkx graph ``g`` into a torch_geometric ``Data`` object.

    Node ids are expected to look like ``"<prefix>_<int>"``; the integer
    selects the node's row in the module-level ``region_trans``.  Every edge
    must carry ``weight``, ``date``, ``start`` and ``end`` attributes.

    Side effects: writes the node-id list to ``./data/nodes_new_7.pickle`` and
    reads the label and mask pickles from ``./data``.
    """
    # Nodes.
    nx_node_ids = list(g.nodes())
    # Position of each node id in the list; replaces repeated list.index()
    # scans.  An id missing from the graph now raises KeyError, not ValueError.
    node_pos = {node_id: pos for pos, node_id in enumerate(nx_node_ids)}
    region_ids = [int(item.split("_")[1]) for item in nx_node_ids]
    x = torch.tensor([region_trans[item].tolist() for item in region_ids])

    with open(r"./data/nodes_new_{}.pickle".format(7), "wb") as file:
        pickle.dump(nx_node_ids, file)

    # Edges.
    edges_list = []
    edge_features_list = []
    for node_1, node_2, attr_dict in g.edges(data=True):
        # Feature row: weight, date, then the positions of the edge's
        # recorded start and end nodes.
        edge_feature = [attr_dict['weight'], attr_dict['date'],
                        node_pos[attr_dict['start']], node_pos[attr_dict['end']]]
        edge_feature = np.array(edge_feature, dtype=int)
        # Convert nx node ids to data-object node indices.
        edges_list.append((node_pos[node_1], node_pos[node_2]))
        edge_features_list.append(edge_feature)

    # data.edge_index: graph connectivity in COO format, shape [2, num_edges].
    edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long)
    # data.edge_attr: edge feature matrix, shape [num_edges, num_edge_features].
    edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.float)

    node_lab = torch.tensor(np.array(load_data("./data/nodes_lab.pickle")), dtype=torch.float)
    train_mask = torch.tensor(np.array(load_data("./data/train_mask.pickle")), dtype=torch.float)
    val_mask = torch.tensor(np.array(load_data("./data/val_mask.pickle")), dtype=torch.float)
    test_mask = torch.tensor(np.array(load_data("./data/test_mask.pickle")), dtype=torch.float)
    # NOTE(review): num_features is a hard-coded vector of 880 ones.
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr,y=node_lab, train_mask=train_mask, val_mask=val_mask, test_mask=test_mask,num_features=torch.tensor([1]*880,dtype=torch.float))
    return data


def get_data(d):
    """Collate the single data object ``d`` and return ``(data, slices)``.

    ``data`` is a new object of ``d``'s class holding the same keys;
    ``slices`` maps each key to a long tensor of cumulative sizes, starting
    at 0.

    NOTE(review): iterates ``d.keys`` directly, so it presumably needs a
    torch_geometric version where ``keys`` is a property -- confirm.
    """
    data_list = [d]
    keys = data_list[0].keys
    # Start from an empty object of the same class.
    data = data_list[0].__class__()

    for key in keys:
        data[key] = []
    slices = {key: [0] for key in keys}

    for item, key in product(data_list, keys):
        data[key].append(item[key])
        if torch.is_tensor(item[key]):
            # Tensors advance the slice by their size along the cat dimension.
            s = slices[key][-1] + item[key].size(item.__cat_dim__(key, item[key]))
        else:
            s = slices[key][-1] + 1
        slices[key].append(s)

    if hasattr(data_list[0], '__num_nodes__'):
        data.__num_nodes__ = []
        for item in data_list:
            data.__num_nodes__.append(item.num_nodes)

    for key in keys:
        item = data_list[0][key]
        if torch.is_tensor(item):
            print("__data[key]:", len(data[key]))
            print("tmp:", data.__cat_dim__(key, item))

            data[key] = torch.cat(data[key],
                                  dim=data.__cat_dim__(key, item))
            print("data[key]__:", len(data[key]))

        elif isinstance(item, int) or isinstance(item, float):
            data[key] = torch.tensor(data[key])

        slices[key] = torch.tensor(slices[key], dtype=torch.long)

    com = (data, slices)
    return com


hy_graph = load_data("./data/hy_new_aaai_2.pickle")
d = nx_to_graph_data_obj(hy_graph)
com = get_data(d)
torch.save(com,'./data/dataset_new_aaai_2.pt')

# --------------------------------------------------------------------------------
# /code/data_pre/pre_s5.py
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import pickle
import pandas as pd
from itertools import chain
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from math import radians, cos, sin, asin, sqrt


def load_data(file):
    """Return the object unpickled from the file at path ``file``.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only point this
    at trusted files.
    """
    with open(file, "rb") as handle:
        return pickle.load(handle)


from scipy.sparse import csr_matrix

flow_g = load_data('../data/flow_graph.pickle')
spatial_g = load_data('../data/spatial_graph.pickle')
region_attr_g = load_data('../data/region_attr_graph.pickle')
flow_nodes = list(flow_g.nodes)
spatial_nodes = list(spatial_g.nodes)
regat_nodes = list(region_attr_g.nodes)
flow_edges = list(flow_g.edges(data=True))
spatial_edges = list(spatial_g.edges(data=True))
regat_edges = list(region_attr_g.edges(data=True))

part_f = flow_nodes
part_s = spatial_nodes
part_r = regat_nodes

# Group the spatial nodes by their first two "_"-separated fields so each
# region-attribute node finds its matches with one lookup instead of a scan
# over every spatial node.
spatial_by_prefix = {}
for ss in spatial_nodes:
    tmp = ss.split("_")
    spatial_by_prefix.setdefault(tmp[0]+'_'+tmp[1], []).append((ss, tmp))

# Link each region-attribute node to the spatial nodes sharing its prefix;
# the third field of the spatial node id becomes the edge's "date".
hy_edges = []
for sub in regat_nodes:
    for ss, tmp in spatial_by_prefix.get(sub, ()):
        pair = (sub, ss,{"weight":0, "date": tmp[2], "start":sub, "end":ss})
        hy_edges.append(pair)

# Add one edge for every spatial node id that also occurs in the flow graph.
# Both endpoints are the same id, so these edges are self-loops.
flow_node_set = set(flow_nodes)
for ss in spatial_nodes:
    if ss in flow_node_set:
        tps = ss.split("_")
        pair = (ss, ss,{"weight":0, "date":tps[2] , "start":ss, "end":ss})
        hy_edges.append(pair)

# Merge the linking edges with the edges of the three source graphs.
G_hy = nx.Graph()
G_hy.add_edges_from(hy_edges)
G_hy.add_edges_from(flow_edges)
G_hy.add_edges_from(spatial_edges)
G_hy.add_edges_from(regat_edges)
print("hyper_grapgh:", G_hy)

nodes_num = 3
with open(r"../data/hy_{}.pickle".format(8), "wb") as file:
    pickle.dump(G_hy, file)

# --------------------------------------------------------------------------------
# /code/data_pre/pre_s6_dataloader.py
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import torch
from torch_geometric.data import Data
from itertools import product
import numpy as np
import pandas as pd
def load_data(file):
    """Load and return the object stored in the pickle file at ``file``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # NOTE(review): pickle can execute arbitrary code while loading; only use
    # this on files produced by the project's own preprocessing steps.
    # The context manager closes the handle even if unpickling fails (the
    # previous version never closed it).
    with open(file, "rb") as handle:
        return pickle.load(handle)
def get_data(d):
    """Collate a single graph data object into a ``(data, slices)`` pair.

    Every attribute of ``d`` is copied into a fresh object of the same class,
    and for each attribute a ``[start, end]`` slice tensor is recorded along
    the attribute's concatenation dimension.  The layout looks like the one
    torch_geometric in-memory datasets store on disk (assumption based on the
    ``(data, slices)`` shape; confirm against the consumer of the saved file).

    Args:
        d: Data-like object exposing ``keys``, item access by key,
            ``__cat_dim__(key, value)`` and a no-argument constructor.

    Returns:
        Tuple ``(data, slices)`` where ``data`` holds the concatenated
        attributes and ``slices`` maps each key to a ``torch.long`` tensor of
        cumulative boundaries.
    """
    data_list = [d]

    # ``keys`` is a property on older torch_geometric releases and a method on
    # newer ones; support both and freeze the result because it is iterated
    # several times below.
    keys = d.keys() if callable(d.keys) else d.keys
    keys = list(keys)

    # Empty container of the same class, with one list per attribute.
    data = d.__class__()
    for key in keys:
        data[key] = []
    slices = {key: [0] for key in keys}

    for item, key in product(data_list, keys):
        value = item[key]
        data[key].append(value)
        if torch.is_tensor(value):
            # Tensors advance the boundary by their extent along the cat dim.
            step = value.size(item.__cat_dim__(key, value))
        else:
            # Non-tensor attributes count as a single entry.
            step = 1
        slices[key].append(slices[key][-1] + step)

    if hasattr(d, '__num_nodes__'):
        data.__num_nodes__ = [item.num_nodes for item in data_list]

    for key in keys:
        item = d[key]
        if torch.is_tensor(item):
            # Progress output kept from the original implementation.
            print("__data[key]:", len(data[key]))
            print("tmp:", data.__cat_dim__(key, item))

            data[key] = torch.cat(data[key],
                                  dim=data.__cat_dim__(key, item))
            print("data[key]__:", len(data[key]))

        elif isinstance(item, (int, float)):
            data[key] = torch.tensor(data[key])

        slices[key] = torch.tensor(slices[key], dtype=torch.long)

    return data, slices
class GAT(torch.nn.Module):
    """Two-layer attention network that returns per-node log-probabilities.

    The first layer uses ``heads`` attention heads and the classification
    layer a single head.  The classifier's input width is
    ``out_channels * heads``, i.e. the layer class is assumed to concatenate
    its heads (GATConv's default behaviour; confirm for other ``base_model``
    choices).

    Args:
        in_channels: Size of the input node features.
        out_channels: Per-head size of the hidden representation.
        n_class: Number of output classes.
        activation: Callable applied after the first layer.
        base_model: Layer class, called as
            ``base_model(in, out, heads, dropout=...)``.
        input_dropout: Dropout probability applied to the layer inputs.
        coef_dropout: Dropout probability forwarded to the layers.
        heads: Number of attention heads of the first layer (default 8, the
            previously hard-coded value).
    """

    def __init__(self, in_channels: int, out_channels: int, n_class: int, activation,
                 base_model=GATConv, input_dropout: float = 0.5, coef_dropout: float = 0.5,
                 heads: int = 8):
        super(GAT, self).__init__()
        self.base_model = base_model
        self.heads = heads
        self.conv1 = base_model(in_channels, out_channels, heads, dropout=coef_dropout)
        # The classifier consumes the concatenation of all first-layer heads.
        self.head = base_model(out_channels * heads, n_class, 1, dropout=coef_dropout)
        self.dropout = input_dropout
        self.activation = activation

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor):
        """Return log-softmax class scores (over dim 1) for every node."""
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.activation(self.conv1(x, edge_index))
        x = F.dropout(x, self.dropout, training=self.training)
        return F.log_softmax(self.head(x, edge_index), dim=1)
class Model(torch.nn.Module):
    """Encoder plus projection head with a contrastive objective.

    Args:
        encoder: Module called as ``encoder(x, adj)`` to embed the nodes.
        num_hidden: Width of the encoder output (and of the projection output).
        num_proj_hidden: Hidden width of the two-layer projection head.
        tau: Temperature dividing every similarity before exponentiation.
    """

    def __init__(self, encoder: "Encoder", num_hidden: int, num_proj_hidden: int,
                 tau: float = 0.5):
        super(Model, self).__init__()
        self.encoder = encoder
        self.tau: float = tau

        self.fc1 = torch.nn.Linear(num_hidden, num_proj_hidden)
        self.fc2 = torch.nn.Linear(num_proj_hidden, num_hidden)
        self.cos = nn.CosineSimilarity()

    def forward(self, x: torch.Tensor,
                adj: torch.Tensor) -> torch.Tensor:
        """Embed the nodes by delegating to the wrapped encoder."""
        return self.encoder(x, adj)

    def projection(self, z: torch.Tensor) -> torch.Tensor:
        """Apply the ELU-activated two-layer projection head to ``z``."""
        z = F.elu(self.fc1(z))
        return self.fc2(z)

    def sim(self, z1: torch.Tensor, z2: torch.Tensor):
        """Return the matrix of cosine similarities between rows of z1 and z2."""
        z1 = F.normalize(z1)
        z2 = F.normalize(z2)
        return torch.mm(z1, z2.t())

    def _exp_sim(self, z1: torch.Tensor, z2: torch.Tensor) -> torch.Tensor:
        """Return ``exp(sim(z1, z2) / tau)``; shared by both loss variants."""
        return torch.exp(self.sim(z1, z2) / self.tau)

    def semi_loss(self, z1: torch.Tensor, z2: torch.Tensor):
        """Per-row contrastive loss of view ``z1`` against view ``z2``.

        For row ``i`` the positive is ``(z1[i], z2[i])``; the denominator sums
        all cross-view pairs plus all same-view pairs except the self pair.
        """
        refl_sim = self._exp_sim(z1, z1)
        between_sim = self._exp_sim(z1, z2)

        return -torch.log(
            between_sim.diag()
            / (refl_sim.sum(1) + between_sim.sum(1) - refl_sim.diag()))

    def batched_semi_loss(self, z1: torch.Tensor, z2: torch.Tensor,
                          batch_size: int):
        """Compute ``semi_loss`` in row chunks of ``batch_size``.

        Only a ``[B, N]`` similarity block is held at a time instead of the
        full ``[N, N]`` matrix.
        """
        # Space complexity: O(BN) (semi_loss: O(N^2))
        num_nodes = z1.size(0)
        num_batches = (num_nodes - 1) // batch_size + 1
        losses = []

        for i in range(num_batches):
            start, stop = i * batch_size, (i + 1) * batch_size
            # Plain slicing replaces the former arange index tensor: same rows,
            # no extra allocation and no gather copy.
            rows = z1[start:stop]
            refl_sim = self._exp_sim(rows, z1)  # [B, N]
            between_sim = self._exp_sim(rows, z2)  # [B, N]

            losses.append(-torch.log(
                between_sim[:, start:stop].diag()
                / (refl_sim.sum(1) + between_sim.sum(1)
                   - refl_sim[:, start:stop].diag())))

        return torch.cat(losses)

    def loss(self, z1: torch.Tensor, z2: torch.Tensor,
             mean: bool = True, batch_size: int = 0):
        """Return the symmetric per-node loss and the positive-pair similarity.

        Args:
            z1: Embeddings of the first view.
            z2: Embeddings of the second view.
            mean: Accepted for interface compatibility but currently ignored;
                the loss is returned unreduced and callers reduce it.
            batch_size: ``0`` selects the full-matrix loss, any other value
                the chunked variant with that chunk size.

        Returns:
            Tuple ``(ret, simi)``: ``ret`` is the average of both loss
            directions per node, ``simi`` is ``exp(cos(h1, h2) / tau)`` per
            node, computed on the projected embeddings.
        """
        h1 = self.projection(z1)
        h2 = self.projection(z2)
        simi = torch.exp(self.cos(h1, h2) / self.tau)

        if batch_size == 0:
            l1 = self.semi_loss(h1, h2)
            l2 = self.semi_loss(h2, h1)
        else:
            l1 = self.batched_semi_loss(h1, h2, batch_size)
            l2 = self.batched_semi_loss(h2, h1, batch_size)

        ret = (l1 + l2) * 0.5

        return ret, simi
def nx_to_graph_data_obj(g):
    """Convert a networkx graph into a torch_geometric ``Data`` object.

    Node ids are expected to look like ``"r_<region>_<time>"``; the region
    number selects the node's feature row from the module-level
    ``region_trans``.  Every edge must carry the attributes ``weight``,
    ``date``, ``start`` and ``end`` (``start``/``end`` being node ids).

    Side effect: the ordered list of node ids is pickled to
    ``../data/nodes_new_7.pickle`` so that row indices can be mapped back to
    node ids later.

    Args:
        g: networkx graph with the node-id and edge-attribute layout above.

    Returns:
        ``Data`` with ``x`` (node features), ``edge_index`` of shape
        ``[2, num_edges]`` and ``edge_attr`` of shape ``[num_edges, 4]``
        holding ``(weight, date, start_index, end_index)``.

    Raises:
        KeyError: If an edge endpoint or its ``start``/``end`` attribute is
            not a node of ``g`` (previously a ``ValueError`` from
            ``list.index``).
    """
    # Row order of ``x`` follows the node iteration order of the graph.
    nx_node_ids = list(g.nodes())
    # One dictionary lookup per reference instead of a linear list scan,
    # which made the edge loop quadratic in the graph size.
    node_pos = {node_id: pos for pos, node_id in enumerate(nx_node_ids)}

    region_ids = [int(node_id.split("_")[1]) for node_id in nx_node_ids]
    x = torch.tensor([region_trans[region_id].tolist() for region_id in region_ids])

    with open(r"../data/nodes_new_{}.pickle".format(7), "wb") as file:
        pickle.dump(nx_node_ids, file)

    edges_list = []
    edge_features_list = []
    for node_1, node_2, attr_dict in g.edges(data=True):
        edge_feature = [attr_dict['weight'], attr_dict['date'],
                        node_pos[attr_dict['start']], node_pos[attr_dict['end']]]
        # The integer cast truncates fractional weights, as before.
        edge_features_list.append(np.array(edge_feature, dtype=int))
        # Convert nx node ids to data-object node indices.
        edges_list.append((node_pos[node_1], node_pos[node_2]))

    if edges_list:
        # data.edge_index: connectivity in COO format, shape [2, num_edges].
        edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long)
        # data.edge_attr: edge feature matrix, shape [num_edges, 4].
        edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.float)
    else:
        # Keep well-formed shapes for a graph without edges; transposing an
        # empty array would give a 1-D tensor.
        edge_index = torch.empty((2, 0), dtype=torch.long)
        edge_attr = torch.empty((0, 4), dtype=torch.float)

    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
product(data_list, keys): 122 | # print("111:", item, key) 123 | # print("222:", item[key]) 124 | data[key].append(item[key]) 125 | # print("middle_data:", data) 126 | # println() 127 | if torch.is_tensor(item[key]): 128 | # print("slices[key]:", slices[key][-1]) 129 | # print("item[key]:", item.__cat_dim__(key, item[key])) 130 | # print("%%%:", item[key].size(item.__cat_dim__(key, item[key]))) 131 | # 132 | s = slices[key][-1] + item[key].size(item.__cat_dim__(key, item[key])) 133 | # print("s^^^:", s) 134 | else: 135 | s = slices[key][-1] + 1 136 | # print("s***:", s) 137 | slices[key].append(s) 138 | # print("slices_after:", slices) 139 | 140 | # print("final_data:", data) 141 | # println() 142 | 143 | 144 | if hasattr(data_list[0], '__num_nodes__'): 145 | data.__num_nodes__ = [] 146 | for item in data_list: 147 | data.__num_nodes__.append(item.num_nodes) 148 | 149 | for key in keys: 150 | item = data_list[0][key] 151 | if torch.is_tensor(item): 152 | print("__data[key]:", len(data[key])) 153 | print("tmp:", data.__cat_dim__(key, item)) 154 | 155 | data[key] = torch.cat(data[key], 156 | dim=data.__cat_dim__(key, item)) 157 | print("data[key]__:", len(data[key])) 158 | 159 | elif isinstance(item, int) or isinstance(item, float): 160 | data[key] = torch.tensor(data[key]) 161 | 162 | slices[key] = torch.tensor(slices[key], dtype=torch.long) 163 | 164 | 165 | # print("data:", data) 166 | # print("slices:", slices) 167 | com = (data, slices) 168 | # print(com) 169 | return com 170 | # import os.path as osp 171 | # def get(idx): 172 | # data = torch.load(osp.join("../data/dataset/processed/", 'dataset_{}.pt',format(idx))) 173 | # return data 174 | # hy_graph = load_data("../data/hy_new_s.pickle") 175 | hy_graph = load_data("../data/hy_new_test_60.pickle") 176 | d = nx_to_graph_data_obj(hy_graph) 177 | com = get_data(d) 178 | torch.save(com,'./data/dataset/processed/dataset_new_60.pt') 179 | 180 | 181 | 
def haversine(lon1, lat1, lon2, lat2):  # longitude 1, latitude 1, longitude 2, latitude 2 (decimal degrees)
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)

    Returns:
        Distance in metres, using a mean earth radius of 6371 km.
    """
    # Convert decimal degrees to radians.
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # Haversine formula.
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push ``a`` marginally outside [0, 1] for (near-)antipodal
    # points, which would make asin raise "math domain error".
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    r = 6371  # mean earth radius in kilometres
    return c * r * 1000
60 | from collections import Counter 61 | 62 | sp_tm = [] 63 | for item in region_traffic[:]: #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID'] 64 | # for key,value in region_back.items(): ## remember to test the whether the region is []. 65 | # print(item) 66 | # println() 67 | # Point(4, 4) 68 | dropoff_pos = Point(item[7],item[8]) 69 | pickup_pos = Point(item[5],item[6]) 70 | # print("11:",dropoff_pos) 71 | # print("22:",pickup_pos) 72 | # poritnlnn() 73 | tmp_idx = [] 74 | for key,value in region_back.items(): ## remember to test the whether the region is []. 75 | # for item in region_traffic[:]: #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID'] 76 | 77 | # print("value:",value) 78 | # print("dropoff_pos:",dropoff_pos) 79 | # print("dropoff_pos:",dropoff_pos) 80 | # pritnln() 81 | tmp_poly = value 82 | # poly_shape.intersects(point)) 83 | if dropoff_pos.intersects(tmp_poly): 84 | dropoff_idx = key 85 | tmp_idx.append(dropoff_idx) 86 | if pickup_pos.intersects(tmp_poly): 87 | pickup_idx = key 88 | tmp_idx.append(pickup_idx) 89 | # print("tmp_idx:", tmp_idx) 90 | # print("item:", item) 91 | if len(tmp_idx)==2: 92 | # print("tmp_idx:", tmp_idx) 93 | # print("item:", item) 94 | sp_tm.append((tmp_idx[1], tmp_idx[0], item[-1])) #起点/终点/日期 95 | result = pd.value_counts(sp_tm) 96 | 
print("result:", result) 97 | # println() 98 | 99 | unique_region = list(set(sp_tm)) 100 | 101 | ##building flow graph 102 | flow_edges = [] 103 | for key,value in result.to_dict().items(): 104 | # print("key:", key) 105 | # print("value:", value) 106 | 107 | # println() 108 | #pair = ('r_{}_{}'.format(region_dict[key[0]], key[-1]), 'r_{}_{}'.format(region_dict[key[1]], key[-1] + 1), value) 109 | pair = ('r_{}_{}'.format(key[0], int(key[-1])),'r_{}_{}'.format(key[1], int(key[-1]+1)), {"weight":value, "date":int(key[-1]), "start":'r_{}_{}'.format(key[0], int(key[-1])), "end":'r_{}_{}'.format(key[1], int(key[-1]+1))}) 110 | flow_edges.append(pair) 111 | 112 | print("finish flow graph") 113 | 114 | 115 | ##bulding spatial graph 116 | spatial_dis = [] 117 | spatial_dict = {} 118 | 119 | flow_nodes = [] 120 | for item in unique_region: 121 | n_1 = "r"+"_"+str(item[0])+"_"+str(item[-1]) 122 | n_2 = "r"+"_"+str(item[1])+"_"+str(int(item[-1])+1) 123 | if n_1 not in flow_nodes: 124 | flow_nodes.append(n_1) 125 | if n_2 not in flow_nodes: 126 | flow_nodes.append(n_2) 127 | 128 | print("finish flow nodes") 129 | spatial_dis.sort(reverse = False) 130 | spatial_edges = [] 131 | spatial_edges.extend(flow_edges) # add edges in flow graph 132 | sim_num=0 133 | for ii in range(len(flow_nodes)): 134 | for jj in range(ii+1, len(flow_nodes)): 135 | # time = flow_nodes[ii].split("_")[2] 136 | t_1 = flow_nodes[ii].split("_") 137 | t_2 = flow_nodes[jj].split("_") 138 | # print("t_1:",t_1) 139 | # print("t_2:",t_2) 140 | t_1_pos = np.average(list(zip(*region_back[int(t_1[1])].exterior.coords.xy)), axis = 0) 141 | t_2_pos = np.average(list(zip(*region_back[int(t_2[1])].exterior.coords.xy)),axis = 0) 142 | # print("--:",t_1_pos) 143 | # print("$$:", t_2_pos) 144 | # println() 145 | 146 | value = haversine(t_1_pos[0], t_1_pos[1], t_2_pos[0], t_2_pos[1]) 147 | if value<= 2500: #小于3公里 148 | # print("value:",value) 149 | sim_num+=1 150 | # yy = key[0].split("_") 151 | # yy_1 = key[1].split("_") 
152 | # print("key:", key) 153 | # println() 154 | # print(flow_nodes[ii],flow_nodes[jj]) 155 | # println() 156 | pair = (flow_nodes[ii],flow_nodes[jj], {"weight":value, "date":int(t_1[2]), "start":flow_nodes[ii], "end":flow_nodes[jj]}) 157 | if pair not in spatial_edges: 158 | spatial_edges.append(pair) 159 | # print("sim_num:",sim_num) 160 | # print("finish spatial graph--part 2") 161 | # println() 162 | #增加边 163 | params_resolution = 3 164 | for z in region_back.keys(): 165 | for j in range(params_resolution): 166 | ox = "r_{}_{}".format(z, j) 167 | oy = "r_{}_{}".format(z, j+1) 168 | pair = (ox,oy, {"weight":0, "date":int(j), "start":ox, "end":oy}) 169 | if pair not in spatial_edges: 170 | spatial_edges.append(pair) 171 | print(len(spatial_edges)) 172 | print("finish spatial graph") 173 | 174 | G_flow = nx.Graph() 175 | G_flow.add_edges_from(flow_edges[:]) 176 | # nx.draw(G_flow, with_labels=True) 177 | # plt.show() 178 | print("G_flow:",G_flow) 179 | #spatial graph 180 | G_spatial = nx.Graph() 181 | G_spatial.add_edges_from(spatial_edges[:]) 182 | # nx.draw(G_spatial, with_labels=True) 183 | # plt.show() 184 | print("G_spatial:",G_spatial) 185 | 186 | file=open(r"../data/flow_graph.pickle","wb") 187 | pickle.dump(G_flow,file) #storing_list 188 | file.close() 189 | 190 | file=open(r"../data/spatial_graph.pickle","wb") 191 | pickle.dump(G_spatial,file) #storing_list 192 | file.close() 193 | 194 | print("----spatial----:", G_spatial) 195 | print("----flow----:",G_flow) 196 | 197 | -------------------------------------------------------------------------------- /pre_s3.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pandas as pd 4 | from shapely.geometry import Point, LineString 5 | from shapely.geometry import Polygon,MultiPoint #多边形 6 | import matplotlib.pyplot as plt 7 | import json 8 | from urllib.request import urlopen, quote 9 | import requests 10 | import geopy 11 | from 
# Build the flow graph and the spatial graph from trip records and region
# polygons, then pickle both graphs.
region_back = load_data("../data/region_back_merge.pickle")
region_traffic = load_data("../data/NY_traffic_2.pickle")


from collections import Counter

# Trip record layout used below: item[5]/item[6] are pickup longitude/latitude,
# item[7]/item[8] are dropoff longitude/latitude, and the last field is used as
# the date (time-slot) index -- presumably appended by an earlier
# preprocessing step; confirm against the script that writes the traffic file.
sp_tm = []
for item in region_traffic:
    dropoff_pos = Point(item[7], item[8])
    pickup_pos = Point(item[5], item[6])
    pickup_hits = []
    dropoff_hits = []
    for key, tmp_poly in region_back.items():
        if dropoff_pos.intersects(tmp_poly):
            dropoff_hits.append(key)
        if pickup_pos.intersects(tmp_poly):
            pickup_hits.append(key)
    # Pickup and dropoff are tracked separately: the previous single list was
    # filled in polygon iteration order, so origin and destination could be
    # swapped (or two hits of the same point mistaken for a trip).  Trips whose
    # endpoints do not each fall into exactly one region are skipped.
    if len(pickup_hits) == 1 and len(dropoff_hits) == 1:
        sp_tm.append((pickup_hits[0], dropoff_hits[0], item[-1]))  # origin / destination / date

# pandas.value_counts is deprecated; Series.value_counts is the replacement.
result = pd.Series(sp_tm).value_counts()
print("result:", result)

unique_region = list(set(sp_tm))

## building flow graph
flow_edges = []
for key in result.to_dict():
    date = int(key[-1])
    start = 'r_{}_{}'.format(key[0], date)
    end = 'r_{}_{}'.format(key[1], int(key[-1]+1))
    # A flow edge links the origin at time t with the destination at time t+1.
    flow_edges.append((start, end, {"weight": 1, "date": date, "start": start, "end": end}))

print("finish flow graph")


## building spatial graph
# Ordered, de-duplicated node names; the set makes the membership test O(1).
flow_nodes = []
seen_nodes = set()
for item in unique_region:
    # The date is normalised with int() so node names match the flow edges.
    n_1 = "r_{}_{}".format(item[0], int(item[-1]))
    n_2 = "r_{}_{}".format(item[1], int(item[-1])+1)
    for node_name in (n_1, n_2):
        if node_name not in seen_nodes:
            seen_nodes.add(node_name)
            flow_nodes.append(node_name)

print("finish flow nodes")


def _edge_key(pair):
    """Return a hashable identity of an edge tuple (endpoints plus attributes)."""
    attrs = pair[2]
    return (pair[0], pair[1], attrs["weight"], attrs["date"], attrs["start"], attrs["end"])


spatial_edges = list(flow_edges)  # start from the edges of the flow graph
seen_edge_keys = {_edge_key(pair) for pair in spatial_edges}


def _add_spatial_edge(pair):
    """Append ``pair`` to ``spatial_edges`` unless an identical edge exists."""
    key = _edge_key(pair)
    if key not in seen_edge_keys:
        seen_edge_keys.add(key)
        spatial_edges.append(pair)


# Parse each node name and look up its region centroid once instead of once
# per node pair.
node_info = []
for node_name in flow_nodes:
    parts = node_name.split("_")
    centroid = list(region_back[int(parts[1])].centroid.coords)[0]
    node_info.append((node_name, int(parts[2]), centroid))

sim_num = 0
for ii in range(len(node_info)):
    name_1, date_1, t_1_pos = node_info[ii]
    for jj in range(ii+1, len(node_info)):
        name_2, _, t_2_pos = node_info[jj]
        value = haversine(t_1_pos[0], t_1_pos[1], t_2_pos[0], t_2_pos[1])
        if value <= 5000:  # centroids at most 5 km (5000 m) apart
            sim_num += 1
            _add_spatial_edge((name_1, name_2, {"weight": value, "date": date_1, "start": name_1, "end": name_2}))

# Add temporal edges linking each region to itself in the next time slot.
params_resolution = 2
for z in region_back.keys():
    for j in range(params_resolution):
        ox = "r_{}_{}".format(z, j)
        oy = "r_{}_{}".format(z, j+1)
        _add_spatial_edge((ox, oy, {"weight": 0, "date": int(j), "start": ox, "end": oy}))
print(len(spatial_edges))
print("finish spatial graph")

G_flow = nx.Graph()
G_flow.add_edges_from(flow_edges)
print("G_flow:", G_flow)
# spatial graph
G_spatial = nx.Graph()
G_spatial.add_edges_from(spatial_edges)
print("G_spatial:", G_spatial)

with open(r"../data/flow_graph_new_1.pickle", "wb") as file:
    pickle.dump(G_flow, file)

with open(r"../data/spatial_graph_new_1.pickle", "wb") as file:
    pickle.dump(G_spatial, file)

print("----spatial----:", G_spatial)
print("----flow----:", G_flow)
import torch.nn.functional as F
import torch.nn as nn
from layers import GCNConv
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.datasets import Planetoid, CitationFull, Amazon, Coauthor, GitHub, FacebookPagePage, LastFMAsia, DeezerEurope
from torch_geometric.utils import dropout_adj
from model import Encoder, Model, drop_feature
from utils import normalize_adj_tensor, normalize_adj_tensor_sp, edge2adj
from attack import PGD_attack_graph
from eval import label_classification
import pickle
import os


def train(model: Model, x, edge_index, eps, lamb, alpha, beta, steps, node_ratio):
    """Run one contrastive optimisation step and return ``(loss1, loss2)`` as floats.

    Two augmented views of ``(x, edge_index)`` are built with ``dropout_adj``
    and ``drop_feature``.  When ``eps > 0`` a third, adversarial view is
    obtained from ``PGD_attack_graph`` and its loss is added with weight
    ``eps``.

    Relies on module-level state created by the ``__main__`` section:
    ``optimizer``, ``drop_edge_rate_1/2`` and ``drop_feature_rate_1/2``.

    Args:
        model: the contrastive model; must expose ``loss(z_a, z_b, ...)``
            returning a ``(loss, similarity)`` pair.
        x: node feature tensor of the sampled subgraph.
        edge_index: edge list tensor of the sampled subgraph.
        eps: weight of the adversarial loss; ``<= 0`` disables the attack.
        lamb: weight of the similarity-margin regulariser.
        alpha, beta, steps, node_ratio: forwarded unchanged to
            ``PGD_attack_graph``.

    Returns:
        ``(loss1, loss2)``; ``loss2`` equals ``loss1`` when no attack is run.
    """
    optimizer.zero_grad()
    adj = edge2adj(x, edge_index)

    # Keep the original call order so the random augmentations are drawn
    # in the same sequence (edges first, then features).
    edge_index_1 = dropout_adj(edge_index, p=drop_edge_rate_1)[0]
    edge_index_2 = dropout_adj(edge_index, p=drop_edge_rate_2)[0]
    x_1 = drop_feature(x, drop_feature_rate_1)
    x_2 = drop_feature(x, drop_feature_rate_2)

    adj_1 = edge2adj(x_1, edge_index_1)
    adj_2 = edge2adj(x_2, edge_index_2)

    adversarial = eps > 0
    if adversarial:
        adj_3, x_3 = PGD_attack_graph(model, edge_index_1, edge_index, x_1, x,
                                      steps, node_ratio, alpha, beta)

    z = model(x, adj)
    z_1 = model(x_1, adj_1)
    z_2 = model(x_2, adj_2)

    view_loss, simi1 = model.loss(z_1, z_2, batch_size=0)
    # Only the similarity terms of the view-vs-original comparisons are used.
    _, simi2 = model.loss(z_1, z, batch_size=0)
    _, simi3 = model.loss(z_2, z, batch_size=0)
    margin = torch.clamp(simi1 * 2 - simi2.detach() - simi3.detach(), 0)
    loss1 = view_loss.mean() + lamb * margin.mean()

    if adversarial:
        z_3 = model(x_3, adj_3)
        loss2, _ = model.loss(z_1, z_3)
        loss2 = loss2.mean()
        loss = loss1 + eps * loss2
    else:
        loss = loss1
        loss2 = loss1

    loss.backward()
    optimizer.step()

    return loss1.item(), loss2.item()


def test(model: Model, x, edge_index, y, final=False, task="node"):
    """Embed the full graph, dump the embeddings and evaluate them.

    The embeddings are pickled to ``./data/tmp_vector.pickle`` and then passed
    to ``label_classification(z, y, ratio=0.1)``, whose result is returned.
    ``final`` and ``task`` are accepted for interface compatibility and are
    not used.  Relies on the module-level ``device``.
    """
    model.eval()
    adj = edge2adj(x, edge_index)
    x = x.to(device)
    adj = adj.to(device)
    # Evaluation only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        z = model(x, adj)
    print("test:", z.size())
    with open(r"./data/tmp_vector.pickle", "wb") as file:
        pickle.dump(z, file)

    return label_classification(z, y, ratio=0.1)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='Cora')
    parser.add_argument('--gpu_id', type=int, default=0)
    parser.add_argument('--config', type=str, default='config.yaml')
    parser.add_argument('--log', type=str, default='results/Cora/')
    parser.add_argument('--seed', type=int, default=39788)
    parser.add_argument('--eps', type=float, default=0)
    parser.add_argument('--alpha', type=float, default=0)
    parser.add_argument('--beta', type=float, default=0)
    parser.add_argument('--lamb', type=float, default=0)
    args = parser.parse_args()

    # Explicit validation (an ``assert`` would be stripped under ``python -O``).
    # NOTE(review): --gpu_id is validated but not used to select the device below.
    if args.gpu_id not in range(0, 8):
        parser.error("--gpu_id must be in the range 0..7")

    with open(args.config) as config_file:
        # An empty YAML file parses to None; treat it as "no datasets configured".
        config = yaml.load(config_file, Loader=SafeLoader) or {}
    if args.dataset in config:
        config = config[args.dataset]
    else:
        config = {
            'learning_rate': 0.001,
            'num_hidden': 256,
            'num_proj_hidden': 256,
            'activation': 'prelu',
            'base_model': 'GCNConv',
            'num_layers': 2,
            'drop_edge_rate_1': 0.3,
            'drop_edge_rate_2': 0.4,
            'drop_feature_rate_1': 0.1,
            'drop_feature_rate_2': 0.0,
            'tau': 0.4,
            'num_epochs': 1000,
            'weight_decay': 1e-5,
            'drop_scheme': 'degree',
        }

    # The config file wins; the command-line value is the fallback.  The
    # built-in default config above has no "seed"/"eps"/... keys, so direct
    # indexing used to raise KeyError.
    seed = config.get("seed", args.seed)
    torch.manual_seed(seed)
    random.seed(12345)  # kept fixed as in the original script
    np.random.seed(seed)

    learning_rate = config['learning_rate']
    num_hidden = config['num_hidden']
    num_proj_hidden = config['num_proj_hidden']
    activation = ({'relu': F.relu, 'prelu': nn.PReLU(), 'rrelu': nn.RReLU()})[config['activation']]
    base_model = GCNConv
    num_layers = config['num_layers']

    drop_edge_rate_1 = config['drop_edge_rate_1']
    drop_edge_rate_2 = config['drop_edge_rate_2']
    drop_feature_rate_1 = config['drop_feature_rate_1']
    drop_feature_rate_2 = config['drop_feature_rate_2']
    tau = config['tau']
    num_epochs = config['num_epochs']
    weight_decay = config['weight_decay']
    eps = config.get("eps", args.eps)
    lamb = config.get("lamb", args.lamb)
    alpha = config.get("alpha", args.alpha)
    beta = config.get("beta", args.beta)

    # Number of node ids sampled per epoch to form the training subgraph.
    sample_size = config.get('sample_size', 500)

    def get_dataset(path, name):
        """Return the torch_geometric dataset called ``name``, stored under ``path``.

        Raises:
            ValueError: if ``name`` is not one of the supported datasets.
        """
        supported = ['Cora', 'CiteSeer', "AmazonC", "AmazonP", 'CoauthorC', 'CoauthorP',
                     "DBLP", "PubMed", "GitHub", "Facebook", "LastFMAsia", "DeezerEurope"]
        if name not in supported:
            raise ValueError(f"unknown dataset {name!r}; expected one of {supported}")

        transform = T.NormalizeFeatures()
        if name == "AmazonC":
            return Amazon(path, "Computers", transform=transform)
        if name == "AmazonP":
            return Amazon(path, "Photo", transform=transform)
        if name == 'CoauthorC':
            return Coauthor(root=path, name='cs', transform=transform)
        if name == 'CoauthorP':
            return Coauthor(root=path, name='physics', transform=transform)
        if name == "GitHub":
            return GitHub(root=path, transform=transform)
        if name == "Facebook":
            return FacebookPagePage(root=path, transform=transform)
        if name == "LastFMAsia":
            return LastFMAsia(root=path, transform=transform)
        if name == "DeezerEurope":
            return DeezerEurope(root=path, transform=transform)
        if name == "DBLP":
            # CitationFull has no ``split`` argument; passing "public"
            # positionally used to land in its ``transform`` slot.
            return CitationFull(path, "dblp", transform=transform)
        # Keywords keep this correct regardless of how many positional
        # parameters sit between ``split`` and ``transform``.
        return Planetoid(path, name, split="public", transform=transform)

    path = osp.join(osp.expanduser('~'), 'datasets', args.dataset)
    dataset = get_dataset(path, args.dataset)
    # NOTE(review): ``dataset.data`` looks like the raw storage, so the
    # NormalizeFeatures transform may never be applied here - confirm whether
    # ``dataset[0]`` was intended.
    data = dataset.data

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    encoder = Encoder(data.num_features, num_hidden, activation,
                      base_model=base_model, k=num_layers).to(device)
    model = Model(encoder, num_hidden, num_proj_hidden, tau).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    start = t()
    prev = start
    G = nx.Graph()
    sources, targets = data.edge_index.numpy()
    G.add_edges_from(zip(sources.tolist(), targets.tolist()))

    model.train()
    for epoch in range(1, num_epochs + 1):
        # uncomment to increase the eps every T epochs
        # if epoch % 20 == 0:
        #     eps = eps * 1.1

        # Sample a subgraph from the original graph.
        S = G.subgraph(np.random.permutation(G.number_of_nodes())[:sample_size])
        x = data.x[np.array(S.nodes())].to(device)
        S = nx.relabel.convert_node_labels_to_integers(S, first_label=0, ordering='default')
        # reshape(-1, 2) keeps a (2, 0) layout when the sample has no edges,
        # so the hstack below cannot fail on an empty edge list.
        edges = np.array(list(S.edges()), dtype=np.int64).reshape(-1, 2).T
        # Add the reversed copy of every edge to make the edge list symmetric.
        edge_index = torch.from_numpy(np.hstack([edges, edges[::-1]])).long().to(device)

        loss1, loss2 = train(model, x, edge_index, eps, lamb, alpha, beta, 5, 0.2)

        now = t()
        print(f'(T) | Epoch={epoch:03d}, loss1={loss1:.4f}, loss2={loss2:.4f}'
              f' this epoch {now - prev:.4f}, total {now - start:.4f}')
        prev = now

    print("=== Final ===")
    results = test(model, data.x, data.edge_index, data.y, final=True)
    print(results)
    # Create the log directory so a full training run is not lost on the write.
    if args.log:
        os.makedirs(args.log, exist_ok=True)
    with open(osp.join(args.log, "progress.csv"), "w") as f:
        f.write(str(results))

# --------------------------------------------------------------------------------
# /code/data_pre/pre_poi_transformer.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Build per-region POI embeddings and write them to ``../data``.

Outputs (all pickles):
    reg_poi_idx_1.pickle            ids of the regions that contain a POI
    reg_poi_vec_2.pickle            mean POI skip-gram vector per region
    reg_com_poi_cat_spatial.pickle  transformer-encoded combined region vectors
"""
import pandas as pd
import pickle
import sys
from shapely.geometry import Point, LineString
from shapely.geometry import Polygon,MultiPoint  # polygon
import torch
from torch import nn
import numpy as np
import torch.nn.functional as F
import torch.optim as optim

# POI categories, kept from the original source for reference:
# poi_list = ['drinking_water', 'toilets', 'school', 'hospital', 'arts_centre', 'fire_station',
#   'police', 'bicycle_parking', 'fountain', 'ferry_terminal', 'bench', 'cinema', 'cafe', 'pub',
#   'waste_basket', 'parking_entrance', 'parking', 'fast_food', 'bank', 'restaurant', 'ice_cream',
#   'pharmacy', 'taxi', 'post_box', 'atm', 'nightclub', 'social_facility', 'bar', 'biergarten',
#   'clock', 'bicycle_rental', 'community_centre', 'watering_place', 'ranger_station',
#   'boat_rental', 'recycling', 'payment_terminal', 'bicycle_repair_station', 'place_of_worship',
#   'shelter', 'telephone', 'clinic', 'dentist', 'vending_machine', 'theatre', 'charging_station',
#   'public_bookcase', 'post_office', 'fuel', 'doctors']

NUM_REGIONS = 172   # region ids are 0 .. NUM_REGIONS - 1
EMB_DIM = 96        # width of each of the three concatenated feature groups
MODEL_DIM = 512     # transformer model width
MAX_POI_COUNT = 10  # largest POI count the count-embedding table can index


def load_data(file):
    """Return the object unpickled from the path ``file``."""
    with open(file, "rb") as handle:
        return pickle.load(handle)


def save_data(obj, file):
    """Pickle ``obj`` to the path ``file``."""
    with open(file, "wb") as handle:
        pickle.dump(obj, handle)


def normalization(data):
    """Scale ``data`` by its largest absolute value."""
    _range = np.max(abs(data))
    return data / _range


def main():
    """Compute and store the region POI embeddings."""
    # The original script also loaded region_back, region_spatial_refine_1,
    # flow_graph and the flow edge list but never used them; those loads are
    # dropped so a missing auxiliary file no longer aborts the run.
    reg_poi = load_data("../data/reg_incld_poi_new.pickle")
    poi_skip_vec = load_data("../data/poi_skip_vec.pickle")
    check_in_label = load_data("../data/checkin_label.pickle")
    label_norm = normalization(check_in_label)

    reg_idx = [key for key, pois in reg_poi.items() if len(pois) > 0]
    save_data(reg_idx, "../data/reg_poi_idx_1.pickle")
    populated = set(reg_idx)  # O(1) membership inside the region loop

    # Randomly initialised and never trained here; the creation order is
    # kept so a seeded run draws the same initial weights.
    # NOTE(review): a region with more than MAX_POI_COUNT POIs would index
    # past this table - confirm the data never exceeds it.
    embedding_cat = torch.nn.Embedding(MAX_POI_COUNT + 1, EMB_DIM)
    linear = nn.Linear(EMB_DIM * 3, MODEL_DIM)
    linear_trans = nn.Linear(MODEL_DIM, EMB_DIM)

    region_poi_gram = []
    region_com_list = []
    for region in range(NUM_REGIONS):
        if region in populated:
            poi_vectors = [poi_skip_vec[poi].tolist() for poi in reg_poi[region]]
            poi_mean = np.mean(poi_vectors, axis=0)
            poi_count = len(reg_poi[region])
        else:
            # A region without POIs gets a zero vector and count bucket 0.
            poi_mean = np.zeros(EMB_DIM)
            poi_count = 0
        region_poi_gram.append(poi_mean.tolist())

        count_emb = embedding_cat(torch.tensor(poi_count))
        # [mean POI vector | POI-count embedding | check-in label repeated]
        combined = np.concatenate(
            (poi_mean, count_emb.tolist(), [label_norm[region]] * EMB_DIM), axis=0)
        region_com_list.append(linear(torch.tensor(combined).float()).tolist())

    save_data(torch.tensor(region_poi_gram), "../data/reg_poi_vec_2.pickle")

    region_com_array = np.array(region_com_list)
    encoder_layer = nn.TransformerEncoderLayer(d_model=MODEL_DIM, nhead=8)
    transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
    # NOTE(review): the input is (1, NUM_REGIONS, MODEL_DIM) and the encoder is
    # left in training mode; with the default (seq, batch, feature) layout the
    # regions would be treated as a batch, and dropout is active - confirm
    # both are intended.
    src = torch.unsqueeze(torch.tensor(region_com_array), 0)
    with torch.no_grad():
        encoded = torch.squeeze(transformer_encoder(src.float()), 0)
        out_ = linear_trans(encoded)

    save_data(out_, "../data/reg_com_poi_cat_spatial.pickle")


if __name__ == "__main__":
    main()
    # The original stopped here by calling the undefined name ``println()``.
    # Everything after that call was unreachable and used the undefined
    # ``reg_poi_``, so it was removed in favour of a clean exit.
    sys.exit(0)
print(len(reg_idx))
# reg_poi_vec = {}
# for idx,vec in zip(reg_idx,out_):
#     reg_poi_vec[idx] = vec

file=open(r"../data/reg_poi_vec_1.pickle","wb")
pickle.dump(out_,file) #storing_list
file.close()

file=open(r"../data/reg_poi_idx_1.pickle","wb")
pickle.dump(reg_idx,file) #storing_list
file.close()

# --------------------------------------------------------------------------------
# /code/model_gcn.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import torch
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree, softmax
from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool, GlobalAttention, Set2Set
import torch.nn.functional as F
# from loader import BioDataset
# from dataloader import DataLoaderFinetune
from torch_scatter import scatter_add
from torch_geometric.nn.inits import glorot, zeros
import dill
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # the code below kept failing with shape-mismatch errors


class GINConv(MessagePassing):
    """
    Extension of GIN aggregation to incorporate edge information by concatenation.

    Args:
        emb_dim (int): dimensionality of embeddings for nodes and edges.
        aggr (str): neighbourhood aggregation scheme handed to MessagePassing.
        input_layer (bool): whether the GIN conv is applied to the input layer or not.
            When True, node inputs are looked up in a 2-entry embedding table.

    See https://arxiv.org/abs/1810.00826
    """
    def __init__(self, emb_dim, aggr = "add", input_layer = False):
        # Forward ``aggr`` to the base class; assigning ``self.aggr`` after
        # construction does not reliably change the aggregation used.
        super(GINConv, self).__init__(aggr = aggr)
        # multi-layer perceptron
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(2*emb_dim, 2*emb_dim),
            torch.nn.BatchNorm1d(2*emb_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(2*emb_dim, emb_dim))

        ### Mapping 4-dim edge features to embedding
        self.edge_encoder = torch.nn.Linear(4, emb_dim)

        ### Mapping uniform input features to embedding.
        self.input_layer = input_layer
        if self.input_layer:
            self.input_node_embeddings = torch.nn.Embedding(2, emb_dim)
            torch.nn.init.xavier_uniform_(self.input_node_embeddings.weight.data)

    def forward(self, x, edge_index, edge_attr):
        """Propagate ``x`` over ``edge_index`` (self-loops added) using ``edge_attr``."""
        # add self loops in the edge space (returns ``(edge_index, edge_weight)``)
        edge_index, _ = add_self_loops(edge_index, num_nodes = x.size(0))

        # add features corresponding to self-loop edges.
        self_loop_attr = torch.zeros(x.size(0), 4)
        self_loop_attr[:,1] = 1 # attribute for self-loop edge
        self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype)
        edge_attr = torch.cat((edge_attr, self_loop_attr), dim = 0)

        edge_embeddings = self.edge_encoder(edge_attr.float())

        if self.input_layer:
            x = self.input_node_embeddings(x.to(torch.int64).view(-1,))

        return self.propagate(edge_index, x=x, edge_attr=edge_embeddings)

    def message(self, x_j, edge_attr):
        return torch.cat([x_j, edge_attr], dim = 1)

    def update(self, aggr_out):
        return self.mlp(aggr_out)


class GCNConv(MessagePassing):
    """GCN-style convolution with symmetric degree normalisation.

    Every edge (self-loops included) receives the same constant 4-dim
    attribute, which is encoded and added to the neighbour message.

    Args:
        emb_dim (int): dimensionality of the node embeddings.
        aggr (str): neighbourhood aggregation scheme handed to MessagePassing.
        input_layer (bool): when True, inputs first pass through an
            ``emb_dim -> emb_dim`` linear layer.
    """

    def __init__(self, emb_dim, aggr = "add", input_layer = False):
        super(GCNConv, self).__init__(aggr = aggr)

        self.emb_dim = emb_dim
        self.linear = torch.nn.Linear(emb_dim, emb_dim)
        # Not used in forward(); kept so parameter names/order (and therefore
        # saved state dicts and seeded initialisation) stay unchanged.
        self.linear_1 = torch.nn.Linear(1, emb_dim)
        ### Mapping 4-dim edge features to embedding
        self.edge_encoder = torch.nn.Linear(4, emb_dim)

        ### Mapping uniform input features to embedding.
        self.input_layer = input_layer
        if self.input_layer:
            self.input_node_embeddings = torch.nn.Linear(emb_dim,emb_dim)
            torch.nn.init.xavier_uniform_(self.input_node_embeddings.weight.data)

    def norm(self, edge_index, num_nodes, dtype):
        """Return the per-edge weight ``deg(row)^-1/2 * deg(col)^-1/2``."""
        ### assuming that self-loops have been already added in edge_index
        edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                 device=edge_index[0].device)
        row, col = edge_index

        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        # Nodes with degree 0 give inf; zero them out.
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        return deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_attr = None):
        """Propagate ``x`` over ``edge_index`` with self-loops added.

        ``edge_attr`` is accepted only so all convolutions in this module can
        be called the same way; it is ignored and a constant attribute is
        used for every edge instead.
        """
        # add self loops in the edge space (returns ``(edge_index, edge_weight)``)
        edge_index, _ = add_self_loops(edge_index, num_nodes = x.size(0))

        # Build the constant edge attribute on the module's own device; the
        # previous hard-coded ``.cuda()`` crashed on CPU-only machines.
        constant_attr = torch.zeros(edge_index.size(1), 4,
                                    device=self.edge_encoder.weight.device)
        constant_attr[:,1] = 1 # same marker the other convs use for self-loop edges
        edge_embeddings = self.edge_encoder(constant_attr.float())

        if self.input_layer:
            x = self.input_node_embeddings(x.float())

        norm = self.norm(edge_index.long(), x.size(0), x.dtype)
        x = self.linear(x)
        return self.propagate(edge_index, x=x, edge_attr=edge_embeddings, norm = norm)

    def message(self, x_j, edge_attr, norm):
        return norm.view(-1, 1) * (x_j + edge_attr)


class GATConv(MessagePassing):
    """Multi-head graph attention convolution with additive edge embeddings.

    Args:
        emb_dim (int): dimensionality of the node embeddings.
        heads (int): number of attention heads; head outputs are averaged.
        negative_slope (float): slope of the LeakyReLU on attention logits.
        aggr (str): neighbourhood aggregation scheme handed to MessagePassing.
        input_layer (bool): when True, node inputs are looked up in a 2-entry
            embedding table.
    """
    def __init__(self, emb_dim, heads=2, negative_slope=0.2, aggr = "add", input_layer = False):
        # NOTE(review): messages here are 3-D (edges, heads, emb_dim); confirm
        # the installed torch_geometric treats dimension 0 as the node
        # dimension by default.
        super(GATConv, self).__init__(aggr = aggr)

        self.emb_dim = emb_dim
        self.heads = heads
        self.negative_slope = negative_slope

        self.weight_linear = torch.nn.Linear(emb_dim, heads * emb_dim)
        self.att = torch.nn.Parameter(torch.Tensor(1, heads, 2 * emb_dim))

        self.bias = torch.nn.Parameter(torch.Tensor(emb_dim))

        ### Mapping 4-dim edge features to embedding
        self.edge_encoder = torch.nn.Linear(4, heads * emb_dim)

        ### Mapping uniform input features to embedding.
        self.input_layer = input_layer
        if self.input_layer:
            self.input_node_embeddings = torch.nn.Embedding(2, emb_dim)
            torch.nn.init.xavier_uniform_(self.input_node_embeddings.weight.data)

        self.reset_parameters()

    def reset_parameters(self):
        glorot(self.att)
        zeros(self.bias)

    def forward(self, x, edge_index, edge_attr):
        """Propagate ``x`` over ``edge_index`` (self-loops added) using ``edge_attr``."""
        # add self loops in the edge space (returns ``(edge_index, edge_weight)``)
        edge_index, _ = add_self_loops(edge_index, num_nodes = x.size(0))

        # add features corresponding to self-loop edges.
        self_loop_attr = torch.zeros(x.size(0), 4)
        self_loop_attr[:,1] = 1 # attribute for self-loop edge
        self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype)
        edge_attr = torch.cat((edge_attr, self_loop_attr), dim = 0)

        edge_embeddings = self.edge_encoder(edge_attr)

        if self.input_layer:
            x = self.input_node_embeddings(x.to(torch.int64).view(-1,))

        x = self.weight_linear(x).view(-1, self.heads, self.emb_dim)
        # Same call form as GINConv/GCNConv: edge_index first, aggregation
        # fixed in the constructor.
        return self.propagate(edge_index, x=x, edge_attr=edge_embeddings)

    def message(self, edge_index, x_i, x_j, edge_attr):
        edge_attr = edge_attr.view(-1, self.heads, self.emb_dim)
        x_j = x_j + edge_attr

        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)

        alpha = F.leaky_relu(alpha, self.negative_slope)
        alpha = softmax(alpha, edge_index[0])

        return x_j * alpha.view(-1, self.heads, 1)

    def update(self, aggr_out):
        # Average the heads, then add the shared bias.
        aggr_out = aggr_out.mean(dim=1)
        aggr_out = aggr_out + self.bias

        return aggr_out


class GraphSAGEConv(MessagePassing):
    """GraphSAGE-style convolution with additive edge embeddings.

    Args:
        emb_dim (int): dimensionality of the node embeddings.
        aggr (str): neighbourhood aggregation scheme handed to MessagePassing.
        input_layer (bool): when True, node inputs are looked up in a 2-entry
            embedding table.
    """
    def __init__(self, emb_dim, aggr = "mean", input_layer = False):
        # Without forwarding ``aggr`` the base class would fall back to its
        # own default instead of the "mean" requested here.
        super(GraphSAGEConv, self).__init__(aggr = aggr)

        self.emb_dim = emb_dim
        self.linear = torch.nn.Linear(emb_dim, emb_dim)

        ### Mapping 4-dim edge features to embedding
        self.edge_encoder = torch.nn.Linear(4, emb_dim)

        ### Mapping uniform input features to embedding.
        self.input_layer = input_layer
        if self.input_layer:
            self.input_node_embeddings = torch.nn.Embedding(2, emb_dim)
            torch.nn.init.xavier_uniform_(self.input_node_embeddings.weight.data)

    def forward(self, x, edge_index, edge_attr):
        """Propagate ``x`` over ``edge_index`` (self-loops added) using ``edge_attr``."""
        # add self loops in the edge space (returns ``(edge_index, edge_weight)``)
        edge_index, _ = add_self_loops(edge_index, num_nodes = x.size(0))

        # add features corresponding to self-loop edges.
        self_loop_attr = torch.zeros(x.size(0), 4)
        # Column 1 marks a self-loop, as in the sibling convs; the previous
        # index 7 was out of range for this 4-column tensor.
        self_loop_attr[:,1] = 1
        self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype)
        edge_attr = torch.cat((edge_attr, self_loop_attr), dim = 0)

        edge_embeddings = self.edge_encoder(edge_attr)

        if self.input_layer:
            x = self.input_node_embeddings(x.to(torch.int64).view(-1,))

        x = self.linear(x)

        return self.propagate(edge_index, x=x, edge_attr=edge_embeddings)

    def message(self, x_j, edge_attr):
        return x_j + edge_attr

    def update(self, aggr_out):
        return F.normalize(aggr_out, p = 2, dim = -1)


class GNN(torch.nn.Module):
    """
    Stack of message-passing layers producing node representations.

    Args:
        num_layer (int): the number of GNN layers (at least 2)
        emb_dim (int): dimensionality of embeddings
        JK (str): how layer outputs are combined: "last", "sum" or "max".
        drop_ratio (float): dropout rate
        gnn_type: gin, gat, graphsage, gcn

    See https://arxiv.org/abs/1810.00826
    JK-net: https://arxiv.org/abs/1806.03536

    Output:
        node representations

    Raises:
        ValueError: for fewer than 2 layers, an unknown ``gnn_type`` or an
            unsupported ``JK`` mode.
    """
    _CONV_TYPES = {
        "gin": GINConv,
        "gcn": GCNConv,
        "gat": GATConv,
        "graphsage": GraphSAGEConv,
    }
    _JK_MODES = ("last", "sum", "max")

    def __init__(self, num_layer, emb_dim, JK = "last", drop_ratio = 0, gnn_type = "gin"):
        super(GNN, self).__init__()
        self.num_layer = num_layer
        self.drop_ratio = drop_ratio
        self.JK = JK

        if self.num_layer < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")
        # Fail at construction instead of building an empty network or
        # hitting an undefined variable in forward().
        if gnn_type not in self._CONV_TYPES:
            raise ValueError("Invalid GNN type: {!r}.".format(gnn_type))
        if JK not in self._JK_MODES:
            raise ValueError("Invalid JK mode: {!r}.".format(JK))

        ###List of message-passing GNN convs; only the first is an input layer
        conv_cls = self._CONV_TYPES[gnn_type]
        self.gnns = torch.nn.ModuleList()
        for layer in range(num_layer):
            self.gnns.append(conv_cls(emb_dim, input_layer = (layer == 0)))

    def forward(self, x, edge_index, edge_attr = None):
        """Return node representations for the graph ``(x, edge_index)``.

        ``edge_attr`` is optional; when given it is passed to every layer
        (required by the gin/gat/graphsage convolutions, ignored by gcn).
        """
        h_list = [x]
        last_layer = self.num_layer - 1
        for layer, conv in enumerate(self.gnns):
            if edge_attr is None:
                h = conv(h_list[layer], edge_index)
            else:
                h = conv(h_list[layer], edge_index, edge_attr)
            h = h + 1e-6
            if layer != last_layer:
                # no relu on the last layer
                h = F.relu(h)
            h = F.dropout(h, self.drop_ratio, training = self.training)
            h_list.append(h)

        if self.JK == "last":
            return h_list[-1]

        # Combine the layer outputs (the raw input h_list[0] is excluded).
        # torch.stack leaves the tensors untouched; the previous in-place
        # ``unsqueeze_`` also reshaped the caller's ``x``, and the trailing
        # ``[0]`` kept only the first node's row.
        stacked = torch.stack(h_list[1:], dim = 0)
        if self.JK == "sum":
            return stacked.sum(dim = 0)
        return stacked.max(dim = 0)[0]  # JK == "max"


class GNN_graphpred(torch.nn.Module):
    """
    Graph-level prediction head on top of :class:`GNN`.

    Args:
        num_layer (int): the number of GNN layers
        emb_dim (int): dimensionality of embeddings
        num_tasks (int): number of tasks in multi-task learning scenario
        drop_ratio (float): dropout rate
        JK (str): layer combination mode passed to :class:`GNN`.
        graph_pooling (str): sum, mean, max or attention
        gnn_type: gin, gat, graphsage, gcn

    See https://arxiv.org/abs/1810.00826
    JK-net: https://arxiv.org/abs/1806.03536
    """
    def __init__(self, num_layer, emb_dim, num_tasks, JK = "last", drop_ratio = 0, graph_pooling = "mean", gnn_type = "gin"):
        super(GNN_graphpred, self).__init__()
        self.num_layer = num_layer
        self.drop_ratio = drop_ratio
        self.JK = JK
        self.emb_dim = emb_dim
        self.num_tasks = num_tasks

        if self.num_layer < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")

        self.gnn = GNN(num_layer, emb_dim, JK, drop_ratio, gnn_type = gnn_type)

        #Different kind of graph pooling
        if graph_pooling == "sum":
            self.pool = global_add_pool
        elif graph_pooling == "mean":
            self.pool = global_mean_pool
        elif graph_pooling == "max":
            self.pool = global_max_pool
        elif graph_pooling == "attention":
            self.pool = GlobalAttention(gate_nn = torch.nn.Linear(emb_dim, 1))
        else:
            raise ValueError("Invalid graph pooling type.")

        # Input is [pooled graph vector | centre-node vector].
        self.graph_pred_linear = torch.nn.Linear(2*self.emb_dim, self.num_tasks)

    def from_pretrained(self, model_file):
        """Load GNN weights from ``model_file`` (tensors are kept on CPU)."""
        self.gnn.load_state_dict(torch.load(model_file, map_location=lambda storage, loc: storage))

    def forward(self, data):
        """Return task predictions for the batch ``data``.

        Reads ``data.x``, ``data.edge_index``, ``data.edge_attr``,
        ``data.batch`` and ``data.center_node_idx``.
        """
        x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch
        node_representation = self.gnn(x, edge_index, edge_attr)

        pooled = self.pool(node_representation, batch)
        center_node_rep = node_representation[data.center_node_idx]

        graph_rep = torch.cat([pooled, center_node_rep], dim = 1)

        return self.graph_pred_linear(graph_rep)


if __name__ == "__main__":
    pass

# --------------------------------------------------------------------------------
# /code/train_edit.py:
# --------------------------------------------------------------------------------
import argparse
import os.path as osp
import random
from time import perf_counter as t
import yaml
import numpy as np
from yaml import SafeLoader
from scipy.linalg import fractional_matrix_power, inv
from torch.utils.data import random_split
import torch
import torch_geometric.transforms as T
import torch.nn.functional as F
import torch.nn as nn
from layers import GCNConv
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.datasets import Planetoid, CitationFull, Amazon, Coauthor, GitHub, FacebookPagePage, LastFMAsia, DeezerEurope
from torch_geometric.utils import dropout_adj
from model import Encoder, Model, drop_feature
from utils import normalize_adj_tensor, normalize_adj_tensor_sp, edge2adj
from attack import PGD_attack_graph
from eval import label_classification
import pickle
from torch_geometric.nn import global_mean_pool, global_add_pool
from model_gcn import GNN
import torch.optim as optim
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # as a reminder of shape-mismatch errors
import warnings

def fxn():
    warnings.warn("deprecated", DeprecationWarning)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
def load_data(file):
    """Return the object stored in the pickle file at path ``file``.

    The file is opened in binary mode and closed again before returning,
    including when unpickling raises.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(file, "rb") as handle:
        return pickle.load(handle)
edge_index): 78 | eleWise_mul = x[edge_index[0]] * x[edge_index[1]] 79 | edge_attr_pred = self.decoder(eleWise_mul) 80 | edge_pos = self.sigmoid( self.decoder_edge(eleWise_mul) ).squeeze() 81 | # edge_neg = self.sigmoid( self.decoder_edge(x[edge_index_neg[0]] * x[edge_index_neg[1]]) ).squeeze() 82 | # return edge_attr_pred, edge_pos, edge_neg 83 | return edge_pos 84 | 85 | def loss_vgae(self, edge_attr_pred, edge_attr, edge_pos_pred, edge_neg_pred, edge_index_batch, edge_index_neg_batch, x_mean, x_std, batch, reward=None): 86 | # evaluate p(A|Z) 87 | num_edge, _ = edge_attr_pred.shape 88 | loss_rec = self.bceloss(edge_attr_pred.reshape(-1), edge_attr[:, :4].reshape(-1)) 89 | loss_rec = loss_rec.reshape((num_edge, -1)).sum(dim=1) 90 | 91 | loss_edge_pos = self.bceloss(edge_pos_pred, torch.ones(edge_pos_pred.shape).to(edge_pos_pred.device)) 92 | loss_edge_neg = self.bceloss(edge_neg_pred, torch.zeros(edge_neg_pred.shape).to(edge_neg_pred.device)) 93 | loss_pos = loss_rec + loss_edge_pos 94 | loss_pos = self.pool(loss_pos, edge_index_batch) 95 | loss_neg = self.pool(loss_edge_neg, edge_index_neg_batch) 96 | loss_rec = loss_pos + loss_neg 97 | #print('loss_pos + loss_neg', loss_pos, loss_neg) 98 | if not reward is None: 99 | loss_rec = loss_rec * reward 100 | #print("reward:", reward) 101 | #print("loss_rec:", loss_rec) 102 | 103 | # evaluate p(Z|X,A) 104 | kl_divergence = - 0.5 * (1 + 2 * torch.log(x_std+ 1e-6) - x_mean**2 - x_std**2).sum(dim=1) 105 | kl_ones = torch.ones(kl_divergence.shape).to(kl_divergence.device) 106 | kl_divergence = self.pool(kl_divergence, batch) 107 | kl_double_norm = 1 / self.add_pool(kl_ones, batch) 108 | kl_divergence = kl_divergence * kl_double_norm 109 | loss = (loss_rec + kl_divergence).mean() 110 | ''' 111 | # link prediction for sanity check 112 | from sklearn.metrics import roc_auc_score 113 | from sklearn.metrics import average_precision_score 114 | print(roc_auc_score(edge_attr.cpu().numpy(), edge_attr_pred.detach().cpu().numpy()), 
average_precision_score(edge_attr.cpu().numpy(), edge_attr_pred.detach().cpu().numpy())) 115 | ''' 116 | return loss, (loss_edge_pos.mean()+loss_edge_neg.mean()).item()/2 117 | 118 | def generate(self, data): 119 | x, _, _ = self.forward_encoder(data.x, data.edge_index) 120 | eleWise_mul = torch.einsum('nd,md->nmd', x, x) 121 | # calculate softmax probability 122 | prob = self.decoder_edge(eleWise_mul).squeeze() 123 | # print("prob:", prob.size()) 124 | # pritnl() 125 | prob = torch.exp(prob) 126 | prob[torch.isinf(prob)] = 1e10 127 | prob[list(range(x.shape[0])), list(range(x.shape[0]))] = 0 128 | prob = torch.einsum('nm,n->nm', prob, 1 / prob.sum(dim=1)) 129 | 130 | # sparsify 131 | 132 | prob[prob < 1e-1] = 0 133 | prob[prob.sum(dim=1) == 0] = 1 134 | prob[list(range(x.shape[0])), list(range(x.shape[0]))] = 0 135 | prob = torch.einsum('nm,n->nm', prob, 1 / prob.sum(dim=1)) 136 | 137 | # predict 4-class & 3-class edge_attr for 1st & 2nd dimension 138 | edge_attr_prob_1 = self.softmax(self.decoder_1(eleWise_mul)) 139 | edge_attr_rand_1 = torch.rand((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1])) 140 | edge_attr_pred_1 = torch.zeros((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1]), dtype=torch.int64) 141 | for n in range(3): 142 | edge_attr_pred_1[edge_attr_rand_1 >= edge_attr_prob_1[:, :, n]] = n + 1 143 | edge_attr_rand_1 -= edge_attr_prob_1[:, :, n] 144 | 145 | edge_attr_prob_2 = self.softmax(self.decoder_2(eleWise_mul)) 146 | edge_attr_rand_2 = torch.rand((edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1])) 147 | edge_attr_pred_2 = torch.zeros((edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1]), dtype=torch.int64) 148 | for n in range(2): 149 | edge_attr_pred_2[edge_attr_rand_2 >= edge_attr_prob_2[:, :, n]] = n + 1 150 | edge_attr_rand_2 -= edge_attr_prob_2[:, :, n] 151 | 152 | edge_attr_pred = torch.cat((edge_attr_pred_1.reshape((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1], 1)), 153 | edge_attr_pred_2.reshape( 154 | 
def train(model: Model, x, edge_index, eps, model_1, optimizer_1,lamb, alpha, beta, steps, node_ratio):
    """Run one optimisation step for the contrastive model and the VGAE sampler.

    Besides its arguments the function reads module-level names created by
    the ``__main__`` script: ``optimizer``, ``device``, ``hy``,
    ``drop_edge_rate_1/2`` and ``drop_feature_rate_1/2``.

    Args:
        model: contrastive model; called as ``model(x, adj)`` and
            ``model.loss(a, b, ...)``.
        x: node features of the sampled sub-graph.
        edge_index: edge index tensor of the sampled sub-graph.
        eps: weight of the adversarial loss; the PGD attack only runs when
            ``eps > 0``.
        model_1: sampler exposing ``forward_encoder`` / ``forward_decoder``.
        optimizer_1: optimizer stepping ``model_1``.
        lamb: weight of the similarity-margin term added to ``loss1``.
        alpha, beta, steps, node_ratio: forwarded to ``PGD_attack_graph``.

    Returns:
        ``(loss1, loss2, loss_vage)`` as Python floats.
    """
    optimizer.zero_grad()
    # optimizer_1.step() is called below, so its gradients must be cleared
    # too; otherwise they keep accumulating from one call to the next.
    optimizer_1.zero_grad()

    adj = edge2adj(x, edge_index)
    edge_index_1 = dropout_adj(edge_index, p=drop_edge_rate_1)[0]
    edge_index_2 = dropout_adj(edge_index, p=drop_edge_rate_2)[0]
    # x_1 is overwritten by the sampler a few lines down; the call is kept so
    # that whatever random state drop_feature consumes stays the same.
    x_1 = drop_feature(x, drop_feature_rate_1)
    x_2 = drop_feature(x, drop_feature_rate_2)
    adj_2 = edge2adj(x_2, edge_index_2)

    # Learning to sample: the sampler scores every existing edge and those
    # scores become the weighted adjacency of the first view.
    x_1, x_mean, x_std = model_1.forward_encoder(x, edge_index)
    edge_pos_pred = model_1.forward_decoder(x, edge_index)
    adj_1 = torch.sparse_coo_tensor(
        edge_index, edge_pos_pred, (adj.size()[0], adj.size()[1])
    ).to_dense()

    if eps > 0:
        adj_3, x_3 = PGD_attack_graph(model, edge_index_1, edge_index, x_1, x, steps, node_ratio, alpha, beta)
    z = model(x, adj)
    z_1 = model(x_1, adj_1)
    z_2 = model(x_2, adj_2)

    # Split node positions by view using the node-name suffix: "s" ->
    # spatial, "p" -> POI, anything else -> flow.  Graph node names are
    # unique, so the enumerate position equals the former list.index lookup.
    poi_view = []
    spatial_view = []
    flow_view = []
    for position, node_name in enumerate(hy.nodes):
        if node_name.endswith("s"):
            spatial_view.append(position)
        elif node_name.endswith("p"):
            poi_view.append(position)
        else:
            flow_view.append(position)

    # Cross-view contrastive learning.
    # NOTE(review): 128 and 180 are hard-coded; they presumably are the
    # embedding width of z_2 and the number of regions - confirm.
    # NOTE(review): these projection layers are re-created on every call and
    # are not registered with either optimizer, and the view tensors are
    # rebuilt from Python lists, so this term does not back-propagate into
    # `model` - confirm this is intended.
    linear = nn.Linear(len(spatial_view), 180).to(device)
    linear_1 = nn.Linear(len(flow_view), 180).to(device)
    poi_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in poi_view]),requires_grad=True).to(device)
    spatial_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in spatial_view]),requires_grad=True).to(device)
    flow_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in flow_view]),requires_grad=True).to(device)
    # NOTE(review): .view(128, n) reinterprets memory, it does not transpose.
    flow_out = linear_1(flow_view_tensor.view(128, len(flow_view)).float())
    flow_trans = flow_out.view(180, 128)
    spatial_out = linear(spatial_view_tensor.view(128, len(spatial_view)).float())
    spatial_trans = spatial_out.view(180, 128).float()
    loss_v1, simi_v1 = model.loss(flow_trans.float(), spatial_trans.float(), batch_size=0)
    loss_v2, simi_v2 = model.loss(flow_trans.float(), poi_view_tensor.float(), batch_size=0)
    loss_v3, simi_v3 = model.loss(spatial_trans.float(), poi_view_tensor.float(), batch_size=0)

    # Adaptive weights for the three cross-view losses; .item() turns them
    # into plain floats, so they act as constants in the loss.
    model_fs = nn.Sequential(nn.Linear(360, 1), nn.ReLU()).to(device)
    flow_spatial = torch.cat((flow_trans, spatial_trans), 0).to(device)
    flow_poi = torch.cat((flow_trans, poi_view_tensor), 0).to(device)
    spatial_poi = torch.cat((spatial_trans, poi_view_tensor), 0).to(device)
    fs_w = model_fs(flow_spatial.view(128, -1).float()).mean()
    fp_w = model_fs(flow_poi.view(128, -1).float()).mean()
    sp_w = model_fs(spatial_poi.view(128, -1).float()).mean()
    loss_view = fs_w.item()*loss_v1 + fp_w.item()*loss_v2 + sp_w.item()*loss_v3

    loss1, simi1 = model.loss(z_1, z_2, batch_size=0)
    loss2, simi2 = model.loss(z_1, z, batch_size=0)
    loss3, simi3 = model.loss(z_2, z, batch_size=0)
    # Binarise the centred loss into a reward: 1 above the mean, 0.01
    # otherwise (weakens the reward for low contrastive loss).
    loss3 = loss3 - loss3.mean()
    loss3[loss3 > 0] = 1
    loss3[loss3 <= 0] = 0.01
    reward = loss3
    loss1 = loss1.mean() + lamb*torch.clamp(simi1*2 - simi2.detach() - simi3.detach(), 0).mean()

    if eps > 0:
        z_3 = model(x_3, adj_3)
        loss2, _ = model.loss(z_1, z_3)
        loss2 = loss2.mean()
        loss = (loss1 + eps*loss2 + loss_view.mean())
    else:
        loss = loss1 + loss_view.mean()
        loss2 = loss1

    # Two backward passes over the same graph, hence retain_graph=True.
    loss.backward(retain_graph=True)
    loss_vage = (reward*loss).mean()
    loss_vage.backward(retain_graph=True)
    optimizer.step()
    optimizer_1.step()

    return loss1.item(), loss2.item(), loss_vage.item()
if __name__ == '__main__':
    # Script entry point: parse arguments, load hyper-parameters, build the
    # contrastive model and the VGAE sampler, train on random sub-graphs and
    # write the final evaluation to <log>/progress.csv.
    # The names assigned here (optimizer, device, drop_*_rate_*) are module
    # globals that train() and test() read.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='Cora')
    parser.add_argument('--gpu_id', type=int, default=0)
    parser.add_argument('--config', type=str, default='config.yaml')
    parser.add_argument('--log', type=str, default='results/Cora/')
    parser.add_argument('--seed', type=int, default=39788)
    parser.add_argument('--eps', type=float, default=0.5)
    parser.add_argument('--alpha', type=float, default=0.1)
    parser.add_argument('--beta', type=float, default=0.1)
    parser.add_argument('--lamb', type=float, default=0.0)
    args = parser.parse_args()

    assert args.gpu_id in range(0, 8)

    # Close the config file after parsing; an empty file yields None.
    with open(args.config) as config_file:
        config = yaml.load(config_file, Loader=SafeLoader) or {}
    if args.dataset in config:
        config = config[args.dataset]
    else:
        config = {
            'learning_rate': 0.001,
            'num_hidden': 256,
            'num_proj_hidden': 256,
            'activation': 'prelu',
            'base_model': 'GCNConv',
            'num_layers': 2,
            'drop_edge_rate_1': 0.3,
            'drop_edge_rate_2': 0.4,
            'drop_feature_rate_1': 0.1,
            'drop_feature_rate_2': 0.0,
            'tau': 0.4,
            'num_epochs': 1000,
            'weight_decay': 1e-5,
            'drop_scheme': 'degree',
        }

    # Config values win when present; otherwise fall back to the CLI flags.
    # (The fallback config above has none of these keys, so plain indexing
    # used to raise KeyError.)
    seed = config.get("seed", args.seed)
    torch.manual_seed(seed)
    random.seed(12345)
    np.random.seed(seed)

    learning_rate = config['learning_rate']
    num_hidden = config['num_hidden']
    num_proj_hidden = config['num_proj_hidden']
    activation = ({'relu': F.relu, 'prelu': nn.PReLU(), 'rrelu': nn.RReLU()})[config['activation']]
    base_model = GCNConv
    num_layers = config['num_layers']

    drop_edge_rate_1 = config['drop_edge_rate_1']
    drop_edge_rate_2 = config['drop_edge_rate_2']
    drop_feature_rate_1 = config['drop_feature_rate_1']
    drop_feature_rate_2 = config['drop_feature_rate_2']
    tau = config['tau']
    num_epochs = config['num_epochs']
    weight_decay = config['weight_decay']
    eps = config.get("eps", args.eps)
    lamb = config.get("lamb", args.lamb)
    alpha = config.get("alpha", args.alpha)
    beta = config.get("beta", args.beta)

    # Number of nodes drawn for each training sub-graph.
    sample_size = 1388

    def get_dataset(path, name):
        """Return the torch_geometric dataset called ``name`` rooted at ``path``."""
        assert name in ['Cora', 'CiteSeer', "AmazonC", "AmazonP", 'CoauthorC', 'CoauthorP',\
            "DBLP", "PubMed", "GitHub", "Facebook", "LastFMAsia", "DeezerEurope"]
        if name =="DBLP":
            name = "dblp"
        if name == "AmazonC":
            return Amazon(path, "Computers", T.NormalizeFeatures())
        if name == "AmazonP":
            return Amazon(path, "Photo", T.NormalizeFeatures())
        if name == 'CoauthorC':
            return Coauthor(root=path, name='cs', transform=T.NormalizeFeatures())
        if name == 'CoauthorP':
            return Coauthor(root=path, name='physics', transform=T.NormalizeFeatures())
        if name == "GitHub":
            return GitHub(root=path,transform=T.NormalizeFeatures())
        if name == "Facebook":
            return FacebookPagePage(root=path,transform=T.NormalizeFeatures())
        if name == "LastFMAsia":
            return LastFMAsia(root=path,transform=T.NormalizeFeatures())
        if name == "DeezerEurope":
            return DeezerEurope(root=path,transform=T.NormalizeFeatures())

        return (CitationFull if name == 'dblp' else Planetoid)(
            path,
            name,
            "public",
            T.NormalizeFeatures())

    path = osp.join(osp.expanduser('~'), 'datasets', args.dataset)
    dataset = get_dataset(path, args.dataset)
    data = dataset.data

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    encoder = Encoder(data.num_features, num_hidden, activation,
                      base_model=base_model, k=num_layers).to(device)
    model = Model(encoder, num_hidden, num_proj_hidden, tau).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    start = t()
    prev = start
    G = nx.Graph()
    edge_array = data.edge_index.numpy()  # convert once, reuse both rows
    G.add_edges_from(list(zip(edge_array[0], edge_array[1])))

    # NOTE(review): emb_dim 96 presumably has to match the node feature
    # width fed to the sampler - confirm.
    gnn_generative_1 = GNN(3, 96, JK="last", drop_ratio=0, gnn_type= "gcn")
    model_generative_1 = vgae(gnn_generative_1, 96)
    model_generative_1.to(device)
    optimizer_generative_1 = optim.Adam(model_generative_1.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Put both networks in training mode.
    model.train()
    model_generative_1.train()
    for epoch in range(1, num_epochs + 1):
        # Sample a sub-graph from the original graph.
        # NOTE(review): this treats node labels as 0..N-1 - confirm for
        # graphs that contain isolated nodes.
        S = G.subgraph(np.random.permutation(G.number_of_nodes())[:sample_size])
        x = data.x[np.array(S.nodes())].to(device)
        S = nx.relabel.convert_node_labels_to_integers(S, first_label=0, ordering='default')
        edge_index = np.array(S.edges()).T
        # Add the reversed edges so the edge list is symmetric.
        edge_index = torch.LongTensor(np.hstack([edge_index,edge_index[::-1]])).to(device)

        loss1, loss2, loss3 = train(model, x, edge_index, eps, model_generative_1,optimizer_generative_1, lamb, alpha, beta, 5, 0.2)

        now = t()
        print(f'(T) | Epoch={epoch:03d}, loss1={loss1:.4f}, loss2={loss2:.4f}'
              f' this epoch {now - prev:.4f}, total {now - start:.4f}')
        prev = now

    print("=== Final ===")
    results,results_1 = test(model, data.x, data.edge_index, model_generative_1,data.y, final=True)
    print(results,results_1)
    # The log directory may not exist yet on a fresh checkout.
    os.makedirs(args.log, exist_ok=True)
    with open(osp.join(args.log, "progress.csv"), "w") as f:
        f.write(str(results))
class vgae(nn.Module):
    """Variational graph auto-encoder used as a learnable view sampler.

    ``gnn`` encodes nodes; two MLP heads produce the mean and a positive
    standard deviation of the latent distribution; edge decoders score node
    pairs from the element-wise product of their latent vectors.
    """

    def __init__(self, gnn, emb_dim):
        """Build the encoder heads and decoders for ``emb_dim``-wide embeddings."""
        super(vgae, self).__init__()
        self.encoder = gnn
        self.encoder_mean = nn.Sequential(nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim))
        # The trailing Softplus keeps the predicted std positive.
        self.encoder_std = nn.Sequential(nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim), nn.Softplus())
        # Edge-attribute decoder with 4 sigmoid outputs; see
        # https://github.com/snap-stanford/pretrain-gnns/issues/30
        self.decoder = nn.Sequential(nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, 4), nn.Sigmoid())
        # Edge-existence decoder: one logit per node pair.
        self.decoder_edge = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 1))

        self.bceloss = nn.BCELoss(reduction='none')
        self.pool = global_mean_pool
        self.add_pool = global_add_pool
        self.sigmoid = nn.Sigmoid()
        self.softplus = nn.Softplus()
        # Kept for attribute compatibility; generate() normalises over the
        # class axis explicitly instead of using this dim=1 module.
        self.softmax = nn.Softmax(dim=1)

        # Class logits for the 1st and 2nd edge-attribute dimension; both
        # heads emit 4 logits.
        self.decoder_1 = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 4))
        self.decoder_2 = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 4))
        self.crossentropyloss = nn.CrossEntropyLoss(reduction='none')

    def forward_encoder(self, x, edge_index):
        """Encode nodes and draw a latent sample.

        Returns:
            ``(sample, x_mean, x_std)``; ``sample = noise * x_std + x_mean``
            is returned detached, the mean and std keep their graph.
        """
        x = self.encoder(x, edge_index)
        x_mean = self.encoder_mean(x)
        x_std = self.encoder_std(x)
        gaussian_noise = torch.randn(x_mean.shape).to(x.device)
        x = gaussian_noise * x_std + x_mean
        return x.detach(), x_mean, x_std

    def forward_decoder(self, x, edge_index):
        """Return one existence probability per edge in ``edge_index``.

        The result is always 1-D with one entry per edge, including when
        ``edge_index`` holds a single edge.
        """
        eleWise_mul = x[edge_index[0]] * x[edge_index[1]]
        # The unused attribute-decoder pass was dropped: its only effect was
        # an in-place ReLU on eleWise_mul, which decoder_edge applies itself.
        # squeeze(-1) removes just the logit axis, so one edge stays 1-D.
        edge_pos = self.sigmoid(self.decoder_edge(eleWise_mul)).squeeze(-1)
        return edge_pos

    def loss_vgae(self, edge_pos_pred, edge_index_batch, x_mean, x_std, reward=None):
        """Combine edge reconstruction loss and KL divergence.

        Args:
            edge_pos_pred: predicted probabilities for the existing edges.
            edge_index_batch: index passed to the pooling function.
            x_mean, x_std: latent mean / std from ``forward_encoder``.
            reward: optional factor multiplied into the reconstruction loss.

        Returns:
            ``(loss, mean_edge_bce)`` where ``loss`` is a scalar tensor and
            ``mean_edge_bce`` a Python float.
        """
        # evaluate p(A|Z): every predicted edge is a positive example.
        loss_edge_pos = self.bceloss(edge_pos_pred, torch.ones(edge_pos_pred.shape).to(edge_pos_pred.device))
        loss_pos = loss_edge_pos
        # NOTE(review): the loss vector is duplicated and viewed as (2, -1)
        # before pooling, presumably to line up with edge_index_batch - confirm.
        loss_pos_cat = torch.cat((loss_pos, loss_pos), 0).view(2, -1)
        loss_pos = self.pool(loss_pos_cat, edge_index_batch)
        loss_rec = loss_pos
        if reward is not None:
            loss_rec = loss_rec * reward

        # evaluate p(Z|X,A): KL term per node against a standard normal.
        kl_divergence = - 0.5 * (1 + 2 * torch.log(x_std+ 1e-6) - x_mean**2 - x_std**2).sum(dim=1)
        loss = (loss_rec.mean(axis=1) + kl_divergence).mean()
        return loss, loss_edge_pos.mean().item()

    def generate(self, data):
        """Sample a dense edge-probability matrix and edge attributes.

        Args:
            data: object exposing ``x`` and ``edge_index``.

        Returns:
            ``(prob, edge_attr_pred)``: ``prob`` is ``(n, n)`` with a zero
            diagonal and rows normalised to sum to 1; ``edge_attr_pred`` is
            an ``(n, n, 4)`` int64 tensor of sampled class ids.
        """
        x, _, _ = self.forward_encoder(data.x, data.edge_index)
        num_nodes = x.shape[0]
        eleWise_mul = torch.einsum('nd,md->nmd', x, x)

        # Row-wise softmax over candidate neighbours, excluding self loops.
        # squeeze(-1) drops only the logit axis so a 1-node graph stays 2-D.
        prob = self.decoder_edge(eleWise_mul).squeeze(-1)
        prob = torch.exp(prob)
        prob[torch.isinf(prob)] = 1e10
        diag = torch.arange(num_nodes, device=prob.device)
        prob[diag, diag] = 0
        prob = torch.einsum('nm,n->nm', prob, 1 / prob.sum(dim=1))

        # Sparsify: drop small probabilities, make emptied rows uniform,
        # then renormalise.
        prob[prob < 1e-1] = 0
        prob[prob.sum(dim=1) == 0] = 1
        prob[diag, diag] = 0
        prob = torch.einsum('nm,n->nm', prob, 1 / prob.sum(dim=1))

        # Class probabilities must be normalised over the last (class) axis;
        # nn.Softmax(dim=1) normalised over the node axis instead.
        edge_attr_prob_1 = torch.softmax(self.decoder_1(eleWise_mul), dim=-1)
        pair_shape = (edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1])
        # Random draws and outputs live on the same device as the
        # probabilities so the comparisons below also work on GPU.
        edge_attr_rand_1 = torch.rand(pair_shape, device=edge_attr_prob_1.device)
        edge_attr_pred_1 = torch.zeros(pair_shape, dtype=torch.int64, device=edge_attr_prob_1.device)
        # Inverse-CDF sampling: move to the next class while the draw
        # exceeds the probability mass consumed so far.
        for n in range(3):
            edge_attr_pred_1[edge_attr_rand_1 >= edge_attr_prob_1[:, :, n]] = n + 1
            edge_attr_rand_1 -= edge_attr_prob_1[:, :, n]

        edge_attr_prob_2 = torch.softmax(self.decoder_2(eleWise_mul), dim=-1)
        edge_attr_rand_2 = torch.rand(pair_shape, device=edge_attr_prob_2.device)
        edge_attr_pred_2 = torch.zeros(pair_shape, dtype=torch.int64, device=edge_attr_prob_2.device)
        for n in range(2):
            edge_attr_pred_2[edge_attr_rand_2 >= edge_attr_prob_2[:, :, n]] = n + 1
            edge_attr_rand_2 -= edge_attr_prob_2[:, :, n]

        # Channel 0 comes from decoder_1; channels 1-3 repeat decoder_2's sample.
        first = edge_attr_pred_1.reshape(pair_shape + (1,))
        second = edge_attr_pred_2.reshape(pair_shape + (1,))
        edge_attr_pred = torch.cat((first, second, second, second), dim=2)

        return prob, edge_attr_pred
print("adj_vgae:", adj_vgae) 192 | # print("x_1:",x_1.size()) 193 | # print(edge_pos_pred) 194 | # print("x_3:", x_3.size()) 195 | # print(edge_pos_pred.size()) 196 | # println() 197 | 198 | x_2, x_mean, x_std = model_2.forward_encoder(x, edge_index) 199 | # print("x_1", x_1) 200 | # println() 201 | edge_pos_pred = model_2.forward_decoder(x_2,edge_index) 202 | # print("edge_index:", edge_index) 203 | # print("edge_pos_pred:", edge_pos_pred.size()) 204 | 205 | s = torch.sparse_coo_tensor(edge_index,edge_pos_pred, (adj.size()[0],adj.size()[1])) 206 | adj_2 = s.to_dense() 207 | 208 | 209 | if eps > 0: 210 | print("x_1:", x_1.size()) 211 | print("x:", x.size()) 212 | file=open(r"./data/tmp_case_before.pickle","wb") 213 | pickle.dump(x,file) #storing_list 214 | file.close() 215 | adj_3, x_3 = PGD_attack_graph(model, edge_index_1, edge_index, x_1, x, steps, node_ratio, alpha, beta) 216 | print("x_3:", x_3.size()) 217 | file=open(r"./data/tmp_case_after.pickle","wb") 218 | pickle.dump(x_3,file) #storing_list 219 | file.close() 220 | # println() 221 | z = model(x, adj) 222 | z_1 = model(x_1, adj_1) 223 | z_2 = model(x_2, adj_2) 224 | # print("x:", x) 225 | # print("edge_index:", edge_index) 226 | 227 | '''adding cross-view contrastive learning''' 228 | node_list = list(hy.nodes) 229 | # print(node_list) 230 | # println() 231 | poi_view = [] 232 | spatial_view = [] 233 | flow_view = [] 234 | for item in node_list: 235 | if item.endswith("s"): 236 | spatial_view.append(node_list.index(item)) 237 | elif item.endswith("p"): 238 | poi_view.append(node_list.index(item)) 239 | else: 240 | flow_view.append(node_list.index(item)) 241 | 242 | # '''cross-view conhtarstive learning''' 243 | # linear = nn.Linear(len(spatial_view), 180).to(device) 244 | # linear_1 = nn.Linear(len(flow_view), 180).to(device) 245 | # poi_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in poi_view]),requires_grad=True).to(device) 246 | # spatial_view_tensor = 
torch.tensor(np.array([z_2[item].tolist() for item in spatial_view]),requires_grad=True).to(device) 247 | # flow_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in flow_view]),requires_grad=True).to(device) 248 | # flow_out = linear_1(flow_view_tensor.view(128,len(flow_view)).float()) 249 | # flow_trans = flow_out.view(180,128) 250 | # spatial_out = linear(spatial_view_tensor.view(128,len(spatial_view)).float()) 251 | # spatial_trans = spatial_out.view(180,128).float() 252 | # # print(spatial_trans.size()) 253 | # loss_v1, simi_v1 = model.loss(flow_trans.float(),spatial_trans.float(),batch_size=0) 254 | # loss_v2, simi_v2 = model.loss(flow_trans.float(),poi_view_tensor.float(),batch_size=0) 255 | # loss_v3, simi_v3 = model.loss(spatial_trans.float(),poi_view_tensor.float(),batch_size=0) 256 | # # print(loss_v1.mean(), loss_v2.mean(), loss_v3.mean()) 257 | # '''adaptative weight for cross-view loss''' 258 | # model_fs = nn.Sequential(nn.Linear(360, 1),nn.ReLU()).to(device) 259 | # # mlp = nn.Linear(in_features = 360, out_features = 1).to(device) 260 | # flow_spatial = torch.cat((flow_trans,spatial_trans),0).to(device) 261 | # flow_poi = torch.cat((flow_trans,poi_view_tensor),0).to(device) 262 | # spatial_poi = torch.cat((spatial_trans, poi_view_tensor),0).to(device) 263 | # fs_w = model_fs(flow_spatial.view(128,-1).float()).mean() 264 | # fp_w = model_fs(flow_poi.view(128,-1).float()).mean() 265 | # sp_w = model_fs(spatial_poi.view(128,-1).float()).mean() 266 | # # print("fs_w:",fs_w.item()) 267 | # # print("fp_w:",fp_w.item()) 268 | # # print("sp_w:",sp_w.item()) 269 | # loss_view = fs_w.item()*loss_v1+fp_w.item()*loss_v2+sp_w.item()*loss_v3 270 | '''cross-view conhtarstive learning''' 271 | reg_num = 180 272 | linear = nn.Linear(len(spatial_view), reg_num).to(device) 273 | linear_1 = nn.Linear(len(flow_view), reg_num).to(device) 274 | poi_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in poi_view]),requires_grad=True).to(device) 
275 | spatial_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in spatial_view]),requires_grad=True).to(device) 276 | flow_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in flow_view]),requires_grad=True).to(device) 277 | flow_out = linear_1(flow_view_tensor.view(128,len(flow_view)).float()) 278 | flow_trans = flow_out.view(reg_num,128) 279 | spatial_out = linear(spatial_view_tensor.view(128,len(spatial_view)).float()) 280 | spatial_trans = spatial_out.view(reg_num,128).float() 281 | # print(spatial_trans.size()) 282 | loss_v1, simi_v1 = model.loss(flow_trans.float(),spatial_trans.float(),batch_size=0) 283 | loss_v2, simi_v2 = model.loss(flow_trans.float(),poi_view_tensor.float(),batch_size=0) 284 | loss_v3, simi_v3 = model.loss(spatial_trans.float(),poi_view_tensor.float(),batch_size=0) 285 | # print(loss_v1.mean(), loss_v2.mean(), loss_v3.mean()) 286 | '''adaptative weight for cross-view loss''' 287 | model_fs = nn.Sequential(nn.Linear(reg_num*2, 1),nn.ReLU()).to(device) 288 | # mlp = nn.Linear(in_features = 360, out_features = 1).to(device) 289 | flow_spatial = torch.cat((flow_trans,spatial_trans),0).to(device) 290 | flow_poi = torch.cat((flow_trans,poi_view_tensor),0).to(device) 291 | spatial_poi = torch.cat((spatial_trans, poi_view_tensor),0).to(device) 292 | fs_w = model_fs(flow_spatial.view(128,-1).float()).mean() 293 | fp_w = model_fs(flow_poi.view(128,-1).float()).mean() 294 | sp_w = model_fs(spatial_poi.view(128,-1).float()).mean() 295 | # print("fs_w:",fs_w.item()) 296 | # print("fp_w:",fp_w.item()) 297 | # print("sp_w:",sp_w.item()) 298 | loss_view = fs_w.item()*loss_v1+fp_w.item()*loss_v2+sp_w.item()*loss_v3 299 | 300 | 301 | loss1, simi1 = model.loss(z_1,z_2,batch_size=0) 302 | loss2, simi2 = model.loss(z_1,z,batch_size=0) 303 | loss3, simi3 = model.loss(z_2,z,batch_size=0) 304 | loss3 = loss3 -loss3.mean() 305 | # loss3 = loss3.mean() 306 | # print("loss3:", loss3) 307 | loss3[loss3 > 0] = 1 308 | loss3[loss3 <= 0] = 
def test(model: Model, x, edge_index, model_1, y, final=False, task="node"):
    """Embed the graph reconstructed by ``model_1`` and score the embeddings.

    Steps, in order:
      1. ``model_1.forward_encoder`` produces new node features from ``x``.
      2. ``model_1.forward_decoder`` produces one value per column of
         ``edge_index``; those values are scattered into a dense matrix with
         the same shape as ``edge2adj(x, edge_index)``.
      3. ``model`` embeds the encoded features on that dense matrix.
      4. The embeddings are pickled to ``./data/tmp_vector_chi_3.pickle`` and
         passed to ``label_classification`` together with ``y``.

    Args:
        model: contrastive model, called as ``model(features, dense_adj)``.
        x: node feature tensor.
        edge_index: edge index tensor used as COO indices (two rows).
        model_1: generative model exposing ``forward_encoder`` and
            ``forward_decoder``.
        y: labels forwarded to ``label_classification``.
        final: unused; kept for call-site compatibility.
        task: unused; kept for call-site compatibility.

    Returns:
        Whatever ``label_classification(z_1, y, ratio=0.1)`` returns.
    """
    model.eval()
    # Build the reference adjacency from the inputs as given (before any
    # device transfer), exactly as the callers expect.
    adj = edge2adj(x, edge_index)
    x = x.to(device)
    adj = adj.to(device)
    # Move the indices once instead of on every use.
    edge_index = edge_index.to(device)

    # Evaluation only: no gradients are needed, so skip building the autograd
    # graph over the dense adjacency matrix.
    with torch.no_grad():
        x_1, _, _ = model_1.forward_encoder(x, edge_index)
        x_1 = x_1.to(device)
        edge_pos_pred = model_1.forward_decoder(x, edge_index)
        # Scatter the per-edge predictions into a dense matrix of adj's shape.
        sparse_adj = torch.sparse_coo_tensor(
            edge_index,
            edge_pos_pred.to(device),
            (adj.size()[0], adj.size()[1]),
        )
        adj_1 = sparse_adj.to_dense().to(device)
        z_1 = model(x_1, adj_1)
    print("test z_1:", z_1.size())

    # The context manager guarantees the handle is closed even if dumping fails.
    with open(r"./data/tmp_vector_chi_3.pickle", "wb") as file:
        pickle.dump(z_1, file)

    return label_classification(z_1, y, ratio=0.1)
if __name__ == '__main__':
    # Script entry point: parse CLI options, load hyper-parameters, build the
    # dataset and models, train on random subgraphs, then evaluate once and
    # write the result to <log>/progress.csv.
    import os  # local import: only needed here to create the log directory

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='Cora')
    parser.add_argument('--gpu_id', type=int, default=0)
    parser.add_argument('--config', type=str, default='config.yaml')
    parser.add_argument('--log', type=str, default='results/Cora/')
    parser.add_argument('--seed', type=int, default=39788)
    parser.add_argument('--eps', type=float, default=0.5)
    parser.add_argument('--alpha', type=float, default=0.1)
    parser.add_argument('--beta', type=float, default=0.1)
    parser.add_argument('--lamb', type=float, default=0.05)
    args = parser.parse_args()

    assert args.gpu_id in range(0, 8)

    # Create the output directory up front so a missing path is reported
    # before training rather than after it.
    if args.log:
        os.makedirs(args.log, exist_ok=True)

    # Close the config file deterministically; an empty YAML file yields None,
    # which is treated as "no dataset sections".
    with open(args.config) as config_file:
        config = yaml.load(config_file, Loader=SafeLoader) or {}
    if args.dataset in config:
        config = config[args.dataset]
    else:
        config = {
            'learning_rate': 0.001,
            'num_hidden': 256,
            'num_proj_hidden': 256,
            'activation': 'prelu',
            'base_model': 'GCNConv',
            'num_layers': 2,
            'drop_edge_rate_1': 0.3,
            'drop_edge_rate_2': 0.4,
            'drop_feature_rate_1': 0.1,
            'drop_feature_rate_2': 0.0,
            'tau': 0.4,
            'num_epochs': 1000,
            'weight_decay': 1e-5,
            'drop_scheme': 'degree',
        }

    # The config value wins when present; otherwise fall back to the CLI
    # option instead of raising KeyError (the built-in default config above
    # does not define a seed).
    seed = config.get("seed", args.seed)
    torch.manual_seed(seed)
    random.seed(12345)
    np.random.seed(seed)

    learning_rate = config['learning_rate']
    num_hidden = config['num_hidden']
    num_proj_hidden = config['num_proj_hidden']
    activation = ({'relu': F.relu, 'prelu': nn.PReLU(), 'rrelu': nn.RReLU()})[config['activation']]
    base_model = GCNConv
    num_layers = config['num_layers']

    drop_edge_rate_1 = config['drop_edge_rate_1']
    drop_edge_rate_2 = config['drop_edge_rate_2']
    drop_feature_rate_1 = config['drop_feature_rate_1']
    drop_feature_rate_2 = config['drop_feature_rate_2']
    tau = config['tau']
    num_epochs = config['num_epochs']
    weight_decay = config['weight_decay']
    # Config values take precedence; the command-line options are the
    # fallback when the config section does not define them.
    eps = config.get("eps", args.eps)
    lamb = config.get("lamb", args.lamb)
    alpha = config.get("alpha", args.alpha)
    beta = config.get("beta", args.beta)

    sample_size = 1388  # new york(1388)
    # sample_size = 2234  # chicago

    def get_dataset(path, name):
        """Return the dataset object for ``name`` rooted at ``path``."""
        assert name in ['Cora', 'CiteSeer', "AmazonC", "AmazonP", 'CoauthorC', 'CoauthorP',
                        "DBLP", "PubMed", "GitHub", "Facebook", "LastFMAsia", "DeezerEurope"]
        if name == "DBLP":
            name = "dblp"
        if name == "AmazonC":
            return Amazon(path, "Computers", T.NormalizeFeatures())
        if name == "AmazonP":
            return Amazon(path, "Photo", T.NormalizeFeatures())
        if name == 'CoauthorC':
            return Coauthor(root=path, name='cs', transform=T.NormalizeFeatures())
        if name == 'CoauthorP':
            return Coauthor(root=path, name='physics', transform=T.NormalizeFeatures())
        if name == "GitHub":
            return GitHub(root=path, transform=T.NormalizeFeatures())
        if name == "Facebook":
            return FacebookPagePage(root=path, transform=T.NormalizeFeatures())
        if name == "LastFMAsia":
            return LastFMAsia(root=path, transform=T.NormalizeFeatures())
        if name == "DeezerEurope":
            return DeezerEurope(root=path, transform=T.NormalizeFeatures())

        # Pass the transform by keyword: the two classes have different
        # positional layouts (CitationFull has no ``split`` argument, and
        # Planetoid's fourth positional parameter is not the transform), so
        # the previous positional call mis-assigned both arguments.
        if name == 'dblp':
            return CitationFull(path, name, transform=T.NormalizeFeatures())
        return Planetoid(path, name, split="public", transform=T.NormalizeFeatures())

    path = osp.join(osp.expanduser('~'), 'datasets', args.dataset)
    dataset = get_dataset(path, args.dataset)
    data = dataset.data

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    encoder = Encoder(data.num_features, num_hidden, activation,
                      base_model=base_model, k=num_layers).to(device)
    model = Model(encoder, num_hidden, num_proj_hidden, tau).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    start = t()
    prev = start
    # Undirected graph over the full dataset; subgraphs are sampled from it.
    G = nx.Graph()
    all_edges = data.edge_index.numpy()
    G.add_edges_from(list(zip(all_edges[0], all_edges[1])))

    gnn_generative_1 = GNN(3, 96, JK="last", drop_ratio=0, gnn_type="gcn")
    model_generative_1 = vgae(gnn_generative_1, 96)
    model_generative_1.to(device)
    optimizer_generative_1 = optim.Adam(
        model_generative_1.parameters(), lr=learning_rate, weight_decay=weight_decay)
    gnn_generative_2 = GNN(3, 96, JK="last", drop_ratio=0, gnn_type="gcn")
    model_generative_2 = vgae(gnn_generative_2, 96)
    model_generative_2.to(device)
    optimizer_generative_2 = optim.Adam(
        model_generative_2.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Put the trained modules into training mode.
    model.train()
    model_generative_1.train()

    for epoch in range(1, num_epochs + 1):
        # Sample a node-induced subgraph of at most ``sample_size`` nodes.
        S = G.subgraph(np.random.permutation(G.number_of_nodes())[:sample_size])
        x = data.x[np.array(S.nodes())].to(device)
        S = nx.relabel.convert_node_labels_to_integers(S, first_label=0, ordering='default')
        edge_index = np.array(S.edges()).T
        # Append the reversed rows so every edge appears in both directions.
        edge_index = torch.LongTensor(np.hstack([edge_index, edge_index[::-1]])).to(device)

        loss1, loss2, loss3 = train(
            model, x, edge_index, eps,
            model_generative_1, optimizer_generative_1,
            model_generative_2, optimizer_generative_2,
            lamb, alpha, beta, 5, 0.2)

        now = t()
        print(f'(T) | Epoch={epoch:03d}, loss1={loss1:.4f}, loss2={loss2:.4f}'
              f' this epoch {now - prev:.4f}, total {now - start:.4f}')
        prev = now

    print("=== Final ===")
    results_1 = test(model, data.x, data.edge_index, model_generative_1, data.y, final=True)
    print(results_1)
    with open(osp.join(args.log, "progress.csv"), "w") as f:
        f.write(str(results_1))