├── code
    ├── init
    ├── init.py
    ├── data
    │   ├── init
    │   ├── init.py
    │   ├── data.pt
    │   ├── poi_matrix.pickle
    │   ├── poi_dict_new.pickle
    │   ├── poi_skip_vec.pickle
    │   ├── region_back.pickle
    │   ├── reg_vector_dict.pickle
    │   ├── reg_incld_poi_new.pickle
    │   └── region_attr_graph.pickle
    ├── data_pre
    │   ├── init.py
    │   ├── pre_s2.py
    │   ├── pre_s4.py
    │   ├── pre_s1.py
    │   ├── pre_spatial_graph.py
    │   ├── pre_s5.py
    │   ├── pre_s6_dataloader.py
    │   ├── pre_s3.py
    │   └── pre_poi_transformer.py
    ├── distance_dict.pickle
    ├── view_split.py
    ├── layers.py
    ├── utils.py
    ├── batch_train.py
    ├── test_data.py
    ├── eval.py
    ├── config.yaml
    ├── vector_transform.py
    ├── attack.py
    ├── pre_dataloader.py
    ├── model.py
    ├── train.py
    ├── model_gcn.py
    ├── train_edit.py
    └── train_edit_auto.py
├── house
    ├── init.py
    ├── pre_s9.py
    └── pre_s7.py
├── pictures
    ├── init.py
    ├── result.png
    ├── framework.png
    └── case_study.png
├── exptract_regions.py
├── pre_s2.py
├── pre_s4.py
├── region_spatial.py
├── pre_poifrom_osm.py
├── pre_spatial_graph.py
├── pre_s10.py
├── pre_poi_transformer.py
├── pre_s5.py
├── README.md
├── pre_s1.py
├── pre_s14_poi_skip.py
├── pre_s6_dataloader.py
└── pre_s3.py


/code/init:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/code/init.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/house/init.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/code/data/init:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/code/data/init.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/pictures/init.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/code/data_pre/init.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/code/data/data.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/data.pt


--------------------------------------------------------------------------------
/pictures/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/pictures/result.png


--------------------------------------------------------------------------------
/pictures/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/pictures/framework.png


--------------------------------------------------------------------------------
/pictures/case_study.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/pictures/case_study.png


--------------------------------------------------------------------------------
/code/data/poi_matrix.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/poi_matrix.pickle


--------------------------------------------------------------------------------
/code/distance_dict.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/distance_dict.pickle


--------------------------------------------------------------------------------
/code/data/poi_dict_new.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/poi_dict_new.pickle


--------------------------------------------------------------------------------
/code/data/poi_skip_vec.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/poi_skip_vec.pickle


--------------------------------------------------------------------------------
/code/data/region_back.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/region_back.pickle


--------------------------------------------------------------------------------
/code/data/reg_vector_dict.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/reg_vector_dict.pickle


--------------------------------------------------------------------------------
/code/data/reg_incld_poi_new.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/reg_incld_poi_new.pickle


--------------------------------------------------------------------------------
/code/data/region_attr_graph.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUDS/GraphST/HEAD/code/data/region_attr_graph.pickle


--------------------------------------------------------------------------------
/code/view_split.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import pickle
 4 | import numpy as np
 5 | import pandas as pd
 6 | 
 7 | def load_data(file):
 8 |         data_load_file = []
 9 |         file_1 = open(file, "rb")
10 |         data_load_file = pickle.load(file_1)
11 |         return data_load_file
# Graph whose node names end in "s" or "p" for two of the views; every other
# node is counted as belonging to the flow view.
hy = load_data("./data/hy_new_aaai_2.pickle")

node_list = list(hy.nodes)
poi_view = []
spatial_view = []
flow_view = []
# enumerate() yields each node's position directly instead of rescanning
# node_list with .index() for every node (quadratic). The two agree as long
# as node names are unique -- presumably guaranteed by the graph's node view.
for position, item in enumerate(node_list):
    if item.endswith("s"):
        spatial_view.append(position)
    elif item.endswith("p"):
        poi_view.append(position)
    else:
        flow_view.append(position)
print(len(poi_view))
print(len(spatial_view))
print(len(flow_view))
print(len(poi_view)+len(spatial_view)+len(flow_view))
29 | # print(poi_view)
30 | # print(flow_view)
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/code/layers.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | 
 5 | from torch.nn.parameter import Parameter
 6 | from torch.nn.modules.module import Module
 7 | 
 8 | 
 9 | class GCNConv(Module):
10 |     """
11 |     Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
12 |     """
13 | 
14 |     def __init__(self, in_features, out_features, bias=True):
15 |         super(GCNConv, self).__init__()
16 |         self.in_features = in_features
17 |         self.out_features = out_features
18 |         self.weight = Parameter(torch.FloatTensor(in_features, out_features))
19 |         if bias:
20 |             self.bias = Parameter(torch.FloatTensor(out_features))
21 |         else:
22 |             self.register_parameter('bias', None)
23 |         self.reset_parameters()
24 | 
25 |     def reset_parameters(self):
26 |         stdv = 1. / math.sqrt(self.weight.size(1))
27 |         self.weight.data.uniform_(-stdv, stdv)
28 |         if self.bias is not None:
29 |             self.bias.data.uniform_(-stdv, stdv)
30 | 
31 |     def forward(self, input, adj):
32 |         support = torch.mm(input, self.weight)
33 |         output = torch.spmm(adj, support)
34 |         if self.bias is not None:
35 |             return output + self.bias
36 |         else:
37 |             return output
38 | 
39 |     def __repr__(self):
40 |         return self.__class__.__name__ + ' (' \
41 |                + str(self.in_features) + ' -> ' \
42 |                + str(self.out_features) + ')'


--------------------------------------------------------------------------------
/code/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_scatter import scatter_add
 3 | from torch_geometric.utils import get_laplacian, add_self_loops
 4 | 
 5 | 
 6 | def normalize_adj_tensor(adj):
 7 |     """Symmetrically normalize adjacency tensor."""
 8 |     rowsum = torch.sum(adj,1)
 9 |     d_inv_sqrt = torch.pow(rowsum, -0.5)
10 |     d_inv_sqrt[d_inv_sqrt == float("Inf")] = 0.
11 |     d_mat_inv_sqrt = torch.diag(d_inv_sqrt)
12 |     return torch.mm(torch.mm(adj,d_mat_inv_sqrt).transpose(0,1),d_mat_inv_sqrt)
13 | 
def normalize_adj_tensor_sp(adj):
    """Symmetrically normalize sparse adjacency tensor.

    The computation is carried out on the CPU; the result is moved back to
    the device ``adj`` was on when passed in.
    """
    device = adj.device
    adj = adj.to("cpu")
    # Row sums, obtained by multiplying with a column of ones.
    # NOTE(review): the ones column has adj.size(0) rows, so this only lines
    # up when adj is square -- confirm callers always pass square matrices.
    rowsum = torch.spmm(adj, torch.ones((adj.size(0),1))).reshape(-1)
    d_inv_sqrt = torch.pow(rowsum, -0.5)
    # A zero row sum gives +inf above; zero those entries.
    d_inv_sqrt[d_inv_sqrt == float("Inf")] = 0.
    d_mat_inv_sqrt = torch.diag(d_inv_sqrt)
    # (adj^T @ D^T) @ D with D = diag(d_inv_sqrt).
    adj = torch.mm(torch.smm(adj.transpose(0,1),d_mat_inv_sqrt.transpose(0,1)),d_mat_inv_sqrt)
    return adj.to(device)
24 | 
def edge2adj(x, edge_index):
    """Build a degree-normalized sparse adjacency matrix from an edge index.

    Self loops are added first; every edge (row, col) then receives the
    weight ``deg[row] ** -0.5 * deg[col] ** -0.5``. ``x`` is used only for
    its first dimension, which is taken as the number of nodes.
    """
    node_count = x.shape[0]
    looped_index, _ = add_self_loops(edge_index, num_nodes=node_count)
    unit_weight = torch.ones(looped_index.size(1), dtype=None,
                             device=edge_index.device)

    src, dst = looped_index[0], looped_index[1]
    degree = scatter_add(unit_weight, src, dim=0, dim_size=node_count)
    # In-place power: ``degree`` itself now holds the inverse square roots.
    inv_sqrt_degree = degree.pow_(-0.5)
    inv_sqrt_degree.masked_fill_(inv_sqrt_degree == float('inf'), 0)
    normalized_weight = inv_sqrt_degree[src] * unit_weight * inv_sqrt_degree[dst]
    shape = torch.Size((node_count, node_count))
    return torch.sparse.FloatTensor(looped_index, normalized_weight, shape)


--------------------------------------------------------------------------------
/exptract_regions.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pandas as pd
 3 | from shapely.geometry import Point, LineString
 4 | from shapely.geometry import Polygon,MultiPoint,MultiPolygon  
 5 | import numpy as np
 6 | import json
 7 | import geopandas
 8 | import shapefile
 9 | 
10 |  
# Read the census-block shapefile as a list of rows and keep only the rows
# whose third column is "Manhattan".
shp_df = geopandas.GeoDataFrame.from_file("../data/2010 Census Blocks/geo_export_c80540b5-38fc-4bb4-81cd-ae8082c49f02.shp",encoding = 'gb18030').values.tolist()
m_region = [item for item in shp_df if item[2] == "Manhattan"]

# Distinct region identifiers (column 3) in first-seen order. A dict is used
# for de-duplication so each membership test is O(1) instead of scanning a
# growing list.
q_index = list(dict.fromkeys(hh[3] for hh in m_region))
print(len(q_index))

# For every region identifier keep the row with the largest value in column 5.
region_dict = {}
for item in m_region:
    current = region_dict.get(item[3])
    if current is None or item[5] > current[5]:
        region_dict[item[3]] = item

# Integer region id -> last column of the retained row (the geometry, judging
# by how region_spatial.py reads region_back.pickle -- verify).
region_trans = {int(key): value[-1] for key, value in region_dict.items()}

# Re-key with consecutive indices 0..n-1 in insertion order.
region_s = dict(enumerate(region_trans.values()))

import pickle
# The context manager closes the file even if pickling fails.
with open(r"../data/region_back.pickle","wb") as file:
    pickle.dump(region_s,file)
46 | 
47 |     
48 | 
49 |             
50 |   
51 |         
52 |   
53 |     
54 |   
55 |     
56 |   
57 |     
58 |   
59 |     
60 |   
61 |     
62 |   
63 |     
64 |   
65 |     
66 |   
67 |     
68 |         


--------------------------------------------------------------------------------
/code/batch_train.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess as sp
 3 | 
 4 | 
 5 | datasets = ["Cora", "CiteSeer", "AmazonP", "AmazonC", "CoauthorC", "CoauthorP"]
 6 | """
 7 | seeds = [0,1,2,3,4,39788]
 8 | epses = [0.5 ,1, 1.5, 2]
 9 | alphas = [50, 200, 600]
10 | betas = [0.001, 0.01]
11 | lambs = [0, 0.5, 1, 1.5, 2]
12 | 
13 | jobs = []
14 | for dataset in datasets:
15 |     for seed in seeds:
16 |         for eps in epses:
17 |             for alpha in alphas:
18 |                 for beta in betas:
19 |                     for lamb in lambs:
20 |                         log = "results/%s_%d_%g_%g_%g_%g"%(dataset, seed, eps, alpha, beta, lamb)
21 |                         jobs.append({'dataset':dataset, 'seed': seed, 'eps': eps, 'alpha': alpha, 'beta': beta, 'lamb':lamb, 'log': log})
22 |            
23 | for job in jobs:
24 |     print(job)
25 | 
26 | for job in jobs: 
27 |     path = job['log']
28 |     if not os.path.exists(path):
29 |         sp.call(['mkdir', path])
30 |         print("Starting: ", job)
31 |         sp.call(['python', 'train.py',
32 |             '--dataset', job['dataset'],
33 |             '--seed', str(job['seed']),
34 |             '--eps', str(job['eps']),
35 |             '--alpha', str(job['alpha']),
36 |             '--beta', str(job['beta']),
37 |             '--lamb', str(job['lamb']),
38 |             '--log', path
39 |                 ])
40 | """
41 | 
# One training job per dataset, each logging into results/<dataset>.
jobs = []
for dataset in datasets:
    log = "results/%s"%dataset
    jobs.append({'dataset':dataset,'log': log})

for job in jobs:
    path = job['log']
    # An existing log directory marks the dataset as already handled.
    if os.path.exists(path):
        continue
    # os.makedirs also creates a missing "results" parent and does not
    # depend on an external ``mkdir`` executable being available.
    os.makedirs(path, exist_ok=True)
    print("Starting: ", job)
    sp.call(['python', 'train.py',
             '--dataset', job['dataset'],
             '--log', path])


--------------------------------------------------------------------------------
/code/data_pre/pre_s2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | from shapely.geometry import Point, LineString
 6 | from shapely.geometry import Polygon,MultiPoint  #多边形
 7 | import matplotlib.pyplot as plt
 8 | import json
 9 | from urllib.request import urlopen, quote
10 | import requests
11 | import geopy
12 | from geopy.geocoders import Nominatim
13 | import copy
14 | import pickle
15 | from datetime import datetime
# Load the raw green-taxi trip records and print the column names.
taxi = pd.read_csv("../data/2016_Green_Taxi_Trip_Data.csv", sep = ',')
# print(taxi[:100])
print(taxi.columns.values.tolist()) #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID']
19 | 
def load_data(file):
    """Load and return the object pickled in ``file``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only use this on trusted
        files.
    """
    # The context manager closes the handle even when unpickling raises.
    with open(file, "rb") as handle:
        return pickle.load(handle)
25 |     
26 | # region = load_data("../data/NY_region.pickle")
27 | # selection_dataset['year'] = selection_dataset['Trip Start Timestamp'].map(lambda x: x.split('-')[0])
# Date part of the pickup timestamp (the text before the first space).
taxi['date'] = taxi['lpep_pickup_datetime'].map(lambda x:x.split(' ')[0])
# Day of month: the second '/'-separated field. pd.to_datetime below parses
# such strings month-first by default, so field 0 would be the month.
# NOTE(review): confirm the CSV really stores dates as MM/DD/YYYY.
taxi['day'] = taxi['date'].map(lambda x:x.split('/')[1]).apply(int)
taxi["date"] = pd.to_datetime(taxi["date"]).dt.date

# Keep trips picked up between 2016-01-01 and 2016-01-03, both inclusive.
s_date = datetime.strptime('20160101', '%Y%m%d').date()
e_date = datetime.strptime('20160103', '%Y%m%d').date()
week_df = taxi[(taxi['date'] >= s_date) & (taxi['date'] <= e_date)]
month_traffic = week_df.drop(['date'], axis=1)
print("one week data:", len(month_traffic))

# Store the selected rows as a plain list of lists.
month_traffic = month_traffic.values.tolist()
# The context manager closes the file even if pickling fails.
with open(r"../data/NY_traffic.pickle","wb") as file:
    pickle.dump(month_traffic,file)
46 | 
47 | 


--------------------------------------------------------------------------------
/pre_s2.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | import pandas as pd
 4 | from shapely.geometry import Point, LineString
 5 | from shapely.geometry import Polygon,MultiPoint  #多边形
 6 | import matplotlib.pyplot as plt
 7 | import json
 8 | from urllib.request import urlopen, quote
 9 | import requests
10 | import geopy
11 | from geopy.geocoders import Nominatim
12 | import copy
13 | import pickle
14 | from datetime import datetime
# Load the raw green-taxi trip records and print the column names.
taxi = pd.read_csv("../data/2016_Green_Taxi_Trip_Data.csv", sep = ',')
# taxi = pd.read_csv("../data/2015_Green_Taxi_Trip_Data.csv", sep = ',')
# print(taxi[:100])
print(taxi.columns.values.tolist()) #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID']
19 | 
def load_data(file):
    """Load and return the object pickled in ``file``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only use this on trusted
        files.
    """
    # The context manager closes the handle even when unpickling raises.
    with open(file, "rb") as handle:
        return pickle.load(handle)
25 |     
26 | # region = load_data("../data/NY_region.pickle")
27 | # selection_dataset['year'] = selection_dataset['Trip Start Timestamp'].map(lambda x: x.split('-')[0])
# Date part of the pickup timestamp (the text before the first space).
taxi['date'] = taxi['lpep_pickup_datetime'].map(lambda x:x.split(' ')[0])
# Second '/'-separated field of the date text, stored as an integer.
taxi['day'] = taxi['date'].map(lambda x:x.split('/')[1]).apply(int)
taxi["date"] = pd.to_datetime(taxi["date"]).dt.date

# Start and end are the same day, so only 2016-01-01 pickups are kept.
s_date = datetime.strptime('20160101', '%Y%m%d').date()
e_date = datetime.strptime('20160101', '%Y%m%d').date()
week_df = taxi[(taxi['date'] >= s_date) & (taxi['date'] <= e_date)]
month_traffic = week_df.drop(['date'], axis=1)
print("one week data:", len(month_traffic))
print(month_traffic['day'])

# Store the selected rows as a plain list of lists.
month_traffic = month_traffic.values.tolist()
# The context manager closes the file even if pickling fails.
with open(r"../data/NY_traffic_1_.pickle","wb") as file:
    pickle.dump(month_traffic,file)
48 | 
49 | 


--------------------------------------------------------------------------------
/pre_s4.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import pickle
  3 | import pandas as pd
  4 | import numpy as np
  5 | import copy
  6 | from shapely.geometry import Point, LineString
  7 | from shapely.geometry import Polygon,MultiPoint  #多边形
  8 | import torch
  9 | import networkx as nx
 10 | import matplotlib.pyplot as pl
 11 | 
 12 | 
 13 | 
 14 | 
 15 | def load_data(file):
 16 |         data_load_file = []
 17 |         file_1 = open(file, "rb")
 18 |         data_load_file = pickle.load(file_1)
 19 |         return data_load_file
 20 | 
 21 | 
 22 | reg_vec_sort = load_data("../data/reg_poi_vec.pickle")
 23 | region_que = load_data("../data/reg_poi_idx.pickle")
 24 | region_attr_edges=[]
 25 | 
 26 | 
 27 | for idx in region_que:
 28 |     for idt in range(idx+1, len(reg_vec_sort)):
 29 |         # print("^^:",reg_vec_sort[idx].size())
 30 |         # print("**:",reg_vec_sort[idx+1].size())
 31 |         # pritnln()
 32 |         output = torch.cosine_similarity(torch.unsqueeze(reg_vec_sort[idx],0), torch.unsqueeze(reg_vec_sort[idt],0), eps=1e-08).mean()
 33 |         # print("output:", output.item())
 34 |         # pritnln()
 35 |         #0.87
 36 |         if output.item()>=0.9:
 37 |             tmp_1 = "r" + '_' + str(idx)+"_"+"p"
 38 |             tmp_2 = "r" + '_' + str(idt)+"_"+"p"
 39 |             # sim_dict[key] = [tmp_1, tmp_2, value]
 40 |             region_attr_edges.append([tmp_1, tmp_2, output.item()])
 41 | print(len(region_attr_edges))
 42 | # println()
 43 | G = nx.Graph()
 44 | # for edge in edges:
 45 | #     G.add_edge(edge[0],edge[1],weight= edge[2])
 46 | 
 47 | [G.add_edge(edge[0],edge[1],weight= edge[2], date = "1", start = edge[0], end = edge[1] ) for edge in region_attr_edges]
 48 | # print(len(G.adj))
 49 | # nx.draw(G, with_labels=True)
 50 | # plt.show()
 51 | 
 52 | 
 53 | file=open(r"../data/region_attr_graph_test.pickle","wb")
 54 | pickle.dump(G,file) #storing_list
 55 | file.close()
 56 | 
 57 | print("attr_region:", G)
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | 
 68 | 
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 
 74 | 
 75 | 
 76 | 
 77 | 
 78 | 
 79 | 
 80 | 
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/code/test_data.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import pandas as pd
 4 | import pickle
 5 | from shapely.geometry import Point, LineString
 6 | from shapely.geometry import Polygon,MultiPoint  #多边形
 7 | import torch
 8 | from torch import nn
 9 | import numpy as np
10 | 
def load_data(file):
    """Load and return the object pickled in ``file``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only use this on trusted
        files.
    """
    # The context manager closes the handle even when unpickling raises.
    with open(file, "rb") as handle:
        return pickle.load(handle)
16 | 
def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path``; the file is closed even if dumping fails."""
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


def _range_mask(start, count, total):
    """Return ``total`` booleans that are True exactly on [start, start + count)."""
    return [False] * start + [True] * count + [False] * (total - start - count)


view_graph = load_data("./data/hy_new_aaai_2.pickle")
region_lab = load_data("./data/region_label.pickle")

# The second '_'-separated field of each node name is parsed as a region
# index; nodes whose index has no entry in region_lab get the label 5.
nodes_list = view_graph.nodes()
nodes_lab = [region_lab.get(int(item.split("_")[1]), 5) for item in nodes_list]

# NOTE(review): the mask length is hard-coded rather than derived from
# len(nodes_lab) -- confirm the graph really has 1388 nodes.
data_num = 1388
_dump_pickle(nodes_lab, r"./data/nodes_lab.pickle")

# Train mask: the leading block of entries.
tmp_len = int(0.051698670605613*data_num)
train_mask = _range_mask(0, tmp_len, data_num)
_dump_pickle(train_mask, r"./data/train_mask.pickle")

# Validation mask: the block immediately after the training entries.
tmp_len_val = int(0.18463810930576072*data_num)
val_mask = _range_mask(tmp_len, tmp_len_val, data_num)
_dump_pickle(val_mask, r"./data/val_mask.pickle")

# Test mask: the block immediately after the validation entries.
test_len = int(0.36927621861152143*data_num)
test_mask = _range_mask(tmp_len + tmp_len_val, test_len, data_num)
_dump_pickle(test_mask, r"./data/test_mask.pickle")

print("---Done---")
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/region_spatial.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import pickle
 4 | import numpy as np
 5 | # import matplotlib.pyplot as plt
 6 | from sklearn import metrics
 7 | from sklearn.cluster import KMeans 
 8 | from sklearn.metrics import adjusted_mutual_info_score
 9 | import json
10 | import numpy as np
11 | import pandas as pd
12 | from sklearn import linear_model
13 | from sklearn.model_selection import KFold
14 | from sklearn.metrics import adjusted_rand_score
15 | from sklearn.metrics import normalized_mutual_info_score
16 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
17 | from shapely.geometry import Polygon
18 | from shapely import wkt
19 | import geopandas as gpd
20 | import math
21 | from math import cos
def load_data(file):
    """Load and return the object pickled in ``file``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only use this on trusted
        files.
    """
    # The context manager closes the handle even when unpickling raises.
    with open(file, "rb") as handle:
        return pickle.load(handle)
27 | 
# Grid cell size, in the same unit as the coordinate differences scaled by
# 111100 below (presumably metres -- TODO confirm).
para= 3000
region_pos = load_data("../data/region_back.pickle")

# Centroid coordinate of every region, per key and as a flat list.
reg_sm = {}
all_pos = []
for key,value in region_pos.items():
    centroid = list(value.centroid.coords)[0]
    reg_sm[key] = centroid
    all_pos.append(centroid)

max_lon = max(item[0] for item in all_pos)
min_lon = min(item[0] for item in all_pos)
max_lat = max(item[1] for item in all_pos)
min_lat = min(item[1] for item in all_pos)

# Extent of the bounding box measured in grid cells.
dis_lon = (max_lon-min_lon)*111100
lon_num = math.ceil(dis_lon/para)

# NOTE(review): cos() expects radians and is applied to a coordinate
# difference here; left untouched because changing it would alter the
# generated tokens -- confirm this is intended.
dis_lat = (max_lat-min_lat)*111100*cos(max_lat-min_lat)
lat_num = math.ceil(dis_lat/para)

# Give every centroid a grid token and number the distinct tokens in
# first-seen order. li_map is filled while scanning so each membership test
# is an O(1) dict lookup instead of a scan over a growing list.
reg_token = {}
li_map = {}
for idx,pos in enumerate(all_pos):
    lon = pos[0]-min_lon
    lat = pos[1]-min_lat
    x,y = int(lon*111100/para),int(lat*111100*cos(lat)/para)
    # NOTE(review): 21 is a fixed row stride; lat_num computed above is not
    # used here -- confirm y always stays below 21.
    tok= x*21+y
    li_map.setdefault(tok, len(li_map))
    reg_token[idx] = tok
li = list(li_map)
print(len(li))

# Region index -> compact token id, plus the distinct ids in first-seen order.
reg_t_con = {key: li_map[value] for key,value in reg_token.items()}
ton = list(dict.fromkeys(reg_t_con.values()))
print(reg_t_con)
print(max(ton))

# The context manager closes the file even if pickling fails.
with open(r"../data/region_spatial.pickle","wb") as file:
    pickle.dump(reg_t_con,file)
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/code/eval.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import functools
 3 | 
 4 | from sklearn.metrics import f1_score, roc_auc_score
 5 | from sklearn.linear_model import LogisticRegression
 6 | from sklearn.svm import SVC
 7 | from sklearn.model_selection import train_test_split, GridSearchCV
 8 | from sklearn.multiclass import OneVsRestClassifier
 9 | from sklearn.preprocessing import normalize, OneHotEncoder
10 | 
11 | 
def repeat(n_times):
    """Decorator factory that runs a function ``n_times`` and aggregates it.

    The decorated function must return a dict of numbers. The wrapper calls
    it ``n_times`` with the same arguments, computes the mean and standard
    deviation of every key (keys are taken from the first run), prints them
    through ``print_statistics`` and returns
    ``{key: {'mean': ..., 'std': ...}}``.
    """
    def decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            runs = []
            for _ in range(n_times):
                runs.append(f(*args, **kwargs))
            statistics = {}
            for key in runs[0]:
                samples = [run[key] for run in runs]
                statistics[key] = {'mean': np.mean(samples),
                                   'std': np.std(samples)}
            print_statistics(statistics, f.__name__)
            return statistics
        return wrapper
    return decorator
27 | 
28 | 
def prob_to_one_hot(y_pred):
    """Turn per-row scores into a boolean one-hot matrix.

    Args:
        y_pred: 2-D array of scores, one row per sample.

    Returns:
        Boolean array of the same shape with a single True per row, placed
        at the row's arg-max (the first maximum wins on ties).
    """
    # The ``np.bool`` alias was removed in NumPy 1.24; the builtin ``bool``
    # is the supported dtype spelling.
    ret = np.zeros(y_pred.shape, dtype=bool)
    indices = np.argmax(y_pred, axis=1)
    # Set one entry per row in a single vectorized assignment.
    ret[np.arange(y_pred.shape[0]), indices] = True
    return ret
35 | 
36 | 
def print_statistics(statistics, function_name):
    """Print ``key=mean+-std`` for every entry of ``statistics`` on one line.

    The line starts with ``(E) | <function_name>:``; entries are separated
    by ", " and followed by a newline. With no entries only the prefix is
    printed, without a newline.
    """
    print(f'(E) | {function_name}:', end=' ')
    entries = [
        f"{key}={stat['mean']:.4f}+-{stat['std']:.4f}"
        for key, stat in statistics.items()
    ]
    if entries:
        print(', '.join(entries))
47 | 
48 | 
@repeat(20)
def label_classification(embeddings, y, ratio):
    """Score embeddings with a one-vs-rest logistic-regression probe.

    Args:
        embeddings: Tensor of node embeddings; detached and moved to the CPU.
        y: Tensor of class labels, one per embedding.
        ratio: Fraction of the samples used for training; the remaining
            ``1 - ratio`` is the test split.

    Returns:
        ``{"ACC": accuracy}`` for one random split. The ``repeat(20)``
        decorator runs this 20 times and returns the mean and standard
        deviation per key instead.
    """
    X = embeddings.detach().cpu().numpy()
    Y = y.detach().cpu().numpy()
    Y = Y.reshape(-1, 1)
    onehot_encoder = OneHotEncoder(categories='auto').fit(Y)
    # The ``np.bool`` alias was removed in NumPy 1.24; use the builtin bool.
    Y = onehot_encoder.transform(Y).toarray().astype(bool)

    X = normalize(X, norm='l2')

    X_train, X_test, y_train, y_test = train_test_split(X, Y,
                                                        test_size=1 - ratio)

    # Grid-search the regularization strength C over 2**-10 .. 2**9.
    logreg = LogisticRegression(solver='liblinear')
    c = 2.0 ** np.arange(-10, 10)

    clf = GridSearchCV(estimator=OneVsRestClassifier(logreg),
                       param_grid=dict(estimator__C=c), n_jobs=8, cv=5,
                       verbose=0)
    clf.fit(X_train, y_train)

    y_pred = clf.predict_proba(X_test)
    y_pred = prob_to_one_hot(y_pred)

    # Both matrices are one-hot, so the column index of the True entry in
    # each row is that row's class.
    acc = np.sum(np.where(y_test)[1]==np.where(y_pred)[1])/len(y_pred)

    return {"ACC": acc}
76 | 


--------------------------------------------------------------------------------
/code/config.yaml:
--------------------------------------------------------------------------------
  1 | Cora:
  2 |   seed: 4
  3 |   learning_rate: 0.0005
  4 |   num_hidden: 128
  5 |   num_proj_hidden: 128
  6 |   activation: 'relu'
  7 |   base_model: 'GCNConv'
  8 |   num_layers: 2
  9 |   drop_edge_rate_1: 0.2
 10 |   drop_edge_rate_2: 0.4
 11 |   drop_feature_rate_1: 0.3
 12 |   drop_feature_rate_2: 0.4
 13 |   tau: 0.4
 14 |   num_epochs: 300
 15 |   weight_decay: 0.00001
 16 |   eps: 0.5
 17 |   alpha: 200
 18 |   beta: 0.01
 19 |   lamb: 0
 20 | CiteSeer:
 21 |   seed: 2
 22 |   learning_rate: 0.001
 23 |   num_hidden: 256
 24 |   num_proj_hidden: 256
 25 |   activation: 'prelu'
 26 |   base_model: 'GCNConv'
 27 |   num_layers: 2
 28 |   drop_edge_rate_1: 0.2
 29 |   drop_edge_rate_2: 0.0
 30 |   drop_feature_rate_1: 0.3
 31 |   drop_feature_rate_2: 0.2
 32 |   tau: 0.9
 33 |   num_epochs: 200
 34 |   weight_decay: 0.00001
 35 |   eps: 1
 36 |   alpha: 50
 37 |   beta: 0.01
 38 |   lamb: 0
 39 | AmazonC:
 40 |    seed: 3
 41 |    learning_rate: 0.01
 42 |    num_hidden: 128
 43 |    num_proj_hidden: 128
 44 |    activation: "rrelu"
 45 |    base_model: 'GCNConv'
 46 |    num_layers: 2
 47 |    drop_edge_rate_1: 0.6
 48 |    drop_edge_rate_2: 0.3
 49 |    drop_feature_rate_1: 0.2
 50 |    drop_feature_rate_2: 0.3
 51 |    tau: 0.2
 52 |    num_epochs: 2000
 53 |    weight_decay: 0.00001
 54 |    eps: 1
 55 |    alpha: 50
 56 |    beta: 0.01
 57 |    lamb: 0
 58 | AmazonP:
 59 |    seed: 3
 60 |    learning_rate: 0.1
 61 |    num_hidden: 256
 62 |    num_proj_hidden: 64
 63 |    activation: "relu"
 64 |    base_model: 'GCNConv'
 65 |    num_layers: 2
 66 |    drop_edge_rate_1: 0.3
 67 |    drop_edge_rate_2: 0.5
 68 |    drop_feature_rate_1: 0.1
 69 |    drop_feature_rate_2: 0.1
 70 |    tau: 0.3
 71 |    num_epochs: 2000
 72 |    weight_decay: 0.00001
 73 |    eps: 0.5
 74 |    alpha: 200
 75 |    beta: 0.01
 76 |    lamb: 0
 77 | CoauthorC:
 78 |    seed: 1
 79 |    learning_rate: 0.0005
 80 |    num_hidden: 256
 81 |    num_proj_hidden: 256
 82 |    activation: "rrelu"
 83 |    base_model: 'GCNConv'
 84 |    num_layers: 2
 85 |    drop_edge_rate_1: 0.3
 86 |    drop_edge_rate_2: 0.2
 87 |    drop_feature_rate_1: 0.3
 88 |    drop_feature_rate_2: 0.4
 89 |    tau: 0.4
 90 |    num_epochs: 1000
 91 |    weight_decay: 0.00001
 92 |    eps: 2
 93 |    alpha: 50
 94 |    beta: 0.001
 95 |    lamb: 2
 96 | CoauthorP:
 97 |    seed: 1
 98 |    learning_rate: 0.01
 99 |    num_hidden: 128
100 |    num_proj_hidden: 64
101 |    activation: "rrelu"
102 |    base_model: 'GCNConv'
103 |    num_layers: 2
104 |    drop_edge_rate_1: 0.4
105 |    drop_edge_rate_2: 0.1
106 |    drop_feature_rate_1: 0.1
107 |    drop_feature_rate_2: 0.4
108 |    tau: 0.5
109 |    num_epochs: 1500
110 |    weight_decay: 0.00001
111 |    eps: 1
112 |    alpha: 50
113 |    beta: 0.001
114 |    lamb: 0


--------------------------------------------------------------------------------
/code/vector_transform.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import torch
 4 | from torch_geometric.data import Data
 5 | from itertools import product
 6 | import numpy as np
 7 | import pandas as pd
 8 | from torch import nn
 9 | import pickle
10 | 
11 | import warnings
12 | warnings.filterwarnings("ignore")
13 | 
def load_data(file):
    """Load and return the object pickled at path ``file``.

    NOTE: unpickling can execute arbitrary code; only use this on trusted,
    locally generated data files.
    """
    # The context manager closes the handle even if unpickling raises;
    # the previous version left the file open.
    with open(file, "rb") as handle:
        return pickle.load(handle)
19 |         
# Region/node embeddings produced by training.  Inputs used in earlier
# experiments: tmp_vector.pickle, tmp_vector_vgae_au.pickle,
# tmp_vector_vgae_3.pickle.
tmp_vector = load_data("./data/tmp_vector_chi_3.pickle")
ten = torch.tensor(tmp_vector.tolist())

# Graph whose node order is assumed to match the rows of `ten`
# (earlier input: hy_new_aaai_2.pickle).
hy = load_data("./data/hy_aaai_chi_1.pickle")
region = ten

# Group node vectors by the region id stored in the second "_"-separated
# field of the node name.  The id is converted to int BEFORE the lookup:
# the previous code tested the str id against int keys, so the test never
# matched, the list was reset on every node, and each region kept only its
# last vector instead of all of them.
hy_nodes_dict = {}
for n, n_vec in zip(hy.nodes(), region):
    tp = int(n.split("_")[1])
    hy_nodes_dict.setdefault(tp, []).append(n_vec.tolist())

# Average all node vectors that belong to the same region.
hy_com = {}
for key, value in hy_nodes_dict.items():
    hy_com[key] = np.mean(value, axis=0).tolist()

# Randomly initialised (untrained) projection 128 -> 16.
linear = nn.Linear(128, 16)
hycom_vec = []
for key, value in hy_com.items():
    hycom_vec.append(linear(torch.tensor(value)).tolist())

# Number of regions found in the graph (234 for the data this was written for).
num_regions = len(hycom_vec)

# NOTE(review): np.tile followed by reshape does not broadcast each region's
# vector over the (30, 4) axes; vec_final_[i, j] holds region
# (30 * i + j) % num_regions.  Kept as-is; confirm this layout is intended.
vec_final_ = np.reshape(np.tile(np.array(hycom_vec), (30, 4)),
                        (num_regions, 30, 4, 16))

print(np.array(hycom_vec).shape)
print("vec_final_:",vec_final_.shape)
with open(r"./data/tmp_7.pickle", "wb") as file:
    pickle.dump(vec_final_, file)
with open(r"./data/tmp_house.pickle", "wb") as file:
    pickle.dump(np.array(hycom_vec), file)

# Transform for traffic prediction: flatten all region vectors and project
# them to one 32-dimensional vector (again an untrained linear layer).
trans_1 = np.reshape(np.array(hycom_vec), num_regions * 16)
linear_traffic = nn.Linear(num_regions * 16, 32)
traffic_vec = linear_traffic(torch.tensor(trans_1).float()).detach().numpy()
with open(r"./data/traff_vec.pickle", "wb") as file:
    pickle.dump(traffic_vec, file)

print("---finish---")
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 


--------------------------------------------------------------------------------
/pre_poifrom_osm.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pandas as pd
 3 | import pickle
 4 | from shapely.geometry import Point, LineString
 5 | from shapely.geometry import Polygon,MultiPoint  #多边形
 6 | import torch
 7 | from torch import nn
 8 | 
 9 | 
def load_data(file):
    """Load and return the object pickled at path ``file``.

    NOTE: unpickling can execute arbitrary code; only use this on trusted,
    locally generated data files.
    """
    # The context manager closes the handle even if unpickling raises;
    # the previous version left the file open.
    with open(file, "rb") as handle:
        return pickle.load(handle)
# Every POI row as a plain list; columns are addressed by position below.
poi = pd.read_csv("../data/poi_nyc.csv",sep=",").values.tolist()
# Mapping region id -> geometry (used with Point.intersects below).
region_back = load_data("../data/region_back_merge.pickle")

# region id -> distinct POI category names found inside that region
region_poi = {key: [] for key in region_back}
# all distinct category names, in order of first appearance
poi_list = []

for item in poi:
    # Built once per POI row instead of once per (row, region) pair.
    tmp_point = Point(item[3], item[0])

    # Column 23 is the preferred category, column 84 the fallback; a single
    # space marks "no value".
    # NOTE(review): cells that pandas reads as NaN are not equal to " " and
    # would therefore be kept as a category - confirm the CSV has no
    # missing cells in these columns.
    if item[23] != " ":
        category = item[23]
    elif item[84] != " ":
        category = item[84]
    else:
        continue

    for key, value in region_back.items():
        if tmp_point.intersects(value):
            if category not in region_poi[key]:
                region_poi[key].append(category)
            if category not in poi_list:
                poi_list.append(category)
print(region_poi)
print(poi_list)
# poi_list = ['drinking_water', 'toilets', 'school', 'hospital', 'arts_centre', 'fire_station', 'police', 'bicycle_parking', 'fountain', 'ferry_terminal', 'bench', 'cinema', 'cafe', 'pub', 'waste_basket', 'parking_entrance', 'parking', 'fast_food', 'bank', 'restaurant', 'ice_cream', 'pharmacy', 'taxi', 'post_box', 'atm', 'nightclub', 'social_facility', 'bar', 'biergarten', 'clock', 'bicycle_rental', 'community_centre', 'watering_place', 'ranger_station', 'boat_rental', 'recycling', 'payment_terminal', 'bicycle_repair_station', 'place_of_worship', 'shelter', 'telephone', 'clinic', 'dentist', 'vending_machine', 'theatre', 'charging_station', 'public_bookcase', 'post_office', 'fuel', 'doctors']

# category name -> integer id (position of first appearance)
poi_dict = {item: idx for idx, item in enumerate(poi_list)}
print("sum of the category of POI:", len(poi_dict))

# region id -> list of category ids present in that region
reg_incld_poi = {}
for key, value in region_poi.items():
    reg_incld_poi[key] = [poi_dict[uu] for uu in value if uu in poi_dict]
print("reg_incld_poi:",reg_incld_poi)

with open(r"../data/reg_incld_poi_new.pickle", "wb") as file:
    pickle.dump(reg_incld_poi, file)

with open(r"../data/poi_dict_new.pickle", "wb") as file:
    pickle.dump(poi_dict, file)
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/pre_spatial_graph.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | import pandas as pd
 4 | from shapely.geometry import Point, LineString
 5 | from shapely.geometry import Polygon,MultiPoint  #多边形
 6 | import matplotlib.pyplot as plt
 7 | import json
 8 | from urllib.request import urlopen, quote
 9 | import requests
10 | import geopy
11 | from geopy.geocoders import Nominatim
12 | import copy
13 | import pickle
14 | from datetime import datetime
15 | from itertools import chain
16 | import networkx as nx
17 | import numpy as np
18 | import matplotlib.pyplot as plt
19 | from math import radians, cos, sin, asin, sqrt
20 | 
def haversine(lon1, lat1, lon2, lat2):
    """Great-circle distance between two (lon, lat) points given in degrees.

    Uses the haversine formula with a sphere radius of 6371 (kilometres)
    and multiplies by 1000, so the result is in metres.
    """
    lam_a = radians(lon1)
    phi_a = radians(lat1)
    lam_b = radians(lon2)
    phi_b = radians(lat2)

    # haversine formula
    delta_lam = lam_b - lam_a
    delta_phi = phi_b - phi_a
    hav = sin(delta_phi/2)**2 + cos(phi_a) * cos(phi_b) * sin(delta_lam/2)**2
    central_angle = 2 * asin(sqrt(hav))
    radius_km = 6371
    return central_angle * radius_km * 1000
32 | 
def load_data(file):
    """Load and return the object pickled at path ``file``.

    NOTE: unpickling can execute arbitrary code; only use this on trusted,
    locally generated data files.
    """
    # The context manager closes the handle even if unpickling raises;
    # the previous version left the file open.
    with open(file, "rb") as handle:
        return pickle.load(handle)
38 |     
# Number of regions and the distance cut-off (metres) for a spatial edge.
NUM_REGIONS = 180
MAX_DISTANCE_M = 5600

region_back = load_data("../data/region_back_merge.pickle")
# Loaded as in the original script; not used below.
region_fea = load_data("../data/reg_fea.pickle")

# Centroid coordinates of every region, computed once up front instead of
# once per region pair inside the double loop.
centroids = [list(region_back[idx].centroid.coords)[0]
             for idx in range(NUM_REGIONS)]

spatial_edges = []
for ii in range(NUM_REGIONS):
    t_1_pos = centroids[ii]
    for jj in range(ii + 1, NUM_REGIONS):
        t_2_pos = centroids[jj]
        value = haversine(t_1_pos[0], t_1_pos[1], t_2_pos[0], t_2_pos[1])
        if value <= MAX_DISTANCE_M:  # centroids at most 5.6 km apart
            n1 = "r"+"_"+str(ii)
            n2 = "r"+"_"+str(jj)
            # Each (ii, jj) pair is visited exactly once, so no duplicate
            # check is needed (the previous linear scan of the edge list
            # could never find a match).
            spatial_edges.append(
                (n1, n2, {"weight":value, "date":1, "start":n1, "end":n2}))

print(len(spatial_edges))

print("spatial_edges:",spatial_edges)
print(len(spatial_edges))
print("finish spatial graph")

# spatial graph
G_spatial = nx.Graph()
G_spatial.add_edges_from(spatial_edges)
print("G_spatial:",G_spatial)

with open(r"../data/spatial_graph_5600.pickle", "wb") as file:
    pickle.dump(G_spatial, file)


--------------------------------------------------------------------------------
/pre_s10.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pandas as pd
 3 | import pickle
 4 | from shapely.geometry import Point, LineString
 5 | from shapely.geometry import Polygon,MultiPoint  #多边形
 6 | import torch
 7 | from torch import nn
 8 | import networkx as nx
 9 | import numpy as np
10 | 
11 | 
def load_data(file):
    """Load and return the object pickled at path ``file``.

    NOTE: unpickling can execute arbitrary code; only use this on trusted,
    locally generated data files.
    """
    # The context manager closes the handle even if unpickling raises;
    # the previous version left the file open.
    with open(file, "rb") as handle:
        return pickle.load(handle)
17 | 
# Node embeddings; assumed to be row-aligned with the node order of `hy`.
region = load_data("../data/hy_vector_signal_trans_18.pickle")
check_vector = load_data("../data/ck_poi.pickle")
hy = load_data("../data/hy_6.pickle")


print(region.size())


print(check_vector[0].size())
print(hy.nodes())
print(hy)

# Group node vectors by the region id stored in the second "_"-separated
# field of the node name (kept as str here, converted to int below).
hy_nodes_dict = {}
for n, n_vec in zip(hy.nodes(), region):
    hy_nodes_dict.setdefault(n.split("_")[1], []).append(n_vec.tolist())

# Average the vectors of each region; the round trip through torch.tensor
# is kept from the original so the stored values are unchanged.
hy_com = {}
for key, value in hy_nodes_dict.items():
    tmp = np.mean(value, axis=0).tolist()
    hy_com[int(key)] = torch.tensor(tmp).tolist()


with open(r"../data/hy_com_dict_trans.pickle", "wb") as file:
    pickle.dump(hy_com, file)
print("---finish---")


# Stop here on purpose.  The original called an undefined `println()` to
# abort with a NameError; an explicit exit states the intent and ends the
# script cleanly.
raise SystemExit(0)
# NOTE(review): this section is not reached when the script runs, because
# execution stops at the statement that precedes these imports.  It is a
# stand-alone Lasso demonstration on the `mglearn` extended Boston dataset
# and uses none of the data prepared earlier in this file.
from sklearn.linear_model import Lasso,Ridge
from sklearn.model_selection import train_test_split 
import matplotlib.pyplot as plt
import numpy as np
import mglearn

# Load the data and split it into training and test sets
X,y = mglearn.datasets.load_extended_boston()
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42)
# Build three Lasso instances with different alpha values
# (Lasso() uses its default alpha, labelled "alpha=1" in the output below)
lasso = Lasso().fit(X_train,y_train)
lasso001 =Lasso(alpha=0.01).fit(X_train,y_train)
lasso00001 = Lasso(alpha=0.0001).fit(X_train,y_train)
print('**********************************')
print("Lasso alpha=1")
print ("training set score:{:.2f}".format(lasso.score(X_train,y_train)))
print ("test set score:{:.2f}".format(lasso.score(X_test,y_test)))
# Count of non-zero coefficients, i.e. features the model actually uses
print ("Number of features used:{}".format(np.sum(lasso.coef_!=0)))

print('**********************************')
print("Lasso alpha=0.01")
print ("training set score:{:.2f}".format(lasso001.score(X_train,y_train)))
print ("test set score:{:.2f}".format(lasso001.score(X_test,y_test)))
print ("Number of features used:{}".format(np.sum(lasso001.coef_!=0)))

print('**********************************')
print("Lasso alpha=0.0001")
print ("training set score:{:.2f}".format(lasso00001.score(X_train,y_train)))
print ("test set score:{:.2f}".format(lasso00001.score(X_test,y_test)))
print ("Number of features used:{}".format(np.sum(lasso00001.coef_!=0)))
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 
95 | 
96 | 
97 | 
98 | 


--------------------------------------------------------------------------------
/pre_poi_transformer.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import pandas as pd
  3 | import pickle
  4 | from shapely.geometry import Point, LineString
  5 | from shapely.geometry import Polygon,MultiPoint  
  6 | import torch
  7 | from torch import nn
  8 | import numpy as np
  9 | 
 10 | 
 11 | def load_data(file):
 12 |     data_load_file = []
 13 |     file_1 = open(file, "rb")
 14 |     data_load_file = pickle.load(file_1)
 15 |     return data_load_file
 16 | region_back = load_data("../data/region_back_merge.pickle")
 17 | reg_poi = load_data("../data/reg_incld_poi_new.pickle")
 18 | # reg_spatial = load_data("../data/region_spatial.pickle")
 19 | poi_max=[]
 20 | for key,value in reg_poi.items():
 21 |     poi_max.extend(value)
 22 | print(max(poi_max)) #there are 120 fin-grained pois
 23 | # println()
 24 | reg_poi_={}
 25 | s = 0
 26 | emb = nn.Embedding(120, 512)
 27 | embedding_spatial = torch.nn.Embedding(15, 512)  # spatial
 28 | for key,value in reg_poi.items():
 29 |     # print("value:",value)
 30 |     if value!=[]:
 31 |         reg_poi_[key]=[]
 32 |         # print("value:",value)
 33 |         if len(value)>s:
 34 |             s = len(value)
 35 |         for item in value:
 36 |             reg_poi_[key].append(emb(torch.tensor(item)).tolist())
 37 | # spa_vec= embedding_spatial(torch.tensor(reg_spatial[idx]))
 38 | # reg_poi_t = {}
 39 | reg_poi_list = []
 40 | for iii in range(180):
 41 | # for key,value in reg_poi_.items():
 42 |     if iii not in reg_poi_.keys():
 43 |         reg_poi_list.append(np.array([0.0]*512))
 44 |         # reg_poi_list.append(ci)
 45 |     else:
 46 |         # print("value:",value)
 47 |         tp = np.mean(reg_poi_[key],axis=0)
 48 |         reg_poi_list.append(tp)
 49 | reg_poi_list_  = torch.tensor(np.array(reg_poi_list)).float()
 50 | reg_poi_list_tensor = torch.unsqueeze(reg_poi_list_,0)
 51 | print(reg_poi_list_tensor.size())
 52 | 
 53 | reg_idx= [key for key in reg_poi_.keys()]
 54 | from torch import nn
 55 | encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8 )
 56 | transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
 57 | # src = torch.rand(1, 172, 512)
 58 | src = reg_poi_list_tensor
 59 | out = transformer_encoder(src)
 60 | # print(out.size())
 61 | out_ = torch.squeeze(out,0)
 62 | print(out_.size())
 63 | print(reg_idx)
 64 | print(len(reg_idx))
 65 | # reg_poi_vec = {}
 66 | # for idx,vec in zip(reg_idx,out_):
 67 | #     reg_poi_vec[idx] = vec
 68 | 
 69 | file=open(r"../data/reg_poi_vec.pickle","wb")
 70 | pickle.dump(out_,file) #storing_list
 71 | file.close()
 72 | 
 73 | file=open(r"../data/reg_poi_idx.pickle","wb")
 74 | pickle.dump(reg_idx,file) #storing_list
 75 | file.close()
 76 |         
 77 | 
 78 | 
 79 | 
 80 | 
 81 | 
 82 | 
 83 |     
 84 |     
 85 |     
 86 |     
 87 |     
 88 |     
 89 |     
 90 |     
 91 |     
 92 |     
 93 |     
 94 |     
 95 |     
 96 |     
 97 |     
 98 |     
 99 |     
100 |     
101 |     
102 |     
103 |     
104 |     
105 |     
106 |     
107 | 
108 | 


--------------------------------------------------------------------------------
/code/attack.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from utils import normalize_adj_tensor, normalize_adj_tensor_sp, edge2adj
 4 | 
 5 | def bisection(a,eps,xi,ub=1):
 6 |     pa = torch.clamp(a, 0, ub)
 7 |     if torch.sum(pa) <= eps:
 8 |         upper_S_update = pa
 9 |     else:
10 |         mu_l = torch.min(a-1)
11 |         mu_u = torch.max(a)
12 |         mu_a = (mu_u + mu_l)/2
13 |         while torch.abs(mu_u - mu_l)>xi:
14 |             mu_a = (mu_u + mu_l)/2
15 |             gu = torch.sum(torch.clamp(a-mu_a, 0, ub)) - eps
16 |             gu_l = torch.sum(torch.clamp(a-mu_l, 0, ub)) - eps
17 |             if gu == 0:
18 |                 break
19 |             if torch.sign(gu) == torch.sign(gu_l):
20 |                 mu_l = mu_a
21 |             else:
22 |                 mu_u = mu_a
23 |         upper_S_update = torch.clamp(a-mu_a, 0, ub)
24 |     return upper_S_update
25 | 
26 | 
def PGD_attack_graph(model, edge_index_1, edge_index_2, x_1, x_2, steps, node_ratio, alpha, beta):
    """Projected-gradient attack on the edges and features of the second view.

    The model parameters are frozen, and a dense edge-perturbation matrix
    ``S_`` and a feature perturbation ``delta`` are updated for ``steps``
    iterations to increase ``model.loss`` between the two views.

    Args:
        model: callable as ``model(x, adj)`` and exposing
            ``loss(z1, z2, batch_size=...)`` whose first return value is
            averaged to form the attack objective.
        edge_index_1: edge index of the first (unperturbed) view.
        edge_index_2: edge index of the view under attack; its second
            dimension is used as the edge count.
        x_1, x_2: node features of the two views.
        steps: number of update iterations.
        node_ratio: scales the edge budget ``eps = total_edges * node_ratio / 2``.
        alpha: step size for ``S_`` (divided by ``sqrt(step + 1)``).
        beta: step size for the sign-gradient update of ``delta``.

    Returns:
        ``(normalize_adj_tensor(A_hat), x_hat)``: the perturbed adjacency
        after sampling and normalisation, and ``x_2`` plus the final
        ``delta``.

    Side effects:
        On exit every parameter of ``model`` has ``requires_grad = True``
        and the model is in training mode, whatever the state was on entry.
    """
    # Freeze the model so gradients only flow to S_ and delta.
    for param in  model.parameters():
        param.requires_grad = False
    model.eval()
    device = x_1.device
    total_edges = edge_index_2.shape[1]
    n_node = x_2.shape[0]
    # Budget passed to bisection() for the sum of the perturbation matrix.
    eps = total_edges * node_ratio/2
    # Tolerance passed to bisection().
    xi = 1e-3
    
    # Dense adjacency of the attacked view.
    # NOTE(review): torch.sparse.FloatTensor is a legacy constructor; confirm
    # it behaves as expected for the device/torch version in use.
    A_ = torch.sparse.FloatTensor(edge_index_2, torch.ones(total_edges,device=device), torch.Size((n_node, n_node))).to_dense() 
    # Flip direction: 1 - 2*A - I.  Assuming A_ has 0/1 entries, this is +1
    # for absent off-diagonal edges and -1 for present ones, so A_ + S * C_
    # moves an entry towards the opposite state.
    C_ = torch.ones_like(A_) - 2 * A_ - torch.eye(A_.shape[0],device=device)
    # Trainable perturbation matrix, initialised to zero.
    S_ = torch.zeros_like(A_, requires_grad= True)
    # Strictly upper-triangular mask (ones minus the lower triangle incl. diagonal).
    mask = torch.ones_like(A_)
    mask = mask - torch.tril(mask)
    # Trainable feature perturbation, initialised to zero.
    delta = torch.zeros_like(x_2, device=device, requires_grad=True)
    # Adjacency of the clean view; edge2adj comes from utils (not shown here).
    adj_1 = edge2adj(x_1, edge_index_1)
    model.to(device)
    for epoch in range(steps):
        # Keep the upper triangle of S_ and mirror it to get a symmetric S.
        S = (S_ * mask)
        S = S + S.T
        A_prime = A_ + (S * C_)
        # Add self-loops, then normalise (normalize_adj_tensor is from utils).
        adj_hat = normalize_adj_tensor(A_prime + torch.eye(n_node,device=device))
        z1 = model(x_1, adj_1)
        z2 = model(x_2 + delta, adj_hat) 
        loss, _ = model.loss(z1, z2, batch_size=0) 
        attack_loss = loss.mean()
        attack_loss.backward()
        # Gradient ascent on the loss with a step size that decays as 1/sqrt(step).
        S_.data = (S_.data + alpha/np.sqrt(epoch+1)*S_.grad.detach()) # annealing
        # Project S_ back with bisection() using budget eps and tolerance xi.
        S_.data = bisection(S_.data, eps, xi) # clip S
        S_.grad.zero_()
          
        # Sign-gradient ascent on the features, clamped to [-0.04, 0.04].
        delta.data = (delta.data + beta*delta.grad.detach().sign()).clamp(-0.04,0.04)        
        delta.grad.zero_()

    # Sample a binary perturbation: an entry becomes 1 where S_ exceeds a
    # uniform random number, then the result is mirrored.
    randm = torch.rand(n_node, n_node,device=device)
    discretized_S = torch.where(S_.detach() > randm, torch.ones(n_node, n_node,device=device), torch.zeros(n_node, n_node, device=device))
    discretized_S = discretized_S + discretized_S.T
    # Apply the sampled flips and add self-loops.
    A_hat = A_ + discretized_S * C_ + torch.eye(n_node,device=device)
        
    # Unfreeze the model and switch it back to training mode.
    for param in model.parameters():
        param.requires_grad = True
    model.train()
    x_hat = x_2 + delta.data.to(device)
    # The perturbed adjacency must be symmetric.
    assert torch.equal(A_hat, A_hat.transpose(0,1))
    return normalize_adj_tensor(A_hat), x_hat


--------------------------------------------------------------------------------
/pre_s5.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pickle
 3 | import pandas as pd
 4 | from itertools import chain
 5 | import networkx as nx
 6 | import numpy as np
 7 | import matplotlib.pyplot as plt
 8 | from math import radians, cos, sin, asin, sqrt
 9 | 
10 | 
def load_data(file):
    """Load and return the object pickled at path ``file``.

    NOTE: unpickling can execute arbitrary code; only use this on trusted,
    locally generated data files.
    """
    # The context manager closes the handle even if unpickling raises;
    # the previous version left the file open.
    with open(file, "rb") as handle:
        return pickle.load(handle)
16 | from scipy.sparse import csr_matrix
17 | 
# The three region graphs that are merged into one graph below.
flow_g = load_data('../data/flow_graph_2.pickle')
spatial_g = load_data('../data/spatial_graph_new_1.pickle')
region_attr_g = load_data('../data/region_attr_graph_test.pickle')

flow_nodes = list(flow_g.nodes)
spatial_nodes = list(spatial_g.nodes)
regat_nodes = list(region_attr_g.nodes)
flow_edges = list(flow_g.edges(data=True))
spatial_edges = list(spatial_g.edges(data=True))
regat_edges = list(region_attr_g.edges(data=True))

# Node names are "_"-separated; the fields of every flow node are split
# once here instead of once per pair inside the nested loops below.
flow_parts = [(ff, ff.split("_")) for ff in flow_nodes]

hy_edges = []

# Link each region-attribute node to every flow node that shares its first
# two name fields; the edge takes its "date" from the flow node's third field.
for sub in regat_nodes:
    tmp_sub = sub.split("_")
    tmp_s = tmp_sub[0]+'_'+tmp_sub[1]
    for ss, tmp_ss in flow_parts:
        if tmp_s == tmp_ss[0]+'_'+tmp_ss[1]:
            hy_edges.append(
                (sub, ss,{"weight":1, "date": tmp_ss[2], "start":sub, "end":ss}))
print(len(hy_edges))

# Link each spatial node to every flow node with the same second name field.
for ss in spatial_nodes:
    region_id = ss.split("_")[1]
    for ff, tpf in flow_parts:
        if region_id == tpf[1]:
            hy_edges.append(
                (ss, ff,{"weight":0, "date":1 , "start":ss, "end":ff}))


print("hy_edges:",len(hy_edges))

# Insertion order is kept as in the original (cross-graph edges first, then
# flow, spatial and attribute edges) because it fixes the node order of G_hy.
G_hy = nx.Graph()
G_hy.add_edges_from(hy_edges)
G_hy.add_edges_from(flow_edges)
G_hy.add_edges_from(spatial_edges)
G_hy.add_edges_from(regat_edges)
print("hyper_grapgh:", G_hy)

print(G_hy) 
with open(r"../data/hy_new_test_60.pickle", "wb") as file:
    pickle.dump(G_hy, file)
97 | 
98 | 


--------------------------------------------------------------------------------
/house/pre_s9.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Thu May 19 19:33:25 2022
 4 | 
 5 | @author: User
 6 | """
 7 | # this file is to predict hosue price
 8 | import pandas as pd
 9 | import pickle
10 | from shapely.geometry import Point, LineString
11 | from shapely.geometry import Polygon,MultiPoint  #多边形
12 | import torch
13 | from torch import nn
14 | import networkx as nx
15 | import numpy as np
16 | from sklearn.linear_model import Lasso,Ridge
17 | from sklearn.model_selection import train_test_split 
18 | import matplotlib.pyplot as plt
19 | import numpy as np
20 | # import mglearn
21 | from sklearn.metrics import mean_absolute_error
22 | from sklearn.metrics import r2_score
23 | from sklearn import svm
24 | from sklearn import metrics
25 | 
26 | 
def load_data(file):
    """Load and return the object pickled at path ``file``.

    NOTE: unpickling can execute arbitrary code; only use this on trusted,
    locally generated data files.
    """
    # The context manager closes the handle even if unpickling raises;
    # the previous version left the file open.
    with open(file, "rb") as handle:
        return pickle.load(handle)
32 | 
33 | 
# Each sample is indexed as item[0] (row into the region-vector table),
# item[1] (integer id fed to the embedding) and item[-1] (target value).
train_set = load_data("../data/train_house.pickle")
test_set = load_data("../data/test_house.pickle")
emb_dim = 16
# Both layers are randomly initialised and never trained in this script.
embedding_1 = nn.Embedding(1000000, emb_dim)
linear = nn.Linear(144, 16)


def _build_features(samples, region_vec):
    """Return (features, targets) for ``samples`` using ``region_vec``."""
    features = []
    targets = []
    for item in samples:
        row = []
        row.extend(linear(torch.tensor(region_vec[item[0]]).float()).tolist())
        row.extend(embedding_1(torch.tensor(item[1])).tolist())
        features.append(row)
        targets.append(item[-1])
    return features, targets


for name in ['ours']:
    tmp_vec = load_data("../data/baseline/{}_vector.pickle".format(name))
    train_vec, train_y = _build_features(train_set, tmp_vec)
    test_vec, test_y = _build_features(test_set, tmp_vec)

    # Fit on the training split only.  The original first fitted the same
    # estimator on the test split and then refitted it on the training
    # split; the first fit was discarded by the second and is removed.
    lasso01 = Lasso(alpha=0.00001)
    y_pred_lasso = lasso01.fit(train_vec, train_y).predict(test_vec)
    r2_score_lasso = r2_score(test_y, y_pred_lasso)

    # MAPE divides by the true value, so samples whose target is 0 are
    # left out of that metric only.
    test_y_ = []
    y_pred_lasso_ = []
    for i, j in zip(test_y, y_pred_lasso):
        if i != 0:
            test_y_.append(i)
            y_pred_lasso_.append(j)

    MSE = metrics.mean_squared_error(test_y,y_pred_lasso)
    RMSE = MSE**0.5
    MAE = metrics.mean_absolute_error(test_y,y_pred_lasso)
    MAPE = metrics.mean_absolute_percentage_error(test_y_,y_pred_lasso_)
    print("{}:".format(name), MSE,RMSE,MAE,MAPE,r2_score_lasso)
    print('**********************************')
# The trailing call to the undefined `println()` was removed: the script
# ends here anyway and the call only raised a NameError.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![Framework](./pictures/framework.png)
 2 | 
 3 | # Initial Data link
 4 | https://drive.google.com/drive/folders/1heuy-28olym0Tn4bkMaJTABVF9kXMZNe?usp=sharing
 5 | 
 6 | 
 7 | # Spatial-Temporal Graph Learning with Adversarial Contrastive Adaptation #
This is the implementation of the ICML'23 paper "Spatial-Temporal Graph Learning with Adversarial Contrastive Adaptation".
 9 | 
10 | ## Requirements ##
11 | Pytorch = 1.7.0 and Tensorflow = 1.15.3 (crime prediction task (ST-SHN))
12 | 
13 | ## Data ##
Mobility data, crime data, census block data, POI data, and house prices for Chicago ([https://data.cityofchicago.org/](https://data.cityofchicago.org/)) and New York ([https://opendata.cityofnewyork.us/](https://opendata.cityofnewyork.us/)).
We also provide the processed data and the code used for data preprocessing.
16 | 
## Data Preprocessing ##
18 |     cd data_pre
    pre_s1.py # collecting the positions of the 180 regions in Manhattan, New York (234 regions in a certain district of Chicago)
    pre_s2.py # collecting traffic data
    pre_s3.py # preprocessing the traffic data on the corresponding regions
    pre_s4.py # preprocessing the POI data and obtaining the POI-aware Region Graph
    pre_poi_transformer.py # obtaining the node features with Transformer and Skip-gram
    pre_spatial_graph.py # constructing the trajectory-based Region Graph and the distance-based Region Graph
    pre_s5.py # constructing a hierarchical graph
    pre_s6_dataloader.py # obtaining the dataloader (dataset.pt file) used in the next step to get region embeddings
27 |     cd house
28 |     pre_s7.py # processing house data
29 | 
30 | ## Hyperparameters ##
The dimensionality d of the region representations is set to 96. The depth of the convolutional layers in the GCN is set to 2. The learning rate is initialized as 0.001 with a weight decay of 1e-5. The number of hidden units is 256, and the number of projection hidden units is 256. The eps is set to 0.5, alpha to 0.1, beta to 0.1, and lamb to 0.05. For the crime prediction backbone model, ST-SHN is configured with a learning rate of 0.001 and a weight decay of 0.96. The depth of the spatial path aggregation layers is set to 2. For the traffic prediction backbone model ST-GCN, the historical time window of all tests is set to 60 minutes, with 12 observed data points used to forecast traffic conditions in the next 15, 30, and 45 minutes. The baselines are implemented with the source code released by their original papers. We further apply a grid search strategy for parameter tuning in the baselines to achieve their best performance.
32 | 
33 | ## Spatial-Temporal Adversarial Graph Model (STAG) Training and Obtaining Region Representations ##
    train_edit_auto.py   # training for obtaining region representations
35 | 
36 | ## TASK 1: Crime Prediction Task ##
37 | The code of the ST-SHN is [https://github.com/akaxlh/ST-SHN](https://github.com/akaxlh/ST-SHN)
38 | 
Replace the area embedding with the region representation vector and run `HG_ST_labcode.py`.
40 | 
41 | 
42 | ## TASK 2:  Traffic Prediction Task ##
43 | The code of the ST-GCN is [https://github.com/VeritasYin/STGCN_IJCAI-18](https://github.com/VeritasYin/STGCN_IJCAI-18)
44 | ST-GCN predicts traffic in one stage, namely 15 minutes.
45 | Run `main.py`.
46 | 
47 | 
## TASK 3: House Price Prediction Task ##
49 | We use Lasso Regression to perform house price evaluation task.
50 | 
51 | Just `cd house` and run `pre_s9.py` to get the regression result.
52 | 
53 | ![Prediction Results](./pictures/result.png)
54 | 
55 | ## Citing ##
56 | If our paper benefits your research, please cite it using the BibTeX entry below:
57 | 
58 |     @inproceedings{zhang2023spatial,
59 |       title={Spatial-Temporal Graph Learning with Adversarial Contrastive Adaptation},
60 |       author={Zhang, Qianru and Huang, Chao and Xia, Lianghao and Wang, Zheng and Yiu, Siu Ming and Han, Ruihua},
61 |       booktitle={International Conference on Machine Learning},
62 |       pages={41151--41163},
63 |       year={2023},
64 |       organization={PMLR}
65 |     }
66 | 


--------------------------------------------------------------------------------
/pre_s1.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import pandas as pd
  4 | from shapely.geometry import Point, LineString
  5 | from shapely.geometry import Polygon,MultiPoint  #多边形
  6 | import matplotlib.pyplot as plt
  7 | import json
  8 | from urllib.request import urlopen, quote
  9 | import requests
 10 | import geopy
 11 | from geopy.geocoders import Nominatim
 12 | import copy
 13 | import pickle
 14 | import time
 15 | # taxi = pd.read_csv("../data/2016_Green_Taxi_Trip_Data.csv", sep = ',')
 16 | # print(taxi[:2])
 17 | 
 18 | census_block = pd.read_excel("../data/rollingsales_manhattan.xlsx",skiprows = 4)
 19 | # print(census_block[:2])
 20 | print(census_block.columns.values.tolist())
 21 | blocks = copy.deepcopy(census_block).values.tolist()
 22 | 
 23 | 
 24 | # region = census_block["BUILDING CLASS CATEGORY"].values.tolist()
 25 | region = census_block["BUILDING CLASS AT TIME OF SALE"].values.tolist()
 26 | 
 27 | 
 28 | region_ = list(set(region))
 29 | reg_nyc_dict = {} ##113 region in manhattan
 30 | for idx,sub in enumerate(region_):
 31 |     reg_nyc_dict[sub] = idx
 32 | # print(reg_nyc_dict)
 33 | # print(len(reg_nyc_dict))
 34 | # println()
 35 | 
 36 | skip_num = 0
 37 | region_f = {}
 38 | add_pos = {}
 39 | i= 0
 40 | NYC_house_middle = []
 41 | for sline in blocks:
 42 |     start_t = time.time()
 43 |     i+=1
 44 |     tmp = []
 45 |     # print("sline:", sline[8],sline[18],sline[14], sline[19])
 46 |     # print("address:",sline[8])
 47 |     t = sline[8].split(",")
 48 |     ##collect lat,lon
 49 |     geolocater = Nominatim(user_agent='demo_of_gnss_help')
 50 |     try:
 51 |         if t[0] not in add_pos.keys():
 52 |             # print("not in here")
 53 |             location = geolocater.geocode(t[0])
 54 |             if hasattr(location,'latitude') and (location.latitude is not None) and hasattr(location,'longitude') and (location.longitude is not None):
 55 |                 # print([location.latitude, location.longitude])
 56 |                 # println()
 57 |                 # print("t:", t)
 58 |                 # tmp.append([location.latitude, location.longitude])
 59 |                 # tmp.append(reg_nyc_dict[sline[18]])
 60 |                 add_pos[t[0]] = [location.latitude, location.longitude]
 61 |                 tmp.append(reg_nyc_dict[sline[18]])
 62 |                 tmp.append(sline[14])
 63 |                 tmp.append(sline[19])
 64 |                 # print("--:",float(sline[19])/float(sline[14]))
 65 |                 tmp.append(float(sline[19]))
 66 |                 if reg_nyc_dict[sline[18]] not in region_f.keys():
 67 |                     region_f[reg_nyc_dict[sline[18]]] = []
 68 |                     region_f[reg_nyc_dict[sline[18]]].append([location.latitude, location.longitude])
 69 |                 else:
 70 |                     region_f[reg_nyc_dict[sline[18]]].append([location.latitude, location.longitude])
 71 |                 NYC_house_middle.append(tmp)
 72 |                 
 73 |         else:
 74 |             # print("---in here---")
 75 |             # print("add_pos[t[0]]:", add_pos[t[0]])
 76 |             tmp.append(reg_nyc_dict[sline[18]])
 77 |             tmp.append(sline[14])
 78 |             tmp.append(sline[19])
 79 |             # print("--:",float(sline[19])/float(sline[14]))
 80 |             # tmp.append(float(sline[19]))
 81 |             if reg_nyc_dict[sline[18]] not in region_f.keys():
 82 |                 region_f[reg_nyc_dict[sline[18]]] = []
 83 |                 region_f[reg_nyc_dict[sline[18]]].append(add_pos[t[0]])
 84 |             else:
 85 |                 region_f[reg_nyc_dict[sline[18]]].append(add_pos[t[0]])
 86 |             NYC_house_middle.append(tmp)
 87 |     except IOError:
 88 |         add_pos[t[0]] = []
 89 |         skip_num+=1
 90 |         # print('skip this row')
 91 |     print("i:", i)
 92 |     print(time.time()-start_t)
 93 | 
 94 | print(region_f)    
 95 | print(NYC_house_middle[:3])
 96 | print(len(NYC_house_middle))
 97 | print(len(region_f))
 98 | print(len(add_pos))
 99 | print("skip_num",skip_num)
100 | 
101 | file=open(r"../data/NY_house.pickle","wb")
102 | pickle.dump(NYC_house_middle,file) #storing_list
103 | file.close()
104 | file=open(r"../data/NY_stree_pos.pickle","wb")
105 | pickle.dump(add_pos,file) #storing_list
106 | file.close()
107 | 
108 | file=open(r"../data/NY_region.pickle","wb")
109 | pickle.dump(region_f,file) #storing_list
110 | file.close()
111 | 
112 | 


--------------------------------------------------------------------------------
/house/pre_s7.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch
  3 | # import networkx as nx
  4 | import matplotlib.pyplot as pl
  5 | import pickle
  6 | import pandas as pd
  7 | import numpy as np
  8 | import math
  9 | import numpy as np
 10 | import pandas as pd
 11 | from shapely.geometry import Point, LineString
 12 | from shapely.geometry import Polygon,MultiPoint  #多边形
 13 | import matplotlib.pyplot as plt
 14 | import json
 15 | from urllib.request import urlopen, quote
 16 | import requests
 17 | import geopy
 18 | from geopy.geocoders import Nominatim
 19 | import copy
 20 | import pickle
 21 | import time
 22 | 
 23 | 
 24 | def load_data(file):
 25 |         data_load_file = []
 26 |         file_1 = open(file, "rb")
 27 |         data_load_file = pickle.load(file_1)
 28 |         return data_load_file
 29 | house =  load_data("../data/NY_house.pickle")
 30 | region_old = load_data("../data/NY_region.pickle")
 31 | region_new = load_data("../data/region_back_merge.pickle")
 32 | 
 33 | region_map = {}
 34 | for key,value in region_old.items():
 35 |     tmp = [(item[1],item[0]) for item in value if item!=[]]
 36 |     if len(tmp)>=3:
 37 |         tmp_ = Polygon(tmp)
 38 |         for k,v in region_new.items():
 39 |             if tmp_.intersects(v):
 40 |                  region_map[key]=k
 41 |                  break
 42 |     else:
 43 |         tmp_ = Point(tmp)
 44 |         for k,v in region_new.items():
 45 |             if tmp_.intersects(v):
 46 |                  region_map[key]=k
 47 |                  break
 48 | 
 49 | 
 50 | 
 51 | # print(region_map)
 52 | # print(len(region_old))
 53 | # print(len(region_map))
 54 | # println()
 55 | 
 56 | # region_ = {}
 57 | # for key,value in region.items():
 58 | #     if [] not in value and len(value)>=3:
 59 | #         region_[key] = value
 60 | # region_back = {}
 61 | # map_region = {}
 62 | # for idx, tt in enumerate(region_.items()):
 63 | #     # print(tt)
 64 | #     map_region[tt[0]] = idx
 65 | #     region_back[idx] = tt[1]
 66 | 
 67 | left_region = [item for item in region_map.keys()]
 68 | house_refine = []
 69 | #and np.isnan(float(ie[1])) == False
 70 | for ie in house:
 71 | 	if ie[0] in left_region and np.isnan(float(ie[1])) == False and float(ie[-1])!=0.0 and float(ie[1])!=0.0:
 72 | 		house_refine.append([region_map[ie[0]],ie[1],ie[2]])
 73 | 
 74 | 
 75 | house_sum = []
 76 | for item in house_refine:
 77 |     tmp = []
 78 |     tmp.append(item[0])
 79 |     tmp.append(item[1])
 80 |     tmp.append(item[2])
 81 |     tmp.append(float(item[2]/item[1]))
 82 |     house_sum.append(tmp)
 83 | 
 84 |     # print("item:", item)
 85 | 
 86 | 
 87 | 
 88 | house_array = np.array(house_sum)
 89 | house_unit = house_array[:, 1]
 90 | price_unit = house_array[:, 3]
 91 | 
 92 | unit_max, unit_min  = max(house_unit), min(house_unit)
 93 | price_max, price_min  =  max(price_unit), min(price_unit)
 94 | print(price_max,price_min)
 95 | 
 96 | # re = pd.cut(house_unit, bins=[unit_min,1000,1500, 2000, 2500, unit_max])
 97 | # print("re:",re.tolist())
 98 | # house_uni_class = pd.cut(house_unit, [unit_min-1,1500, 2000, 2500,3000, 3500,4000,unit_max], labels=False).tolist() # 7 classes 
 99 | # price_class = pd.cut(price_unit, [0, 2500,5000,10000, 15000,20000, 30000, 40000,50000, 60000, 70000, 80000, price_max], labels=False).tolist()  # 12 classes
100 | # ,7000,7500,8000,8500,9000,9500,10000, 10500, 11000, 11500,12000],labels=False).values.tolist()
101 | 
102 | # print(price_class)
# One [region id, int(column 1), int(ratio column)] row per kept sale.
house_feature = [
    [item[0], int(unit), int(price)]
    for item, unit, price in zip(house_sum, house_unit, price_unit)
]

# Fixed positional split: the first 700 rows train, the remainder tests.
train_house = house_feature[:700]
test_house = house_feature[700:]

# `with` guarantees each handle is closed even if pickling raises.
with open(r"../data/train_house.pickle", "wb") as file:
    pickle.dump(train_house, file)
with open(r"../data/test_house.pickle", "wb") as file:
    pickle.dump(test_house, file)
125 | 
126 | # re = pd.cut(house.sqft, bins=[0,500,1000,1500,2000, 2500, 3000,3500, 4000,4500, 5000, 5500, 6000,6500
127 | # ,7000,7500,8000,8500,9000,9500,10000, 10500, 11000, 11500,12000],labels=False).values.tolist()
128 | # # hs = pd.read_csv("../data/house_source_extra.csv",sep = ",").values.tolist()
129 | # house['sq'] = re
130 | # hou = house.dropna(axis=0,how='any') #drop all rows that have any NaN values
131 | # # print(len(hos))
132 | # classi = hou['sq'].values.tolist()
133 | # hos = hou.values.tolist()
134 | # classier = list(set(classi))
135 | 
136 | 
137 | # print("house_refine:",house_sum)
138 | # print("before:", len(house))
139 | # print("after:", len(house_sum))
140 | 


--------------------------------------------------------------------------------
/code/data_pre/pre_s4.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import pickle
  4 | import pandas as pd
  5 | import numpy as np
  6 | import copy
  7 | from shapely.geometry import Point, LineString
  8 | from shapely.geometry import Polygon,MultiPoint  #多边形
  9 | import torch
 10 | import networkx as nx
 11 | import matplotlib.pyplot as pl
 12 | 
 13 | 
 14 | 
 15 | 
 16 | def load_data(file):
 17 |         data_load_file = []
 18 |         file_1 = open(file, "rb")
 19 |         data_load_file = pickle.load(file_1)
 20 |         return data_load_file
 21 | 
 22 | 
 23 | reg_vec_sort = load_data("../data/reg_poi_vec_2.pickle")
 24 | region_que = load_data("../data/reg_poi_idx_1.pickle")
 25 | 
 26 | region_attr_edges=[]
 27 | # region_que
 28 | # for idx in range(len(reg_vec_sort)):
 29 | # 	for idt in range(idx+1, len(reg_vec_sort)):
 30 | # 		output = torch.cosine_similarity(reg_vec_sort[idx], reg_vec_sort[idt], eps=1e-08).mean()
 31 | # 		if output>=0.8:
 32 | # 			tmp_1 = "r" + '_' + str(idx)
 33 | # 			tmp_2 = "r" + '_' + str(idt)
 34 | # 	        # sim_dict[key] = [tmp_1, tmp_2, value]
 35 | # 			region_attr_edges.append([tmp_1, tmp_2, output.item()])
 36 | # print("reg_vec_sort:",len(reg_vec_sort))
 37 | # print("region_que:",region_que)
 38 | # print(reg_vec_sort[170])
 39 | 
 40 | # pritnnl()
 41 | 
 42 | for idx in region_que:
 43 |     for idt in range(idx+1, len(reg_vec_sort)):
 44 |         # print("^^:",reg_vec_sort[idx].size())
 45 |         # print("**:",reg_vec_sort[idx+1].size())
 46 |         # pritnln()
 47 |         output = torch.cosine_similarity(torch.unsqueeze(reg_vec_sort[idx],0), torch.unsqueeze(reg_vec_sort[idt],0), eps=1e-08).mean()
 48 |         # print("output:", output.item())
 49 |         # pritnln()
 50 |         if output.item()>=0.850:
 51 |             tmp_1 = "r" + '_' + str(idx)
 52 |             tmp_2 = "r" + '_' + str(idt)
 53 |             # sim_dict[key] = [tmp_1, tmp_2, value]
 54 |             region_attr_edges.append([tmp_1, tmp_2, output.item()])
 55 | # print(len(region_attr_edges))
 56 | # println()
 57 | G = nx.Graph()
 58 | # for edge in edges:
 59 | #     G.add_edge(edge[0],edge[1],weight= edge[2])
 60 | 
 61 | [G.add_edge(edge[0],edge[1],weight= edge[2], date = "1", start = edge[0], end = edge[1] ) for edge in region_attr_edges]
 62 | # print(len(G.adj))
 63 | # nx.draw(G, with_labels=True)
 64 | # plt.show()
 65 | 
 66 | 
 67 | file=open(r"../data/region_attr_graph.pickle","wb")
 68 | pickle.dump(G,file) #storing_list
 69 | file.close()
 70 | 
 71 | print("attr_region:", G)
 72 | # similarity_dict = {}
 73 | # similarity_list = []
 74 | # for ii in range(emb.size()[0]):
 75 | #     # print(emb[ii])
 76 | #     # print(emb[ii].shape)
 77 | #     # print(ii.shape)
 78 | #     for jj in range(ii+1, emb.size()[0]):
 79 | #         # print(emb[jj])
 80 | #         # print(emb[jj].shape)
 81 | #         output = torch.cosine_similarity(emb[ii], emb[jj], eps=1e-08).mean()
 82 | #         # print("similarity:", output.item())
 83 | #         # println()
 84 | #         similarity_list.append(output.item())
 85 | #         tmp = 'r_{}_{}'.format(ii, jj)
 86 | #         similarity_dict[tmp] = output.item()
 87 | # similarity_list.sort(reverse = True)
 88 | # # print(similarity_list)
 89 | # print(len(similarity_list))
 90 | # sum_1 = 0
 91 | # for item in similarity_list:
 92 | #     if item>=1.0:
 93 | #         sum_1+=1
 94 | # print(sum_1)
 95 | # print(sum_1/len(similarity_list))
 96 | # print(similarity_dict)
 97 | # sim_dict = {}
 98 | # edges = []
 99 | # for key,value in similarity_dict.items():
100 | #     tmp = key.split('_')
101 | #     # print("tmp:", tmp)
102 | #     # print(tmp[0] + '_' + tmp[1])
103 | #     # print(tmp[0] + '_' + tmp[2])
104 | #     if value >=0.8:
105 | #         tmp_1 = tmp[0] + '_' + tmp[1]
106 | #         tmp_2 = tmp[0] + '_' + tmp[2]
107 | #         sim_dict[key] = [tmp_1, tmp_2, value]
108 | #         edges.append([tmp_1, tmp_2, value])
109 | 
110 | # print(len(edges))
111 | # # println()
112 | # G = nx.Graph()
113 | # # for edge in edges:
114 | # #     G.add_edge(edge[0],edge[1],weight= edge[2])
115 | 
116 | # [G.add_edge(edge[0],edge[1],weight= edge[2], date = "1", start = edge[0], end = edge[1] ) for edge in edges]
117 | # # print(len(G.adj))
118 | # nx.draw(G, with_labels=True)
119 | # plt.show()
120 | 
121 | 
122 | # file=open(r"../data/region_attr_sim_graph_{}.pickle".format(resolution),"wb")
123 | # pickle.dump(G,file) #storing_list
124 | # file.close()
125 | 
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | 
175 | 
176 | 
177 | 
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 | 


--------------------------------------------------------------------------------
/pre_s14_poi_skip.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch
  3 | from torch import nn
  4 | import numpy as np
  5 | import torch.nn.functional as F
  6 | import torch.optim as optim
  7 | import pickle
  8 | 
  9 | 
 10 | def load_data(file):
 11 |     data_load_file = []
 12 |     file_1 = open(file, "rb")
 13 |     data_load_file = pickle.load(file_1)
 14 |     return data_load_file
 15 | poi_list = ['drinking_water', 'toilets', 'school', 'hospital', 'arts_centre', 'fire_station', 'police', 'bicycle_parking', 'fountain', 'ferry_terminal', 'bench', 'cinema', 'cafe', 'pub', 'waste_basket', 'parking_entrance', 'parking', 'fast_food', 'bank', 'restaurant', 'ice_cream', 'pharmacy', 'taxi', 'post_box', 'atm', 'nightclub', 'social_facility', 'bar', 'biergarten', 'clock', 'bicycle_rental', 'community_centre', 'watering_place', 'ranger_station', 'boat_rental', 'recycling', 'payment_terminal', 'bicycle_repair_station', 'place_of_worship', 'shelter', 'telephone', 'clinic', 'dentist', 'vending_machine', 'theatre', 'charging_station', 'public_bookcase', 'post_office', 'fuel', 'doctors']
 16 | poi_list_1 = ['drinking_water', 'toilets', 'school', 'hospital', 'arts_centre', 'fire_station', 'police', 'bicycle_parking', 'fountain', 'ferry_terminal', 'bench', 'cinema', 'cafe', 'pub', 'waste_basket', 'parking_entrance', 'parking', 'fast_food', 'bank', 'restaurant', 'ice_cream', 'pharmacy', 'taxi', 'post_box', 'atm', 'nightclub', 'social_facility', 'bar', 'biergarten', 'clock', 'bicycle_rental', 'community_centre', 'watering_place', 'ranger_station', 'boat_rental', 'recycling', 'payment_terminal', 'bicycle_repair_station', 'place_of_worship', 'shelter', 'telephone', 'clinic', 'dentist', 'vending_machine', 'theatre', 'charging_station', 'public_bookcase', 'post_office', 'fuel', 'doctors','drinking_water', 'toilets']
 17 | region_back = load_data("../data/region_back.pickle")
 18 | reg_poi = load_data("../data/reg_incld_poi_new.pickle")
 19 | # print(reg_poi)
 20 | # print(reg_poi)
 21 | poi_dict = {}
 22 | for idx, item in enumerate(poi_list):
 23 |     poi_dict[item] = idx
 24 | # print(poi_dict)
 25 | # println()
 26 | 
 27 | 
 28 | 
 29 | CONTEXT_SIZE = 2
 30 | EMBEDDING_DIM = 96  # 编码向量的维度
 31 | 
 32 | # test_sentence = """0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 0 1""".split()
 33 | test_sentence = poi_list_1
 34 | # print(test_sentence)
 35 | # preinln()
 36 | # 构建训练集数据 ([ 第一个单词, 第二个单词 ], 预测目标)
 37 | trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])
 38 |             for i in range(len(test_sentence) - 2)]
 39 | # trigrams = [([test_sentence[i]], test_sentence[i + 1])
 40 | #             for i in range(len(test_sentence) - 2)]
 41 | # print(trigrams)
 42 | # println()
 43 | # 构建测试集数据
 44 | vocab = set(test_sentence)
 45 | word_to_ix = {word: i for i, word in enumerate(vocab)}
 46 | # print(vocab)
 47 | # print(word_to_ix)
 48 | # println()
 49 | # 定义模型
 50 | class NGramLanguageModeler(nn.Module):
 51 | 
 52 |     def __init__(self, vocab_size, embedding_dim, context_size):
 53 |         super(NGramLanguageModeler, self).__init__()
 54 |         self.embeddings = nn.Embedding(vocab_size, embedding_dim)
 55 |         self.linear1 = nn.Linear(context_size * embedding_dim, 128)
 56 |         self.linear2 = nn.Linear(128, EMBEDDING_DIM)
 57 | 
 58 |     def forward(self, inputs):
 59 |         embeds = self.embeddings(inputs).view((1, -1))  # 进行embedding
 60 |         # print(embeds.size())
 61 |         # pritjnln()
 62 |         out = F.relu(self.linear1(embeds))  # 经过第一个全连接层
 63 |         out = self.linear2(out)  # 经过第二个全连接层
 64 |         log_probs = F.log_softmax(out, dim=1)
 65 |         return log_probs,out
 66 | 
 67 | # # 进行训练
 68 | losses = []
 69 | loss_function = nn.NLLLoss()
 70 | model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
 71 | optimizer = optim.SGD(model.parameters(), lr=0.0005)
 72 | emb_dict={}
 73 | for epoch in range(1500):
 74 |     total_loss = 0
 75 |     for context, target in trigrams:
 76 |         # 准备输入模型的数据
 77 |         context_idxs = torch.tensor([word_to_ix[w] for w in context], dtype=torch.long)
 78 |         # print("context_idxs：",context_idxs)
 79 |         # print(context_idxs.size())
 80 |         # println()
 81 |         model.zero_grad()  # 清零梯度缓存
 82 | 
 83 |         # 进行训练得到预测结果
 84 |         log_probs,out = model(context_idxs)
 85 |         # print(out.size())
 86 |         # print("----:", out)
 87 |         # println()
 88 | 
 89 |         # 计算损失值
 90 |         loss = loss_function(log_probs, torch.tensor([word_to_ix[target]], dtype=torch.long))
 91 | 
 92 |         # 反向传播更新梯度
 93 |         loss.backward()
 94 |         optimizer.step()
 95 | 
 96 |         total_loss += loss.item()  # 累计损失
 97 |         torch.save(model.state_dict(), './model_skip/model_poi.pt')
 98 |         torch.save(model, './model_skip/model_poi.pth')
 99 |         
100 |         emb_dict[target] = out
101 |     losses.append(total_loss)
102 | print(losses)
103 | # print(emb_dict)
104 | 
105 | poi_skip_vec = {}
106 | for key,value in emb_dict.items():
107 |     poi_skip_vec[poi_dict[key]] = torch.squeeze(value,0)
108 |     # print("poi_dict[key]:",poi_dict[key])
109 |     # print("size():", value.size())
110 | # region_spatial = {}
111 | # for key,value in reg_t_con.items():
112 | #     # print(value)
113 | #     region_spatial[key] = emb_dict[str(value)]
114 | # print("---finish---:",len(region_spatial))
115 | file=open(r"../data/poi_skip_vec.pickle","wb")
116 | pickle.dump(poi_skip_vec,file) #storing_list
117 | file.close()
118 | 
119 | 
120 | 
121 | 


--------------------------------------------------------------------------------
/code/data_pre/pre_s1.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | from shapely.geometry import Point, LineString
  6 | from shapely.geometry import Polygon,MultiPoint  #多边形
  7 | import matplotlib.pyplot as plt
  8 | import json
  9 | from urllib.request import urlopen, quote
 10 | import requests
 11 | import geopy
 12 | from geopy.geocoders import Nominatim
 13 | import copy
 14 | import pickle
 15 | import time
 16 | # taxi = pd.read_csv("../data/2016_Green_Taxi_Trip_Data.csv", sep = ',')
 17 | # print(taxi[:2])
 18 | 
 19 | census_block = pd.read_excel("../data/rollingsales_manhattan.xlsx",skiprows = 4)
 20 | # print(census_block[:2])
 21 | print(census_block.columns.values.tolist())
 22 | blocks = copy.deepcopy(census_block).values.tolist()
 23 | 
 24 | 
 25 | # region = census_block["BUILDING CLASS CATEGORY"].values.tolist()
 26 | region = census_block["BUILDING CLASS AT TIME OF SALE"].values.tolist()
 27 | 
 28 | # address = census_block["ADDRESS"].values.tolist()
 29 | # address_split = [item.split(",")[0] for item in address]
 30 | # add_pos = {}
 31 | # i= 0
 32 | # # tmp_dict={}
 33 | # for ad in address_split:
 34 | #     start_t = time.time()
 35 | #     i+=1
 36 | #     geolocater = Nominatim(user_agent='demo_of_gnss_help')
 37 | #     # print("ad:", ad)
 38 | #     try:
 39 | #         if ad not in add_pos.keys():
 40 | #             location = geolocater.geocode(ad)
 41 | #             if hasattr(location,'latitude') and (location.latitude is not None) and hasattr(location,'longitude') and (location.longitude is not None):
 42 | #                 # add_pos.append([location.latitude, location.longitude])
 43 | #                 # if ad not in tmp_dict.keys():
 44 | #                 # tmp_dict[ad] = []
 45 | #                 add_pos[ad] = [location.latitude, location.longitude]
 46 |     
 47 | #         # NYC_house_middle.append(tmp)
 48 | #             # println()
 49 | #     except IOError:
 50 | #         # skip_num+=1
 51 | #         add_pos[ad] = []
 52 | #         print('skip this row')
 53 | #     print("i:", i)
 54 | #     print(time.time()-start_t)
 55 | # print(address_split)
 56 | # # print(time.time()-start_t)
 57 | # println
 58 | region_ = list(set(region))
 59 | reg_nyc_dict = {} ##113 region in manhattan
 60 | for idx,sub in enumerate(region_):
 61 |     reg_nyc_dict[sub] = idx
 62 | # print(reg_nyc_dict)
 63 | # print(len(reg_nyc_dict))
 64 | # println()
 65 | 
 66 | skip_num = 0
 67 | region_f = {}
 68 | add_pos = {}
 69 | i= 0
 70 | NYC_house_middle = []
 71 | for sline in blocks:
 72 |     start_t = time.time()
 73 |     i+=1
 74 |     tmp = []
 75 |     # print("sline:", sline[8],sline[18],sline[14], sline[19])
 76 |     # print("address:",sline[8])
 77 |     t = sline[8].split(",")
 78 |     ##collect lat,lon
 79 |     geolocater = Nominatim(user_agent='demo_of_gnss_help')
 80 |     try:
 81 |         if t[0] not in add_pos.keys():
 82 |             # print("not in here")
 83 |             location = geolocater.geocode(t[0])
 84 |             if hasattr(location,'latitude') and (location.latitude is not None) and hasattr(location,'longitude') and (location.longitude is not None):
 85 |                 # print([location.latitude, location.longitude])
 86 |                 # println()
 87 |                 # print("t:", t)
 88 |                 # tmp.append([location.latitude, location.longitude])
 89 |                 # tmp.append(reg_nyc_dict[sline[18]])
 90 |                 add_pos[t[0]] = [location.latitude, location.longitude]
 91 |                 tmp.append(reg_nyc_dict[sline[18]])
 92 |                 tmp.append(sline[14])
 93 |                 tmp.append(sline[19])
 94 |                 # print("--:",float(sline[19])/float(sline[14]))
 95 |                 tmp.append(float(sline[19]))
 96 |                 if reg_nyc_dict[sline[18]] not in region_f.keys():
 97 |                     region_f[reg_nyc_dict[sline[18]]] = []
 98 |                     region_f[reg_nyc_dict[sline[18]]].append([location.latitude, location.longitude])
 99 |                 else:
100 |                     region_f[reg_nyc_dict[sline[18]]].append([location.latitude, location.longitude])
101 |                 NYC_house_middle.append(tmp)
102 |                 
103 |         else:
104 |             # print("---in here---")
105 |             # print("add_pos[t[0]]:", add_pos[t[0]])
106 |             tmp.append(reg_nyc_dict[sline[18]])
107 |             tmp.append(sline[14])
108 |             tmp.append(sline[19])
109 |             # print("--:",float(sline[19])/float(sline[14]))
110 |             # tmp.append(float(sline[19]))
111 |             if reg_nyc_dict[sline[18]] not in region_f.keys():
112 |                 region_f[reg_nyc_dict[sline[18]]] = []
113 |                 region_f[reg_nyc_dict[sline[18]]].append(add_pos[t[0]])
114 |             else:
115 |                 region_f[reg_nyc_dict[sline[18]]].append(add_pos[t[0]])
116 |             NYC_house_middle.append(tmp)
117 |     except IOError:
118 |         add_pos[t[0]] = []
119 |         skip_num+=1
120 |         # print('skip this row')
121 |     print("i:", i)
122 |     print(time.time()-start_t)
123 | 
# Summarise what was collected before persisting it.
print(region_f)
print(NYC_house_middle[:3])
print(len(NYC_house_middle))
print(len(region_f))
print(len(add_pos))
print("skip_num",skip_num)

# Persist the three artefacts; `with` closes each file even if pickling fails.
with open(r"../data/NY_house.pickle", "wb") as file:
    pickle.dump(NYC_house_middle, file)
with open(r"../data/NY_stree_pos.pickle", "wb") as file:
    pickle.dump(add_pos, file)
with open(r"../data/NY_region.pickle", "wb") as file:
    pickle.dump(region_f, file)
141 | 
142 | 


--------------------------------------------------------------------------------
/code/data_pre/pre_spatial_graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | from shapely.geometry import Point, LineString
  6 | from shapely.geometry import Polygon,MultiPoint  #多边形
  7 | import matplotlib.pyplot as plt
  8 | import json
  9 | from urllib.request import urlopen, quote
 10 | import requests
 11 | import geopy
 12 | from geopy.geocoders import Nominatim
 13 | import copy
 14 | import pickle
 15 | from datetime import datetime
 16 | from itertools import chain
 17 | import networkx as nx
 18 | import numpy as np
 19 | import matplotlib.pyplot as plt
 20 | from math import radians, cos, sin, asin, sqrt
 21 | from sklearn.cluster import DBSCAN
 22 | 
 23 | def haversine(lon1, lat1, lon2, lat2): # 经度1，纬度1，经度2，纬度2 （十进制度数）
 24 |     """
 25 |     Calculate the great circle distance between two points 
 26 |     on the earth (specified in decimal degrees)
 27 |     """
 28 |     # 将十进制度数转化为弧度
 29 |     lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
 30 | 
 31 |     # haversine公式
 32 |     dlon = lon2 - lon1 
 33 |     dlat = lat2 - lat1 
 34 |     a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
 35 |     c = 2 * asin(sqrt(a)) 
 36 |     r = 6371 # 地球平均半径，单位为公里
 37 |     return c * r * 1000
 38 | 
 39 | def load_data(file):
 40 |         data_load_file = []
 41 |         file_1 = open(file, "rb")
 42 |         data_load_file = pickle.load(file_1)
 43 |         return data_load_file
 44 |     
 45 | region_back = load_data("../data/region_back_merge.pickle")
 46 | region_fea = load_data("../data/region_fea.pickle")
 47 | spatial_edges = []
 48 | # spatial_edges.extend(flow_edges) # add edges in flow graph
 49 | # sim_num=0
 50 | X = [[list(value.centroid.coords)[0][0],list(value.centroid.coords)[0][1]] for key,value in region_back.items()]
 51 | # y_pred = DBSCAN(eps = 0.01, min_samples = 7).fit_predict(X)
 52 | # from sklearn.cluster import SpectralClustering
 53 | # sc = SpectralClustering(3, affinity='precomputed', n_init=100,assign_labels='discretize')
 54 | # y_pred = sc.fit_predict(X)  
 55 | # print(y_pred)
 56 | # println()
 57 | # # print(check_index)
 58 | node=[]
 59 | # # reg={}
 60 | # for i in range(12):
 61 | #     reg[i] = 0
 62 | # # print(reg)
 63 | # tmp=[]
 64 | # for key,value in region_fea.items():
 65 | #     if value==0:
 66 | #         # print(key,value, list(region_back[key].centroid.coords))
 67 | #     #     tmp.append([list(region_back[key].centroid.coords)[0],key])
 68 | #         print(key)
 69 | #         # shapely.ops.unary_union(polygons)
 70 | #         reg[value]+=1
 71 | #     # println()
 72 | # print(reg)
 73 | # def takeSecond(elem):
 74 | #     return elem[0][0]
 75 |  
 76 | # random = tmp
 77 | # # 指定第二个元素排序
 78 | # random.sort(key=takeSecond)
 79 | # print(random)
 80 | # print(len(random))
 81 | 
 82 | # println()
 83 | 
 84 | reg_spatial={}
 85 | for ii in range(180):
 86 |     for jj in range(ii+1, 180):
 87 |         # time = flow_nodes[ii].split("_")[2]
 88 |         # t_1 = flow_nodes[ii].split("_")
 89 |         # t_2 = flow_nodes[jj].split("_")
 90 |         t_1 = ii
 91 |         t_2 = jj
 92 |         # print("t_1:",t_1)
 93 |         # print("t_2:",t_2)
 94 |         if int(t_1) not in node:
 95 |             node.append(int(t_1))
 96 |         if int(t_2) not in node:
 97 |             node.append(int(t_2))
 98 |         t_1_pos = list(region_back[int(t_1)].centroid.coords)[0]
 99 |         t_2_pos = list(region_back[int(t_2)].centroid.coords)[0]
100 |         value = haversine(t_1_pos[0], t_1_pos[1], t_2_pos[0], t_2_pos[1])
101 |         if value<= 2900:  #小于5公里
102 |             n1 = "r"+"_"+str(t_1)
103 |             n2 = "r"+"_"+str(t_2)
104 |             # pair = (n1,n2, {"weight":value, "date":int(1), "start":n1, "end":n2})
105 |             pair = (n1,n2, {"weight":1, "date":int(1), "start":n1, "end":n2})
106 |             # print(pair)
107 |             if pair not in spatial_edges:
108 |                 spatial_edges.append(pair)
109 | # region_fea[283]=12
110 | # for ii in range(296):
111 | #     for jj in range(ii+1, 296):
112 | #         t_1 = ii
113 | #         t_2 = jj
114 | #         if int(t_1) not in node:
115 | #             node.append(int(t_1))
116 | #         if int(t_2) not in node:
117 | #             node.append(int(t_2))
118 | #         if region_fea[ii]==region_fea[jj]:
119 | #             n1 = "r"+"_"+str(t_1)
120 | #             n2 = "r"+"_"+str(t_2)
121 | #             pair = (n1,n2, {"weight":1, "date":int(1), "start":n1, "end":n2})
122 | #             if pair not in spatial_edges:
123 | #                 spatial_edges.append(pair)
124 | #         if region_fea[ii]==region_fea[jj]+1 or region_fea[ii]==region_fea[jj]+2 or region_fea[ii]==region_fea[jj]+3 or region_fea[ii]==region_fea[jj]+4 or region_fea[ii]==region_fea[jj]+5 or region_fea[ii]==region_fea[jj]+6 or region_fea[ii]==region_fea[jj]+7 or region_fea[ii]==region_fea[jj]+8 or region_fea[ii]==region_fea[jj]+9 or region_fea[ii]==region_fea[jj]+10 or region_fea[ii]==region_fea[jj]+11:
125 | #             # print("ii:",region_fea[ii])
126 | #             # print("jj:",region_fea[jj])
127 | #             n1 = "r"+"_"+str(t_1)
128 | #             n2 = "r"+"_"+str(t_2)
129 | #             pair = (n1,n2, {"weight":1, "date":int(1), "start":n1, "end":n2})
130 | #             if pair not in spatial_edges:
131 | #                 spatial_edges.append(pair)
132 | #                 continue
133 | # print(spatial_edges)
134 | # print(len(spatial_edges))
135 | # println()
136 | 
137 | 
138 | # println()
print("spatial_edges:",spatial_edges)
print(len(spatial_edges))
print("finish spatial graph")


# Build the undirected spatial graph from the collected edge triples
# (a copy of the list is passed, as before).
G_spatial = nx.Graph()
G_spatial.add_edges_from(spatial_edges[:])
print("G_spatial:",G_spatial)

# Persist the graph. The context manager guarantees the file is closed even if
# pickling raises, which the manual open()/close() pair did not.
with open(r"../data/spatial_graph_baseline.pickle","wb") as file:
    pickle.dump(G_spatial,file)


--------------------------------------------------------------------------------
/code/pre_dataloader.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import torch
  4 | from torch_geometric.data import Data
  5 | from itertools import product
  6 | import numpy as np
  7 | import pandas as pd
  8 | from torch import nn
  9 | import pickle
 10 | 
 11 | def load_data(file):
 12 |         data_load_file = []
 13 |         file_1 = open(file, "rb")
 14 |         data_load_file = pickle.load(file_1)
 15 |         return data_load_file
# Module-level setup executed on import.
resolution = 500
# Linear map from 512 input features to 96 output features.
# NOTE(review): the layer is freshly initialised here and is neither trained nor
# seeded in this file, so region_trans differs between runs unless a seed is
# set elsewhere -- confirm this is intended.
linear = nn.Linear(512, 96)
# node_features_1 = load_data("../data/region_spatial_refine.pickle") 
# node_features = load_data("../data/region_spatial_refine.pickle")
# node_features = load_data("../data/region_spatial_refine.pickle")
# Pickled input passed straight to `linear`, so it must be a tensor whose last
# dimension is 512.
region_poi_vec = load_data("./data/reg_poi_vec.pickle")
# Projected features; indexed by region number in nx_to_graph_data_obj.
region_trans = linear(region_poi_vec)
 23 | # node_lab = load_data("./data/nodes_lab.pickle")
 24 | # train_mask = load_data("./data/train_mask.pickle")
 25 | # val_mask = load_data("./data/val_mask.pickle")
 26 | # test_mask = load_data("./data/test_mask.pickle")
 27 | 
 28 | 
 29 | def nx_to_graph_data_obj(g):
 30 |     n_nodes = g.number_of_nodes()
 31 |     n_edges = g.number_of_edges()
 32 |     # nodes
 33 |     nx_node_ids = [n_i for n_i in g.nodes()]  # contains list of nx node ids
 34 |     # print("nx_node_ids:", nx_node_ids)
 35 |     # n = np.array([nx_node_ids.index(n_i) for n_i in g.nodes()])
 36 |     x_ = torch.tensor(np.ones(n_nodes).reshape(-1, 1), dtype=torch.float)
 37 |     # print("nx_node_ids:",nx_node_ids)
 38 |     n_nodes = [int(item.split("_")[1]) for item in nx_node_ids]
 39 |     x = torch.tensor([region_trans[item].tolist() for item in n_nodes])
 40 |     
 41 |     file=open(r"./data/nodes_new_{}.pickle".format(7),"wb")
 42 |     pickle.dump(nx_node_ids,file) #storing_list
 43 |     file.close()
 44 | 
 45 |     # edges
 46 |     edges_list = []
 47 |     edge_features_list = []
 48 |     for node_1, node_2, attr_dict in g.edges(data=True):
 49 | 
 50 |         edge_feature = [attr_dict['weight'], attr_dict['date'], nx_node_ids.index(attr_dict['start']), nx_node_ids.index(attr_dict['end'])]  # last 2 indicate self-loop
 51 |         # and masking
 52 |         edge_feature = np.array(edge_feature, dtype=int)
 53 |         # convert nx node ids to data obj node index
 54 |         i = nx_node_ids.index(node_1)
 55 |         j = nx_node_ids.index(node_2)
 56 |         edges_list.append((i, j))
 57 |         edge_features_list.append(edge_feature)
 58 |     # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
 59 |     edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long)
 60 |     # print("edge_index:", edge_index)
 61 |     
 62 |     # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
 63 |     edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.float)
 64 |     node_lab = load_data("./data/nodes_lab.pickle")
 65 |     train_mask = load_data("./data/train_mask.pickle")
 66 |     val_mask = load_data("./data/val_mask.pickle")
 67 |     test_mask = load_data("./data/test_mask.pickle")
 68 |     node_lab = torch.tensor(np.array(node_lab), dtype=torch.float)
 69 |     train_mask = torch.tensor(np.array(train_mask), dtype=torch.float)
 70 |     val_mask = torch.tensor(np.array(val_mask), dtype=torch.float)
 71 |     test_mask = torch.tensor(np.array(test_mask), dtype=torch.float)
 72 |     data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr,y=node_lab, train_mask=train_mask, val_mask=val_mask, test_mask=test_mask,num_features=torch.tensor([1]*880,dtype=torch.float))
 73 |     return data
 74 | 
 75 | 
 76 | def get_data(d):
 77 |     data_list = [0]
 78 |     data_list[0] = d
 79 |     # print("data_list:", data_list)
 80 |     data = data_list[0]
 81 |     # print(data)
 82 |     # println()
 83 |     keys = data_list[0].keys
 84 |     # data->Data()
 85 |     data = data_list[0].__class__()
 86 | 
 87 |     for key in keys:
 88 |         data[key] = []
 89 |     # print("initial_data:", data) # Data(edge_index=[0], x=[0])
 90 |     slices = {key: [0] for key in keys}
 91 |     # print(slices) # {'x': [0], 'edge_index': [0]}
 92 |     # print("slices:", slices)
 93 |     for item, key in product(data_list, keys):
 94 |         # print("111:", item, key)
 95 |         # print("222:", item[key])
 96 |         data[key].append(item[key])
 97 |         # print("middle_data:", data)
 98 |         # println()
 99 |         if torch.is_tensor(item[key]):
100 | 
101 |             s = slices[key][-1] + item[key].size(item.__cat_dim__(key, item[key]))
102 |             # print("s^^^:", s)
103 |         else:
104 |             s = slices[key][-1] + 1
105 |             # print("s***:", s)
106 |         slices[key].append(s)
107 | 
108 |     
109 |     
110 |     if hasattr(data_list[0], '__num_nodes__'):
111 |         data.__num_nodes__ = []
112 |         for item in data_list:
113 |             data.__num_nodes__.append(item.num_nodes)
114 |     
115 |     for key in keys:
116 |         item = data_list[0][key]
117 |         if torch.is_tensor(item):
118 |             print("__data[key]:", len(data[key]))
119 |             print("tmp:", data.__cat_dim__(key, item))
120 |             
121 |             data[key] = torch.cat(data[key],
122 |                                   dim=data.__cat_dim__(key, item))
123 |             print("data[key]__:", len(data[key]))
124 |             
125 |         elif isinstance(item, int) or isinstance(item, float):
126 |             data[key] = torch.tensor(data[key])
127 |     
128 |         slices[key] = torch.tensor(slices[key], dtype=torch.long)
129 |         
130 |     com = (data, slices)
131 |     # print(com)
132 |     return com
133 | 
# Script entry: runs on import/execution of this module.
# hy_graph = load_data("../data/hy_new_s.pickle")
# Load the pickled graph, convert it to a Data object, collate it into a
# (data, slices) tuple and save that tuple with torch.save.
hy_graph = load_data("./data/hy_new_aaai_2.pickle")
d = nx_to_graph_data_obj(hy_graph)
com = get_data(d)
torch.save(com,'./data/dataset_new_aaai_2.pt')
139 | 
140 | 
141 | 


--------------------------------------------------------------------------------
/code/data_pre/pre_s5.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import pickle
  4 | import pandas as pd
  5 | from itertools import chain
  6 | import networkx as nx
  7 | import numpy as np
  8 | import matplotlib.pyplot as plt
  9 | from math import radians, cos, sin, asin, sqrt
 10 | 
 11 | 
 12 | def load_data(file):
 13 |     data_load_file = []
 14 |     file_1 = open(file, "rb")
 15 |     data_load_file = pickle.load(file_1)
 16 |     return data_load_file
 17 | from scipy.sparse import csr_matrix
 18 | 
# Load the three pickled input graphs that are merged further down.
# They are used as networkx graphs (.nodes / .edges(data=True)).
flow_g = load_data('../data/flow_graph.pickle')
spatial_g = load_data('../data/spatial_graph.pickle')
region_attr_g = load_data('../data/region_attr_graph.pickle')
 22 | # adj_matrix = csr_matrix((np.ones(len(row)), (row, col)), shape=(len(node_map), len(node_map)))
 23 | # print(np.array(nx.adjacency_matrix(region_attr_g).todense()))
 24 | # adj_= np.array(nx.adjacency_matrix(region_attr_g).todense()).tolist()
 25 | # adj_gat = csr_matrix((np.ones(), np.array(nx.adjacency_matrix(region_attr_g).todense())), shape=(62, 62))
 26 | # print(adj_gat)
 27 | # print(type(adj_gat))
 28 | 
 29 | # println
 30 | # feature = np.random.uniform(-1, 1, size=(62, 62))
 31 | # feature = feature[np.newaxis]
 32 | # print(feature.shape)
 33 | # println()
 34 | 
 35 | # file=open(r"../data/adj_gat.pickle","wb")
 36 | # pickle.dump(adj_gat,file) #storing_list
 37 | # file.close()
 38 | # file=open(r"../data/fea_gat.pickle","wb")
 39 | # pickle.dump(feature,file) #storing_list
 40 | # file.close()
 41 | # println()
 42 | ##only get region attrbutes matrix
 43 | # f = open('../data/poi_edgelist.txt','a')
 44 | # for item in region_attr_g.edges():
 45 | #     print(item[0].split("_")[1]," ", item[1].split("_")[1])
 46 | #     f.write('\n')
 47 | #     f.write(str(item[0].split("_")[1]))
 48 | #     f.write(" ")
 49 | #     f.write(str(item[1].split("_")[1]))
 50 | # f.close()
 51 | # adj=np.array(nx.adjacency_matrix(region_attr_g).todense())
 52 | # # # print(adj)
 53 | # f = open('../data/adjlist.txt','a')
 54 | # for item in adj:
 55 | #     # print(item)
 56 | #     # print(item.shape)
 57 | #     # print(item)
 58 |     
 59 | #     f.write('\n')
 60 | #     for sub in item:
 61 | #         # print(sub)
 62 | #         f.write(str(0))
 63 | #         f.write(" ")
 64 | # f.close()
 65 | # f = open('../data/labels.txt','a')
 66 | # for idx, ir in enumerate(region_attr_g.nodes()):
 67 | #     # print(idx, ir)
 68 | #     f.write('\n')
 69 | #     f.write(ir.split("_")[1])
 70 | #     f.write(" ")
 71 | #     f.write("1")
 72 | # f.close()
 73 | # f = open('../data/features.txt','a')
 74 | # for idx, ir in enumerate(region_attr_g.nodes()):
 75 | #     # print(idx, ir)
 76 | #     f.write('\n')
 77 | #     f.write(ir.split("_")[1])
 78 | #     f.write(" ")
 79 | #     f.write("1")
 80 | # f.close()
 81 | # println()
 82 | 
 83 | 
 84 | # print(flow_g,spatial_g,region_attr_g)
 85 | 
 86 | # print(flow_g.edges())
 87 | 
 88 | flow_nodes = list(flow_g.nodes)
 89 | spatial_nodes = list(spatial_g.nodes)
 90 | regat_nodes = list(region_attr_g.nodes)
 91 | flow_edges = list(flow_g.edges(data=True))
 92 | # print(flow_edges)
 93 | # println()
 94 | spatial_edges = list(spatial_g.edges(data=True))
 95 | # print(spatial_edges)
 96 | # println()
 97 | regat_edges = list(region_attr_g.edges(data=True))
 98 | # print(regat_edges)
 99 | # println()
100 | 
101 | part_f = flow_nodes
102 | part_s = spatial_nodes
103 | part_r = regat_nodes
104 | # print(part_f)
105 | # print("--------------------------")
106 | # print(part_s)
107 | # print("--------------------------")
108 | # print(part_r)
109 | 
110 | hy_edges = []
111 | for sub in regat_nodes:
112 |     for ss in spatial_nodes:
113 |         tmp = ss.split("_")
114 |         tmp_c = tmp[0]+'_'+tmp[1]
115 |         if sub == tmp_c:
116 |             pair = (sub, ss,{"weight":0, "date": tmp[2], "start":sub, "end":ss})
117 |             # print("pair:", pair)
118 |             hy_edges.append(pair)
119 | 
120 | for ss in spatial_nodes:
121 |     for ff in flow_nodes:
122 |         tps = ss.split("_")
123 |         # tps_c = tps[0]+'_'+tps[1]
124 |         # tpf = ff.split("_")
125 |         # tpf_c = tpf[0]+'_'+tpf[1]
126 |         if ss == ff:
127 |             pair = (ss, ff,{"weight":0, "date":tps[2] , "start":ss, "end":ff})
128 |             # print("pair:", pair)
129 |             hy_edges.append(pair)
130 |       
131 | 
132 | # print("hy_edges:",hy_edges)
133 | # hy_edges.extend(flow_edges)
134 | # hy_edges.extend(spatial_edges)
135 | # hy_edges.extend(regat_edges)
136 |  
# Merge the cross-view link edges and the edges of the three source graphs
# into a single undirected graph.
G_hy = nx.Graph()
G_hy.add_edges_from(hy_edges)
G_hy.add_edges_from(flow_edges)
G_hy.add_edges_from(spatial_edges)
G_hy.add_edges_from(regat_edges)
print("hyper_grapgh:", G_hy)

nodes_num = 3
# Persist the merged graph. The context manager closes the file even if
# pickling raises, which the manual open()/close() pair did not guarantee.
with open(r"../data/hy_{}.pickle".format(8),"wb") as file:
    pickle.dump(G_hy,file)
164 | # file=open(r"../data/fl_sp.pickle","wb")
165 | # pickle.dump(fl,file) #storing_list
166 | # file.close()
167 | 
168 | # adj_fl=np.array(nx.adjacency_matrix(fl).todense())
169 | # f = open('../data/fl_edges.txt','a')
170 | # for item in fl.edges:
171 | #     # print(item)
172 | #     # print(item[0].split("_")[1]," ", item[1].split("_")[1])
173 | #     f.write(str(item[0].split("_")[1]))
174 | #     f.write(" ")
175 | #     f.write(str(item[1].split("_")[1]))
176 | #     f.write('\n')
177 | # f.close()
178 | # f = open('../data/fl_labels.txt','a')
179 | # for idx, ir in enumerate(fl.nodes()):
180 | #     # print(idx, ir)
181 | #     # println
182 | #     f.write(ir.split("_")[1])
183 | #     f.write(" ")
184 | #     f.write("1")
185 | #     f.write('\n')
186 | # f.close()
187 | 
188 | 
189 | #for mvure
190 | # adj=np.array(nx.adjacency_matrix(region_attr_g).todense())
191 | # mob_adj = adj[np.newaxis,:]
192 | # # print("t_adj:", adj.shape)
193 | # np.save("../data/mvure_data/mob-adj.npy", mob_adj)
194 | 
195 | # np.save("../data/mvure_data/s_adj.npy", adj)
196 | 
197 | 


--------------------------------------------------------------------------------
/code/data_pre/pre_s6_dataloader.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import torch
  4 | from torch_geometric.data import Data
  5 | from itertools import product
  6 | import numpy as np
  7 | import pandas as pd
  8 | import pickle
  9 | 
 10 | def load_data(file):
 11 |         data_load_file = []
 12 |         file_1 = open(file, "rb")
 13 |         data_load_file = pickle.load(file_1)
 14 |         return data_load_file
# Used below to name the pickled node-ordering file (nodes_<resolution>.pickle).
resolution = 500
# Per-region feature lookup: indexed by integer region id in
# nx_to_graph_data_obj, and each entry must support .tolist().
node_features = load_data("../data/reg_com_poi_cat_spatial.pickle")
 17 | # print(len(node_features))
 18 | # println()
 19 | def nx_to_graph_data_obj(g):
 20 |     n_nodes = g.number_of_nodes()
 21 |     n_edges = g.number_of_edges()
 22 |     # nodes
 23 |     nx_node_ids = [n_i for n_i in g.nodes()]  # contains list of nx node ids
 24 |     # print("nx_node_ids:", nx_node_ids)
 25 |     # n = np.array([nx_node_ids.index(n_i) for n_i in g.nodes()])
 26 |     x_ = torch.tensor(np.ones(n_nodes).reshape(-1, 1), dtype=torch.float)
 27 |     # print("nx_node_ids:",nx_node_ids)
 28 |     n_nodes = [int(item.split("_")[1]) for item in nx_node_ids]
 29 |     # print("n_nodes:",n_nodes)
 30 |     x = torch.tensor([node_features[item].tolist() for item in n_nodes])
 31 |     # print("x:",x.size())
 32 |     # print(x_.size())
 33 |     # printnln()
 34 |     file=open(r"../data/nodes_{}.pickle".format(resolution),"wb")
 35 |     pickle.dump(nx_node_ids,file) #storing_list
 36 |     file.close()
 37 |     # x = torch.tensor(n.reshape(-1, 1), dtype=torch.float)
 38 |     # print("x:", x)
 39 |     # println()
 40 |     # edges
 41 |     edges_list = []
 42 |     edge_features_list = []
 43 |     for node_1, node_2, attr_dict in g.edges(data=True):
 44 |         # print("attr_dict:", attr_dict)
 45 |         # print("node_1:", node_1)
 46 |         # print("node_2:", node_2)
 47 |         edge_feature = [attr_dict['weight'], attr_dict['date'], nx_node_ids.index(attr_dict['start']), nx_node_ids.index(attr_dict['end'])]  # last 2 indicate self-loop
 48 |         # and masking
 49 |         edge_feature = np.array(edge_feature, dtype=int)
 50 |         # convert nx node ids to data obj node index
 51 |         i = nx_node_ids.index(node_1)
 52 |         j = nx_node_ids.index(node_2)
 53 |         edges_list.append((i, j))
 54 |         edge_features_list.append(edge_feature)
 55 |     # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
 56 |     edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long)
 57 |     # print("edge_index:", edge_index)
 58 |     
 59 |     # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
 60 |     edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.float)
 61 |     # print("edge_attr:", edge_attr.size())
 62 |     # println()
 63 |     # construct data obj
 64 |     data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
 65 |     return data
 66 | 
 67 | # print(data)
 68 | # println()
 69 | 
 70 | 
 71 | # edge_index = torch.tensor([
 72 | #     [3, 1, 1, 2],
 73 | #     [1, 3, 2, 1]], dtype=torch.long)
 74 | # x = torch.tensor([[-1],
 75 | #                   [0],
 76 | #                   [1]], dtype=torch.float)
 77 | 
 78 | # d = Data(x=x, edge_index=edge_index)
 79 | # print(type(d)) # # <class 'torch_geometric.data.data.Data'>
 80 | def get_data(d):
 81 |     data_list = [0]
 82 |     
 83 |     
 84 |     data_list[0] = d
 85 |     # print("data_list:", data_list)
 86 |     data = data_list[0]
 87 |     # print(data)
 88 |     # println()
 89 |     
 90 |     
 91 |     
 92 |     keys = data_list[0].keys
 93 |     
 94 |     
 95 |     # data->Data()
 96 |     data = data_list[0].__class__()
 97 |     # print("data:", data_list[0])
 98 |     # println()
 99 |     # print(data_list[0].keys) # ['x', 'edge_index']
100 |     # print(type(data)) # <class 'torch_geometric.data.data.Data'>
101 |     
102 |     # print("before_data:", data_list)
103 |     
104 |     
105 |     
106 |     for key in keys:
107 |         data[key] = []
108 |     # print("initial_data:", data) # Data(edge_index=[0], x=[0])
109 |     
110 |     
111 |     slices = {key: [0] for key in keys}
112 |     # print(slices) # {'x': [0], 'edge_index': [0]}
113 |     # print("slices:", slices)
114 |     
115 |     for item, key in product(data_list, keys):
116 |         # print("111:", item, key)
117 |       
118 |         # print("222:", item[key])
119 |         data[key].append(item[key])
120 |         # print("middle_data:", data)
121 |         
122 |         # println()
123 |         if torch.is_tensor(item[key]):
124 |             # print("slices[key]:", slices[key][-1])
125 |             # print("item[key]:", item.__cat_dim__(key, item[key]))
126 |             # print("%%%:", item[key].size(item.__cat_dim__(key, item[key])))
127 |             # 
128 |             s = slices[key][-1] + item[key].size(item.__cat_dim__(key, item[key]))
129 |             # print("s^^^:", s)
130 |             
131 |         else:
132 |             s = slices[key][-1] + 1
133 |             # print("s***:", s)
134 |         slices[key].append(s)
135 |         # print("slices_after:", slices)
136 |     
137 |     # print("final_data:", data)
138 |     # println()
139 |     
140 |     
141 |     if hasattr(data_list[0], '__num_nodes__'):
142 |         data.__num_nodes__ = []
143 |         for item in data_list:
144 |             data.__num_nodes__.append(item.num_nodes)
145 |     
146 |     for key in keys:
147 |         item = data_list[0][key]
148 |         if torch.is_tensor(item):
149 |             print("__data[key]:", len(data[key]))
150 |             print("tmp:", data.__cat_dim__(key, item))
151 |             
152 |             data[key] = torch.cat(data[key],
153 |                                   dim=data.__cat_dim__(key, item))
154 |             print("data[key]__:", len(data[key]))
155 |             
156 |         elif isinstance(item, int) or isinstance(item, float):
157 |             data[key] = torch.tensor(data[key])
158 |     
159 |         slices[key] = torch.tensor(slices[key], dtype=torch.long)
160 |         
161 |         
162 |     # print("data:", data)
163 |     # print("slices:", slices)   
164 |     com = (data, slices)
165 |     # print(com)
166 |     return com
167 | # import os.path as osp
168 | # def get(idx):
169 | #     data = torch.load(osp.join("../data/dataset/processed/", 'dataset_{}.pt',format(idx)))
170 | #     return data
# Script entry: load the pickled graph written as hy_8.pickle, convert it to a
# Data object, collate it into a (data, slices) tuple and save that tuple.
hy_graph = load_data("../data/hy_8.pickle")
d = nx_to_graph_data_obj(hy_graph)
com = get_data(d)
torch.save(com,'../data/dataset/processed/dataset_9.pt')
175 | 
176 | 
177 | 


--------------------------------------------------------------------------------
/code/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch_geometric.nn import GCNConv, GATConv
  5 | from torch_geometric.utils import to_dense_adj
  6 | 
  7 | class Encoder(torch.nn.Module):
  8 |     def __init__(self, in_channels: int, out_channels: int, activation,
  9 |                  base_model=GCNConv, k: int = 2):
 10 |         super(Encoder, self).__init__()
 11 |         self.base_model = base_model
 12 | 
 13 |         assert k >= 2
 14 |         self.k = k
 15 |         self.conv = [base_model(in_channels, 2 * out_channels)]
 16 |         for _ in range(1, k-1):
 17 |             self.conv.append(base_model(2 * out_channels, 2 * out_channels))
 18 |         self.conv.append(base_model(2 * out_channels, out_channels))
 19 |         self.conv = nn.ModuleList(self.conv)
 20 |         self.activation = activation
 21 |     def forward(self, x: torch.Tensor, edge_index: torch.Tensor):
 22 |         for i in range(self.k):
 23 |             x = self.activation(self.conv[i](x, edge_index))
 24 |         return x
 25 |     
 26 | class GCN(torch.nn.Module):
 27 |     def __init__(self, in_channels: int, out_channels: int, n_class: int, activation,
 28 |                  base_model=GCNConv, dropout: float=0.5):
 29 |         super(GCN, self).__init__()
 30 |         self.base_model = base_model
 31 | 
 32 |         self.conv1 = base_model(in_channels, out_channels)
 33 |         self.head = base_model(out_channels, n_class)
 34 |         self.dropout = dropout
 35 |         self.activation = activation
 36 |         
 37 |     def forward(self, x: torch.Tensor, edge_index: torch.Tensor):
 38 |         x = F.dropout(x, self.dropout, training=self.training)
 39 |         x = self.activation(self.conv1(x, edge_index))
 40 |         x = F.dropout(x, self.dropout, training=self.training)
 41 |         return F.log_softmax(self.head(x, edge_index), dim=1)
 42 |     
 43 | class GAT(torch.nn.Module):
 44 |     def __init__(self, in_channels: int, out_channels: int, n_class: int, activation,
 45 |                  base_model=GATConv, input_dropout: float=0.5, coef_dropout: float=0.5):
 46 |         super(GAT, self).__init__()
 47 |         self.base_model = base_model
 48 |         self.conv1 = base_model(in_channels, out_channels, 8, dropout=coef_dropout)
 49 |         self.head = base_model(out_channels*8, n_class, 1, dropout=coef_dropout)
 50 |         self.dropout = input_dropout
 51 |         self.activation = activation
 52 |         
 53 |     def forward(self, x: torch.Tensor, edge_index: torch.Tensor):
 54 |         x = F.dropout(x, self.dropout, training=self.training)
 55 |         x = self.activation(self.conv1(x, edge_index))
 56 |         x = F.dropout(x, self.dropout, training=self.training)
 57 |         return F.log_softmax(self.head(x, edge_index), dim=1)
 58 | 
 59 | 
 60 | class Model(torch.nn.Module):
 61 |     def __init__(self, encoder: Encoder, num_hidden: int, num_proj_hidden: int,
 62 |                  tau: float = 0.5):
 63 |         super(Model, self).__init__()
 64 |         self.encoder: Encoder = encoder
 65 |         self.tau: float = tau
 66 | 
 67 |         self.fc1 = torch.nn.Linear(num_hidden, num_proj_hidden)
 68 |         self.fc2 = torch.nn.Linear(num_proj_hidden, num_hidden)
 69 |         self.cos = nn.CosineSimilarity()
 70 |         
 71 |         
 72 |     def forward(self, x: torch.Tensor,
 73 |                 adj: torch.Tensor) -> torch.Tensor:
 74 |        
 75 |         return self.encoder(x, adj)
 76 | 
 77 |     def projection(self, z: torch.Tensor) -> torch.Tensor:
 78 |         z = F.elu(self.fc1(z))
 79 |         return self.fc2(z)
 80 | 
 81 |     def sim(self, z1: torch.Tensor, z2: torch.Tensor):
 82 |         z1 = F.normalize(z1)
 83 |         z2 = F.normalize(z2)
 84 |         return torch.mm(z1, z2.t())
 85 | 
 86 |     def semi_loss(self, z1: torch.Tensor, z2: torch.Tensor):
 87 |         f = lambda x: torch.exp(x / self.tau)
 88 |         refl_sim = f(self.sim(z1, z1))
 89 |         between_sim = f(self.sim(z1, z2))
 90 | 
 91 |         return -torch.log(
 92 |             between_sim.diag()
 93 |             / (refl_sim.sum(1) + between_sim.sum(1) - refl_sim.diag()))
 94 | 
 95 |     def batched_semi_loss(self, z1: torch.Tensor, z2: torch.Tensor,
 96 |                           batch_size: int):
 97 |         # Space complexity: O(BN) (semi_loss: O(N^2))
 98 |         device = z1.device
 99 |         num_nodes = z1.size(0)
100 |         num_batches = (num_nodes - 1) // batch_size + 1
101 |         f = lambda x: torch.exp(x / self.tau)
102 |         indices = torch.arange(0, num_nodes).to(device)
103 |         losses = []
104 | 
105 |         for i in range(num_batches):
106 |             mask = indices[i * batch_size:(i + 1) * batch_size]
107 |             refl_sim = f(self.sim(z1[mask], z1))  # [B, N]
108 |             between_sim = f(self.sim(z1[mask], z2))  # [B, N]
109 | 
110 |             losses.append(-torch.log(
111 |                 between_sim[:, i * batch_size:(i + 1) * batch_size].diag()
112 |                 / (refl_sim.sum(1) + between_sim.sum(1)
113 |                    - refl_sim[:, i * batch_size:(i + 1) * batch_size].diag())))
114 | 
115 |         return torch.cat(losses)
116 | 
117 |     def loss(self, z1: torch.Tensor, z2: torch.Tensor,
118 |              mean: bool = True, batch_size: int = 0):
119 |         h1 = self.projection(z1)
120 |         h2 = self.projection(z2)
121 |         simi = torch.exp(self.cos(h1,h2)/self.tau)
122 |             
123 |         if batch_size == 0:
124 |             l1 = self.semi_loss(h1, h2)
125 |             l2 = self.semi_loss(h2, h1)
126 |         else:
127 |             l1 = self.batched_semi_loss(h1, h2, batch_size)
128 |             l2 = self.batched_semi_loss(h2, h1, batch_size)
129 | 
130 |         ret = (l1 + l2) * 0.5
131 |         #ret = ret.mean() if mean else ret.sum()
132 | 
133 |         return ret, simi
134 | 
135 | 
def drop_feature(x, drop_prob):
    """Return a copy of ``x`` with randomly chosen feature columns set to zero.

    One uniform sample in [0, 1) is drawn per column of ``x``; columns whose
    sample is below ``drop_prob`` are zeroed for every row. ``x`` itself is
    left untouched.
    """
    num_features = x.size(1)
    noise = torch.empty((num_features, ), dtype=torch.float32, device=x.device)
    noise.uniform_(0, 1)
    dropped_columns = noise < drop_prob

    masked = x.clone()
    masked[:, dropped_columns] = 0
    return masked
145 | 
146 | 
class LogReg(nn.Module):
    """Single linear layer mapping ``ft_in`` features to ``nb_classes`` scores.

    Linear weights are Xavier-uniform initialised and biases set to zero.
    """

    def __init__(self, ft_in, nb_classes):
        super().__init__()
        self.fc = nn.Linear(ft_in, nb_classes)

        # Initialise every sub-module (only nn.Linear instances are affected).
        for module in self.modules():
            self.weights_init(module)

    def weights_init(self, m):
        """Xavier-initialise ``m`` and zero its bias when ``m`` is an ``nn.Linear``."""
        if not isinstance(m, nn.Linear):
            return
        torch.nn.init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
            m.bias.data.fill_(0.0)

    def forward(self, seq):
        """Return the raw (un-normalised) class scores for ``seq``."""
        return self.fc(seq)
164 | 


--------------------------------------------------------------------------------
/pre_s6_dataloader.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch
  3 | from torch_geometric.data import Data
  4 | from itertools import product
  5 | import numpy as np
  6 | import pandas as pd
  7 | from torch import nn
  8 | import pickle
  9 | 
 10 | def load_data(file):
 11 |         data_load_file = []
 12 |         file_1 = open(file, "rb")
 13 |         data_load_file = pickle.load(file_1)
 14 |         return data_load_file
# Module-level setup executed on import.
resolution = 500
# Linear map from 512 input features to 96 output features.
# NOTE(review): the layer is freshly initialised here and is neither trained nor
# seeded in this file, so region_trans differs between runs unless a seed is
# set elsewhere -- confirm this is intended.
linear = nn.Linear(512, 96)
# node_features_1 = load_data("../data/region_spatial_refine.pickle") 
# node_features = load_data("../data/region_spatial_refine.pickle")
# NOTE(review): node_features is loaded but nx_to_graph_data_obj below reads
# region_trans instead -- confirm whether this load is still needed.
node_features = load_data("../data/region_spatial_refine.pickle")
# Pickled input passed straight to `linear`, so it must be a tensor whose last
# dimension is 512.
region_poi_vec = load_data("../data/reg_poi_vec.pickle")
# Projected features; indexed by region number in nx_to_graph_data_obj.
region_trans = linear(region_poi_vec)
 22 | # print(region_trans)
 23 | # print(region_trans.size())
 24 | # pritnln()
 25 | # reg_com_poi_cat_spatial.pickle
 26 | # node_features = load_data("../data/reg_vector_dict.pickle") 
 27 | 
 28 | # node_features_2 = load_data("../data/reg_flow_dict_vec_4.pickle")
 29 | # print(type(node_features_1))
 30 | # print(type(node_features_2))
 31 | # node_feature = [torch.mean(torch.cat((i[1],j),axis= 1)) for i,j in zip(node_features_1.items(),node_features_2)]
 32 | 
 33 | # print(node_feature[0].size())
 34 | # print(len(node_feature))
 35 | # println()
 36 | def nx_to_graph_data_obj(g):
 37 |     n_nodes = g.number_of_nodes()
 38 |     n_edges = g.number_of_edges()
 39 |     # nodes
 40 |     nx_node_ids = [n_i for n_i in g.nodes()]  # contains list of nx node ids
 41 |     # print("nx_node_ids:", nx_node_ids)
 42 |     # n = np.array([nx_node_ids.index(n_i) for n_i in g.nodes()])
 43 |     x_ = torch.tensor(np.ones(n_nodes).reshape(-1, 1), dtype=torch.float)
 44 |     # print("nx_node_ids:",nx_node_ids)
 45 |     n_nodes = [int(item.split("_")[1]) for item in nx_node_ids]
 46 |     # print("n_nodes:",n_nodes)
 47 |     # print(len(n_nodes))
 48 |     # x = torch.tensor([torch.squeeze(node_features[item],0).tolist() for item in n_nodes])
 49 |     # x = torch.tensor([torch.squeeze(node_features[item],0).tolist() for item in n_nodes])
 50 |     x = torch.tensor([region_trans[item].tolist() for item in n_nodes])
 51 |     # print("x:",x.size())
 52 |     # print(x_.size())
 53 |     # printnln()
 54 |     file=open(r"../data/nodes_new_{}.pickle".format(7),"wb")
 55 |     pickle.dump(nx_node_ids,file) #storing_list
 56 |     file.close()
 57 |     # x = torch.tensor(n.reshape(-1, 1), dtype=torch.float)
 58 |     # print("x:", x)
 59 |     # println()
 60 |     # edges
 61 |     edges_list = []
 62 |     edge_features_list = []
 63 |     for node_1, node_2, attr_dict in g.edges(data=True):
 64 |         # print("attr_dict:", attr_dict)
 65 |         # print("node_1:", node_1)
 66 |         # print("node_2:", node_2)
 67 |         edge_feature = [attr_dict['weight'], attr_dict['date'], nx_node_ids.index(attr_dict['start']), nx_node_ids.index(attr_dict['end'])]  # last 2 indicate self-loop
 68 |         # and masking
 69 |         edge_feature = np.array(edge_feature, dtype=int)
 70 |         # convert nx node ids to data obj node index
 71 |         i = nx_node_ids.index(node_1)
 72 |         j = nx_node_ids.index(node_2)
 73 |         edges_list.append((i, j))
 74 |         edge_features_list.append(edge_feature)
 75 |     # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
 76 |     edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long)
 77 |     # print("edge_index:", edge_index)
 78 |     
 79 |     # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
 80 |     edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.float)
 81 |     # print("edge_attr:", edge_attr.size())
 82 |     # println()
 83 |     # construct data obj
 84 |     data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
 85 |     return data
 86 | 
 87 | # print(data)
 88 | # println()
 89 | 
 90 | 
 91 | # edge_index = torch.tensor([
 92 | #     [3, 1, 1, 2],
 93 | #     [1, 3, 2, 1]], dtype=torch.long)
 94 | # x = torch.tensor([[-1],
 95 | #                   [0],
 96 | #                   [1]], dtype=torch.float)
 97 | 
 98 | # d = Data(x=x, edge_index=edge_index)
 99 | # print(type(d)) # # <class 'torch_geometric.data.data.Data'>
def get_data(d):
    """Pack a single data object into a ``(data, slices)`` pair.

    A fresh instance of ``d``'s class is filled with the concatenation of
    every field of ``d`` (here a concatenation of exactly one item), and
    ``slices`` records, per field, the start/end offsets of that item along
    the field's concatenation dimension.

    Args:
        d: Object exposing ``keys``, item access by key and ``__cat_dim__``.

    Returns:
        Tuple ``(data, slices)`` where ``slices`` maps each key to a long
        tensor of offsets.
    """
    graphs = [d]
    field_names = d.keys
    merged = d.__class__()
    for name in field_names:
        merged[name] = []
    slices = {name: [0] for name in field_names}

    # Gather every field of every graph and track the running offsets.
    for graph in graphs:
        for name in field_names:
            value = graph[name]
            merged[name].append(value)
            if torch.is_tensor(value):
                step = value.size(graph.__cat_dim__(name, value))
            else:
                step = 1
            slices[name].append(slices[name][-1] + step)

    if hasattr(d, '__num_nodes__'):
        merged.__num_nodes__ = []
        for graph in graphs:
            merged.__num_nodes__.append(graph.num_nodes)

    # Collapse the gathered lists into tensors.
    for name in field_names:
        sample = d[name]
        if torch.is_tensor(sample):
            print("__data[key]:", len(merged[name]))
            print("tmp:", merged.__cat_dim__(name, sample))

            merged[name] = torch.cat(merged[name],
                                     dim=merged.__cat_dim__(name, sample))
            print("data[key]__:", len(merged[name]))
        elif isinstance(sample, (int, float)):
            merged[name] = torch.tensor(merged[name])

        slices[name] = torch.tensor(slices[name], dtype=torch.long)

    return (merged, slices)
170 | # import os.path as osp
171 | # def get(idx):
172 | #     data = torch.load(osp.join("../data/dataset/processed/", 'dataset_{}.pt',format(idx)))
173 | #     return data
# Driver: load the pickled graph, convert it to a Data object, pack it into
# the (data, slices) pair and save that pair with torch.save.
# hy_graph = load_data("../data/hy_new_s.pickle")
hy_graph = load_data("../data/hy_new_test_60.pickle")
d = nx_to_graph_data_obj(hy_graph)
com = get_data(d)
# NOTE(review): inputs are read from "../data" while the output goes under
# "./data" — confirm the intended working directory.
torch.save(com,'./data/dataset/processed/dataset_new_60.pt')
179 | 
180 | 
181 | 


--------------------------------------------------------------------------------
/code/data_pre/pre_s3.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | #this file for traffic
  4 | import numpy as np
  5 | import pandas as pd
  6 | from shapely.geometry import Point, LineString
  7 | from shapely.geometry import Polygon,MultiPoint  #多边形
  8 | import matplotlib.pyplot as plt
  9 | import json
 10 | from urllib.request import urlopen, quote
 11 | import requests
 12 | import geopy
 13 | from geopy.geocoders import Nominatim
 14 | import copy
 15 | import pickle
 16 | from datetime import datetime
 17 | from itertools import chain
 18 | import networkx as nx
 19 | import numpy as np
 20 | import matplotlib.pyplot as plt
 21 | from math import radians, cos, sin, asin, sqrt
 22 | 
 23 | def haversine(lon1, lat1, lon2, lat2): # 经度1，纬度1，经度2，纬度2 （十进制度数）
 24 |     """
 25 |     Calculate the great circle distance between two points 
 26 |     on the earth (specified in decimal degrees)
 27 |     """
 28 |     # 将十进制度数转化为弧度
 29 |     lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
 30 | 
 31 |     # haversine公式
 32 |     dlon = lon2 - lon1 
 33 |     dlat = lat2 - lat1 
 34 |     a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
 35 |     c = 2 * asin(sqrt(a)) 
 36 |     r = 6371 # 地球平均半径，单位为公里
 37 |     return c * r * 1000
 38 | 
 39 | def load_data(file):
 40 |         data_load_file = []
 41 |         file_1 = open(file, "rb")
 42 |         data_load_file = pickle.load(file_1)
 43 |         return data_load_file
 44 |     
# Inputs, read relative to the current working directory.
# region_back: mapping of region id -> geometry; the code below calls
#   .intersects() with it and reads .exterior on it (presumably shapely
#   Polygons — TODO confirm).
# region_traffic: sequence of trip records that are indexed by position below.
region_back = load_data("../data/region_back.pickle")
region_traffic = load_data("../data/NY_traffic.pickle")
 47 | # region_ = {}
 48 | # for key,value in region.items():
 49 | #     if [] not in value and len(value)>=3:
 50 | #         region_[key] = value
 51 | # region_back = {}
 52 | # for idx, tt in enumerate(region_.items()):
 53 | #     # print(tt)
 54 | #     region_back[idx] = tt[1]
 55 | # print(region_back.keys())
 56 | # print("@@:", len(region))
 57 | # print("%%:", len(region_back))
 58 | # printlnn
 59 | 
 60 | from collections import Counter
 61 | 
 62 | sp_tm = []
 63 | for item in region_traffic[:]: #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID']
 64 | # for key,value in region_back.items(): ## remember to test the whether the region is [].
 65 |     # print(item)
 66 |     # println()
 67 |     # Point(4, 4)
 68 |     dropoff_pos =  Point(item[7],item[8])
 69 |     pickup_pos =  Point(item[5],item[6])
 70 |     # print("11:",dropoff_pos)
 71 |     # print("22:",pickup_pos)
 72 |     # poritnlnn()
 73 |     tmp_idx = []
 74 |     for key,value in region_back.items(): ## remember to test the whether the region is [].
 75 |     # for item in region_traffic[:]: #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID']
 76 |         
 77 |         # print("value:",value)
 78 |         # print("dropoff_pos:",dropoff_pos)
 79 |         # print("dropoff_pos:",dropoff_pos)
 80 |         # pritnln()
 81 |         tmp_poly = value
 82 |         # poly_shape.intersects(point))
 83 |         if dropoff_pos.intersects(tmp_poly):
 84 |                 dropoff_idx = key
 85 |                 tmp_idx.append(dropoff_idx)
 86 |         if pickup_pos.intersects(tmp_poly):
 87 |                 pickup_idx = key
 88 |                 tmp_idx.append(pickup_idx)
 89 |     # print("tmp_idx:", tmp_idx)
 90 |     # print("item:", item)
 91 |     if len(tmp_idx)==2:
 92 |         # print("tmp_idx:", tmp_idx)
 93 |         # print("item:", item)
 94 |         sp_tm.append((tmp_idx[1], tmp_idx[0], item[-1])) #起点/终点/日期
 95 | result = pd.value_counts(sp_tm)
 96 | print("result:", result)
 97 | # println()
 98 | 
 99 | unique_region = list(set(sp_tm))
100 | 
101 | ##building flow graph
102 | flow_edges = []
103 | for key,value in result.to_dict().items():
104 |     # print("key:", key)
105 |     # print("value:", value)
106 |     
107 |     # println()
108 |     #pair = ('r_{}_{}'.format(region_dict[key[0]], key[-1]), 'r_{}_{}'.format(region_dict[key[1]], key[-1] + 1), value)
109 |     pair = ('r_{}_{}'.format(key[0], int(key[-1])),'r_{}_{}'.format(key[1], int(key[-1]+1)), {"weight":value, "date":int(key[-1]), "start":'r_{}_{}'.format(key[0], int(key[-1])), "end":'r_{}_{}'.format(key[1], int(key[-1]+1))})
110 |     flow_edges.append(pair)
111 | 
112 | print("finish flow graph")
113 |  
114 | 
115 | ##bulding spatial graph
## building spatial graph
spatial_dis = []   # kept for compatibility; never populated
spatial_dict = {}

# Distinct flow-graph node names in first-seen order. A dict gives O(1)
# de-duplication where the previous list scan was O(N) per node. The time
# slot is formatted through int() exactly like the flow-edge node names so
# both spellings always agree.
seen_nodes = {}
for item in unique_region:
    seen_nodes["r_{}_{}".format(item[0], int(item[-1]))] = None
    seen_nodes["r_{}_{}".format(item[1], int(item[-1]) + 1)] = None
flow_nodes = list(seen_nodes)

print("finish flow nodes")
spatial_edges = []
spatial_edges.extend(flow_edges)  # add edges in flow graph

# Hashable fingerprint of every edge already present; replaces the linear
# "pair not in spatial_edges" scan with a set lookup of identical meaning.
seen_edges = {
    (u, v, attrs["weight"], attrs["date"], attrs["start"], attrs["end"])
    for u, v, attrs in spatial_edges
}

# Parse node names once and cache one position per region instead of
# recomputing it for every node pair. The position is the plain average of
# the polygon's exterior vertices (lon, lat).
node_parts = [name.split("_") for name in flow_nodes]
node_region = [int(parts[1]) for parts in node_parts]
region_pos = {}
for region_id in node_region:
    if region_id not in region_pos:
        region_pos[region_id] = np.average(
            list(zip(*region_back[region_id].exterior.coords.xy)), axis=0)

sim_num = 0
for ii, node_i in enumerate(flow_nodes):
    t_1 = node_parts[ii]
    t_1_pos = region_pos[node_region[ii]]
    for jj in range(ii + 1, len(flow_nodes)):
        node_j = flow_nodes[jj]
        t_2_pos = region_pos[node_region[jj]]
        value = haversine(t_1_pos[0], t_1_pos[1], t_2_pos[0], t_2_pos[1])
        if value <= 2500:  # regions at most 2.5 km apart are linked
            sim_num += 1
            date = int(t_1[2])
            edge_key = (node_i, node_j, value, date, node_i, node_j)
            if edge_key not in seen_edges:
                seen_edges.add(edge_key)
                pair = (node_i, node_j,
                        {"weight": value, "date": date, "start": node_i, "end": node_j})
                spatial_edges.append(pair)

# Add temporal edges linking each region to itself at the next time slot.
params_resolution  = 3
for z in region_back.keys():
    for j in range(params_resolution):
        ox = "r_{}_{}".format(z, j)
        oy = "r_{}_{}".format(z, j+1)
        edge_key = (ox, oy, 0, int(j), ox, oy)
        if edge_key not in seen_edges:
            seen_edges.add(edge_key)
            pair = (ox, oy, {"weight": 0, "date": int(j), "start": ox, "end": oy})
            spatial_edges.append(pair)
print(len(spatial_edges))
print("finish spatial graph")
173 | 
# Flow graph: only the trip-derived edges.
G_flow = nx.Graph()
G_flow.add_edges_from(flow_edges[:])
print("G_flow:",G_flow)

# Spatial graph: flow edges plus proximity and temporal edges.
# NOTE(review): nx.Graph is undirected and keeps one attribute dict per node
# pair, so a later edge for the same pair updates the earlier attributes
# (e.g. a distance weight replacing a flow count) — confirm this is intended.
G_spatial = nx.Graph()
G_spatial.add_edges_from(spatial_edges[:])
print("G_spatial:",G_spatial)

# Persist both graphs; the context managers close the files even if
# pickling raises.
with open(r"../data/flow_graph.pickle","wb") as file:
    pickle.dump(G_flow,file)

with open(r"../data/spatial_graph.pickle","wb") as file:
    pickle.dump(G_spatial,file)

print("----spatial----:", G_spatial)
print("----flow----:",G_flow)
196 | 
197 |     


--------------------------------------------------------------------------------
/pre_s3.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import pandas as pd
  4 | from shapely.geometry import Point, LineString
  5 | from shapely.geometry import Polygon,MultiPoint  #多边形
  6 | import matplotlib.pyplot as plt
  7 | import json
  8 | from urllib.request import urlopen, quote
  9 | import requests
 10 | import geopy
 11 | from geopy.geocoders import Nominatim
 12 | import copy
 13 | import pickle
 14 | from datetime import datetime
 15 | from itertools import chain
 16 | import networkx as nx
 17 | import numpy as np
 18 | import matplotlib.pyplot as plt
 19 | from math import radians, cos, sin, asin, sqrt
 20 | 
 21 | def haversine(lon1, lat1, lon2, lat2): # 
 22 |     """
 23 |     Calculate the great circle distance between two points 
 24 |     on the earth (specified in decimal degrees)
 25 |     """
 26 |     # 将十进制度数转化为弧度
 27 |     lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
 28 | 
 29 |     # haversine公式
 30 |     dlon = lon2 - lon1 
 31 |     dlat = lat2 - lat1 
 32 |     a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
 33 |     c = 2 * asin(sqrt(a)) 
 34 |     r = 6371 # 地球平均半径，单位为公里
 35 |     return c * r * 1000
 36 | 
 37 | def load_data(file):
 38 |         data_load_file = []
 39 |         file_1 = open(file, "rb")
 40 |         data_load_file = pickle.load(file_1)
 41 |         return data_load_file
 42 |     
# Inputs, read relative to the current working directory.
# region_back: mapping of region id -> geometry; the code below calls
#   .intersects() with it and reads .centroid on it (presumably shapely
#   Polygons — TODO confirm).
# region_traffic: sequence of trip records that are indexed by position below.
region_back = load_data("../data/region_back_merge.pickle")
region_traffic = load_data("../data/NY_traffic_2.pickle")
 45 | 
 46 | 
 47 | from collections import Counter
 48 | 
 49 | sp_tm = []
 50 | for item in region_traffic[:]: #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID']
 51 | # for key,value in region_back.items(): ## remember to test the whether the region is [].
 52 |     # print(item)
 53 |     # println()
 54 |     # Point(4, 4)
 55 |     dropoff_pos =  Point(item[7],item[8])
 56 |     pickup_pos =  Point(item[5],item[6])
 57 |     # print("11:",dropoff_pos)
 58 |     # print("22:",pickup_pos)
 59 |     # poritnlnn()
 60 |     tmp_idx = []
 61 |     for key,value in region_back.items(): ## remember to test the whether the region is [].
 62 |     # for item in region_traffic[:]: #['VendorID', 'lpep_pickup_datetime', 'Lpep_dropoff_datetime', 'Store_and_fwd_flag', 'RateCodeID', 'Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude', 'Dropoff_latitude', 'Passenger_count', 'Trip_distance', 'Fare_amount', 'Extra', 'MTA_tax', 'Tip_amount', 'Tolls_amount', 'Ehail_fee', 'improvement_surcharge', 'Total_amount', 'Payment_type', 'Trip_type ', 'PULocationID', 'DOLocationID']
 63 | 
 64 |         tmp_poly = value
 65 |         # poly_shape.intersects(point))
 66 |         if dropoff_pos.intersects(tmp_poly):
 67 |                 dropoff_idx = key
 68 |                 tmp_idx.append(dropoff_idx)
 69 |         if pickup_pos.intersects(tmp_poly):
 70 |                 pickup_idx = key
 71 |                 tmp_idx.append(pickup_idx)
 72 |     # print("tmp_idx:", tmp_idx)
 73 |     # print("item:", item)
 74 |     if len(tmp_idx)==2:
 75 |         # print("tmp_idx:", tmp_idx)
 76 |         # print("item:", item)
 77 |         sp_tm.append((tmp_idx[1], tmp_idx[0], item[-1])) #起点/终点/日期
 78 | result = pd.value_counts(sp_tm)
 79 | print("result:", result)
 80 | # println()
 81 | 
 82 | unique_region = list(set(sp_tm))
 83 | 
 84 | ##building flow graph
 85 | flow_edges = []
 86 | for key,value in result.to_dict().items():
 87 |     # print("key:", key)
 88 |     # print("value:", value)
 89 |     # if value>10:
 90 |     # println()
 91 |     #pair = ('r_{}_{}'.format(region_dict[key[0]], key[-1]), 'r_{}_{}'.format(region_dict[key[1]], key[-1] + 1), value)
 92 |     pair = ('r_{}_{}'.format(key[0], int(key[-1])),'r_{}_{}'.format(key[1], int(key[-1]+1)), {"weight":1, "date":int(key[-1]), "start":'r_{}_{}'.format(key[0], int(key[-1])), "end":'r_{}_{}'.format(key[1], int(key[-1]+1))})
 93 |     flow_edges.append(pair)
 94 |     # else:
 95 |     #     # println()
 96 |     #     #pair = ('r_{}_{}'.format(region_dict[key[0]], key[-1]), 'r_{}_{}'.format(region_dict[key[1]], key[-1] + 1), value)
 97 |     #     pair = ('r_{}_{}'.format(key[0], int(key[-1])),'r_{}_{}'.format(key[1], int(key[-1]+1)), {"weight":0, "date":int(key[-1]), "start":'r_{}_{}'.format(key[0], int(key[-1])), "end":'r_{}_{}'.format(key[1], int(key[-1]+1))})
 98 |     #     flow_edges.append(pair)
 99 | 
100 | print("finish flow graph")
101 | 
102 | # G_flow = nx.Graph()
103 | # G_flow.add_edges_from(flow_edges[:])
104 | 
105 | # file=open(r"../data/flow_graph_new_baseline.pickle","wb")
106 | # pickle.dump(G_flow,file) #storing_list
107 | # file.close()
108 | # println()
109 |  
110 | 
111 | ##bulding spatial graph
## building spatial graph
spatial_dis = []   # kept for compatibility; never populated
spatial_dict = {}

# Distinct flow-graph node names in first-seen order. A dict gives O(1)
# de-duplication where the previous list scan was O(N) per node. The time
# slot is formatted through int() exactly like the flow-edge node names so
# both spellings always agree.
seen_nodes = {}
for item in unique_region:
    seen_nodes["r_{}_{}".format(item[0], int(item[-1]))] = None
    seen_nodes["r_{}_{}".format(item[1], int(item[-1]) + 1)] = None
flow_nodes = list(seen_nodes)

print("finish flow nodes")
spatial_edges = []
spatial_edges.extend(flow_edges)  # add edges in flow graph

# Hashable fingerprint of every edge already present; replaces the linear
# "pair not in spatial_edges" scan with a set lookup of identical meaning.
seen_edges = {
    (u, v, attrs["weight"], attrs["date"], attrs["start"], attrs["end"])
    for u, v, attrs in spatial_edges
}

# Parse node names once and cache one centroid per region instead of
# recomputing it for every node pair.
node_parts = [name.split("_") for name in flow_nodes]
node_region = [int(parts[1]) for parts in node_parts]
region_pos = {}
for region_id in node_region:
    if region_id not in region_pos:
        region_pos[region_id] = list(region_back[region_id].centroid.coords)[0]

sim_num = 0
for ii, node_i in enumerate(flow_nodes):
    t_1 = node_parts[ii]
    t_1_pos = region_pos[node_region[ii]]
    for jj in range(ii + 1, len(flow_nodes)):
        node_j = flow_nodes[jj]
        t_2_pos = region_pos[node_region[jj]]
        value = haversine(t_1_pos[0], t_1_pos[1], t_2_pos[0], t_2_pos[1])
        if value <= 5000:  # regions at most 5 km apart are linked
            sim_num += 1
            date = int(t_1[2])
            edge_key = (node_i, node_j, value, date, node_i, node_j)
            if edge_key not in seen_edges:
                seen_edges.add(edge_key)
                pair = (node_i, node_j,
                        {"weight": value, "date": date, "start": node_i, "end": node_j})
                spatial_edges.append(pair)

# Add temporal edges linking each region to itself at the next time slot.
params_resolution  = 2
for z in region_back.keys():
    for j in range(params_resolution):
        ox = "r_{}_{}".format(z, j)
        oy = "r_{}_{}".format(z, j+1)
        edge_key = (ox, oy, 0, int(j), ox, oy)
        if edge_key not in seen_edges:
            seen_edges.add(edge_key)
            pair = (ox, oy, {"weight": 0, "date": int(j), "start": ox, "end": oy})
            spatial_edges.append(pair)
print(len(spatial_edges))
print("finish spatial graph")
175 | 
# Flow graph: only the trip-derived edges.
G_flow = nx.Graph()
G_flow.add_edges_from(flow_edges[:])
print("G_flow:",G_flow)

# Spatial graph: flow edges plus proximity and temporal edges.
# NOTE(review): nx.Graph is undirected and keeps one attribute dict per node
# pair, so a later edge for the same pair updates the earlier attributes
# (e.g. a distance weight replacing a flow weight) — confirm this is intended.
G_spatial = nx.Graph()
G_spatial.add_edges_from(spatial_edges[:])
print("G_spatial:",G_spatial)

# Persist both graphs; the context managers close the files even if
# pickling raises.
with open(r"../data/flow_graph_new_1.pickle","wb") as file:
    pickle.dump(G_flow,file)

with open(r"../data/spatial_graph_new_1.pickle","wb") as file:
    pickle.dump(G_spatial,file)

print("----spatial----:", G_spatial)
print("----flow----:",G_flow)
198 | 
199 |     


--------------------------------------------------------------------------------
/code/train.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os.path as osp
  3 | import random
  4 | from time import perf_counter as t
  5 | import yaml
  6 | import numpy as np
  7 | from yaml import SafeLoader
  8 | from scipy.linalg import fractional_matrix_power, inv
  9 | from torch.utils.data import random_split
 10 | import torch
 11 | import torch_geometric.transforms as T
 12 | import torch.nn.functional as F
 13 | import torch.nn as nn
 14 | from layers import GCNConv
 15 | import networkx as nx
 16 | import matplotlib.pyplot as plt
 17 | from torch_geometric.datasets import Planetoid, CitationFull, Amazon, Coauthor, GitHub, FacebookPagePage, LastFMAsia, DeezerEurope
 18 | from torch_geometric.utils import dropout_adj
 19 | from model import Encoder, Model, drop_feature
 20 | from utils import normalize_adj_tensor, normalize_adj_tensor_sp, edge2adj
 21 | from attack import PGD_attack_graph
 22 | from eval import label_classification
 23 | import pickle
 24 | 
def train(model: Model, x, edge_index, eps, lamb, alpha, beta, steps, node_ratio):
    """Run one optimisation step on a sampled subgraph.

    Builds two augmented views (edge dropout + feature dropout), optionally an
    adversarial view via ``PGD_attack_graph`` when ``eps > 0``, combines the
    model's losses and updates the parameters.

    Relies on the module-level ``optimizer`` and ``drop_edge_rate_*`` /
    ``drop_feature_rate_*`` values created in the ``__main__`` block.

    Args:
        model: Model providing the forward pass and ``loss``.
        x: Node features of the sampled subgraph.
        edge_index: Edge list of the sampled subgraph.
        eps: Weight of the adversarial loss; the attack is skipped when <= 0.
        lamb: Weight of the similarity-based regularisation term.
        alpha, beta, steps, node_ratio: Forwarded unchanged to
            ``PGD_attack_graph``.

    Returns:
        Tuple ``(loss1, loss2)`` of Python floats; ``loss2`` equals ``loss1``
        when ``eps <= 0``.
    """
    optimizer.zero_grad()
    adj = edge2adj(x, edge_index)
    # Two stochastic views; the call order fixes how random numbers are consumed.
    edge_index_1 = dropout_adj(edge_index, p=drop_edge_rate_1)[0]
    edge_index_2 = dropout_adj(edge_index, p=drop_edge_rate_2)[0]
    x_1 = drop_feature(x, drop_feature_rate_1)
    x_2 = drop_feature(x, drop_feature_rate_2)  

    adj_1 = edge2adj(x_1, edge_index_1)
    adj_2 = edge2adj(x_2, edge_index_2)
    
    if eps > 0:
        # Adversarial view derived from the first augmented view.
        adj_3, x_3 = PGD_attack_graph(model, edge_index_1, edge_index, x_1, x, steps, node_ratio, alpha, beta)
    z = model(x, adj)
    z_1 = model(x_1, adj_1)
    z_2 = model(x_2, adj_2)
    loss1, simi1 = model.loss(z_1,z_2,batch_size=0)
    # Only the similarities of the next two calls are used; loss3 is unused and
    # this loss2 is overwritten in both branches below.
    loss2, simi2 = model.loss(z_1,z,batch_size=0)
    loss3, simi3 = model.loss(z_2,z,batch_size=0)
    # Penalise view-to-view similarity exceeding the (detached) view-to-original
    # similarities; clamped at zero.
    loss1 = loss1.mean() + lamb*torch.clamp(simi1*2 - simi2.detach()-simi3.detach(), 0).mean()
    if eps > 0:  
        z_3 = model(x_3,adj_3)
        # NOTE(review): unlike the calls above, batch_size is not passed here —
        # confirm that the default of model.loss is the intended one.
        loss2, _ = model.loss(z_1,z_3)
        loss2 = loss2.mean()
        loss = (loss1 + eps*loss2)
    else: 
        loss = loss1
        loss2 = loss1

    loss.backward()
    optimizer.step()
 
    return loss1.item(), loss2.item()
 64 | 
 65 | def test(model: Model, x, edge_index, y, final=False, task ="node"):   
 66 |     model.eval()
 67 |     adj = edge2adj(x, edge_index)
 68 |     x = x.to(device)
 69 |     adj = adj.to(device)
 70 |     z = model(x, adj)
 71 |     print("test:", z.size())
 72 |     file=open(r"./data/tmp_vector.pickle","wb")
 73 |     pickle.dump(z,file) #storing_list
 74 |     file.close()
 75 |     
 76 |     return label_classification(z, y, ratio=0.1)
 77 | 
 78 | if __name__ == '__main__':
 79 |     parser = argparse.ArgumentParser()
 80 |     parser.add_argument('--dataset', type=str, default='Cora')
 81 |     parser.add_argument('--gpu_id', type=int, default=0)
 82 |     parser.add_argument('--config', type=str, default='config.yaml')
 83 |     parser.add_argument('--log', type=str, default='results/Cora/')
 84 |     parser.add_argument('--seed', type=int, default=39788)
 85 |     parser.add_argument('--eps', type=float, default=0)
 86 |     parser.add_argument('--alpha', type=float, default=0)
 87 |     parser.add_argument('--beta', type=float, default=0)
 88 |     parser.add_argument('--lamb', type=float, default=0)
 89 |     args = parser.parse_args()
 90 | 
 91 | 
 92 |     assert args.gpu_id in range(0, 8)
 93 | 
 94 |     
 95 |     config = yaml.load(open(args.config), Loader=SafeLoader)
 96 |     if args.dataset in config:
 97 |         config = config[args.dataset]
 98 |     else:
 99 |         config = {
100 |         'learning_rate': 0.001,
101 |         'num_hidden': 256,
102 |         'num_proj_hidden': 256,
103 |         'activation': 'prelu',
104 |         'base_model': 'GCNConv',
105 |         'num_layers': 2,
106 |         'drop_edge_rate_1': 0.3,
107 |         'drop_edge_rate_2': 0.4,
108 |         'drop_feature_rate_1': 0.1,
109 |         'drop_feature_rate_2': 0.0,
110 |         'tau': 0.4,
111 |         'num_epochs': 1000,
112 |         'weight_decay': 1e-5,
113 |         'drop_scheme': 'degree',
114 |     }
115 |     
116 |         
117 |     torch.manual_seed(config["seed"])
118 |     random.seed(12345)
119 |     np.random.seed(config["seed"])
120 |     
121 |     learning_rate = config['learning_rate']
122 |     num_hidden = config['num_hidden']
123 |     num_proj_hidden = config['num_proj_hidden']
124 |     activation = ({'relu': F.relu, 'prelu': nn.PReLU(), 'rrelu': nn.RReLU()})[config['activation']]
125 |     base_model = GCNConv
126 |     num_layers = config['num_layers']
127 | 
128 |     drop_edge_rate_1 = config['drop_edge_rate_1']
129 |     drop_edge_rate_2 = config['drop_edge_rate_2']
130 |     drop_feature_rate_1 = config['drop_feature_rate_1']
131 |     drop_feature_rate_2 = config['drop_feature_rate_2']
132 |     tau = config['tau']
133 |     num_epochs = config['num_epochs']
134 |     weight_decay = config['weight_decay']
135 |     # switch to the customer inputs by using args.{}
136 |     eps = config["eps"] # args.eps
137 |     lamb = config["lamb"] # args.lamb
138 |     alpha = config["alpha"] # args.alpha
139 |     beta = config["beta"] # arg.sbeta
140 |     
141 |     
142 |     sample_size = 500
143 |     
144 |     def get_dataset(path, name):
145 |         assert name in ['Cora', 'CiteSeer', "AmazonC", "AmazonP", 'CoauthorC', 'CoauthorP',\
146 |                         "DBLP", "PubMed", "GitHub", "Facebook", "LastFMAsia", "DeezerEurope"]
147 |         if name =="DBLP":
148 |             name = "dblp"
149 |         if name == "AmazonC":
150 |             return Amazon(path, "Computers", T.NormalizeFeatures())
151 |         if name == "AmazonP":
152 |             return Amazon(path, "Photo", T.NormalizeFeatures())
153 |         if name == 'CoauthorC':
154 |             return Coauthor(root=path, name='cs', transform=T.NormalizeFeatures())
155 |         if name == 'CoauthorP':
156 |             return Coauthor(root=path, name='physics', transform=T.NormalizeFeatures())
157 |         if name == "GitHub":
158 |             return GitHub(root=path,transform=T.NormalizeFeatures())
159 |         if name == "Facebook":
160 |             return FacebookPagePage(root=path,transform=T.NormalizeFeatures())    
161 |         if name == "LastFMAsia":
162 |             return LastFMAsia(root=path,transform=T.NormalizeFeatures())
163 |         if name == "DeezerEurope":
164 |             return DeezerEurope(root=path,transform=T.NormalizeFeatures())
165 | 
166 |         return (CitationFull if name == 'dblp' else Planetoid)(
167 |             path,
168 |             name,
169 |             "public",
170 |             T.NormalizeFeatures())
171 |         
    # Datasets live under ~/datasets/<dataset name>.
    path = osp.join(osp.expanduser('~'), 'datasets', args.dataset)
    # print("path:", path)
    # println
    dataset = get_dataset(path, args.dataset)
    # print("dataset:", dataset)
    # NOTE(review): this reads the dataset's underlying storage directly;
    # confirm whether the NormalizeFeatures transform is applied on this path.
    data = dataset.data  
    # print(data.num_features)
    # println()
    
    
    
    # NOTE(review): --gpu_id is validated in this script but not used when
    # picking the device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    encoder = Encoder(data.num_features, num_hidden, activation,
                      base_model=base_model, k=num_layers).to(device)
    model = Model(encoder, num_hidden, num_proj_hidden, tau).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    start = t()
    prev = start    
    # networkx copy of the graph, built from the edge list only (nodes without
    # any edge are therefore absent); used for subgraph sampling below.
    G = nx.Graph()
    G.add_edges_from(list(zip(data.edge_index.numpy()[0],data.edge_index.numpy()[1])))
    
    model.train()
    for epoch in range(1, num_epochs + 1):
        # uncomment to increase the eps every T epochs
        #if epoch%20 ==0:
        #    eps = eps*1.1
        # sample a subgraph from the original one

        # Draw up to sample_size labels from 0..G.number_of_nodes()-1.
        # assumes node ids are exactly that dense range — TODO confirm for
        # datasets that contain isolated nodes.
        S = G.subgraph(np.random.permutation(G.number_of_nodes())[:sample_size])
        # print("sample_size:", sample_size)
        # print("S:", S)
        # Feature rows follow S.nodes() order, matching the relabelling below.
        x = data.x[np.array(S.nodes())].to(device)
        # print("x:", x)
        # print("x shape:",x.size())
        # println()
        S = nx.relabel.convert_node_labels_to_integers(S, first_label=0, ordering='default')
        edge_index = np.array(S.edges()).T
        # Append the reversed edges so both directions are present.
        edge_index = torch.LongTensor(np.hstack([edge_index,edge_index[::-1]])).to(device)

        # steps=5 and node_ratio=0.2 are fixed for the adversarial attack.
        loss1, loss2 = train(model, x, edge_index, eps, lamb, alpha, beta, 5, 0.2)
            
        now = t()                                     
        print(f'(T) | Epoch={epoch:03d}, loss1={loss1:.4f}, loss2={loss2:.4f}'
              f' this epoch {now - prev:.4f}, total {now - start:.4f}')
        prev = now
220 | 
221 |     print("=== Final ===")
222 |     results = test(model, data.x, data.edge_index, data.y, final=True)
223 |     print(results)
224 |     with open(osp.join(args.log, "progress.csv"), "w") as f:
225 |         f.write(str(results))


--------------------------------------------------------------------------------
/code/data_pre/pre_poi_transformer.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import pandas as pd
  3 | import pickle
  4 | from shapely.geometry import Point, LineString
  5 | from shapely.geometry import Polygon,MultiPoint  #多边形
  6 | import torch
  7 | from torch import nn
  8 | import numpy as np
  9 | import torch.nn.functional as F
 10 | import torch.optim as optim
 11 | 
 12 | 
 13 | def load_data(file):
 14 |     data_load_file = []
 15 |     file_1 = open(file, "rb")
 16 |     data_load_file = pickle.load(file_1)
 17 |     return data_load_file
 18 | # poi_list = ['drinking_water', 'toilets', 'school', 'hospital', 'arts_centre', 'fire_station', 'police', 'bicycle_parking', 'fountain', 'ferry_terminal', 'bench', 'cinema', 'cafe', 'pub', 'waste_basket', 'parking_entrance', 'parking', 'fast_food', 'bank', 'restaurant', 'ice_cream', 'pharmacy', 'taxi', 'post_box', 'atm', 'nightclub', 'social_facility', 'bar', 'biergarten', 'clock', 'bicycle_rental', 'community_centre', 'watering_place', 'ranger_station', 'boat_rental', 'recycling', 'payment_terminal', 'bicycle_repair_station', 'place_of_worship', 'shelter', 'telephone', 'clinic', 'dentist', 'vending_machine', 'theatre', 'charging_station', 'public_bookcase', 'post_office', 'fuel', 'doctors']
 19 | # poi_list_1 = ['drinking_water', 'toilets', 'school', 'hospital', 'arts_centre', 'fire_station', 'police', 'bicycle_parking', 'fountain', 'ferry_terminal', 'bench', 'cinema', 'cafe', 'pub', 'waste_basket', 'parking_entrance', 'parking', 'fast_food', 'bank', 'restaurant', 'ice_cream', 'pharmacy', 'taxi', 'post_box', 'atm', 'nightclub', 'social_facility', 'bar', 'biergarten', 'clock', 'bicycle_rental', 'community_centre', 'watering_place', 'ranger_station', 'boat_rental', 'recycling', 'payment_terminal', 'bicycle_repair_station', 'place_of_worship', 'shelter', 'telephone', 'clinic', 'dentist', 'vending_machine', 'theatre', 'charging_station', 'public_bookcase', 'post_office', 'fuel', 'doctors','drinking_water', 'toilets']
# Load the precomputed inputs. All paths are relative to the current working
# directory, so the script has to be started from a sibling of ``../data``.
# region_back and reg_spatial are not referenced by any active (uncommented)
# statement later in this script.
region_back = load_data("../data/region_back.pickle")
# Used below as a mapping: region index -> list of POI keys in that region.
reg_poi = load_data("../data/reg_incld_poi_new.pickle")
# Indexed below by POI key; each entry must support ``.tolist()``.
poi_skip_vec = load_data("../data/poi_skip_vec.pickle")
reg_spatial = load_data("../data/region_spatial_refine_1.pickle")
# presumably a networkx graph (it exposes ``edges(data=True)``) -- TODO confirm
flow = load_data("../data/flow_graph.pickle")
# Per-region values, indexed below by region index after normalisation.
check_in_label = load_data("../data/checkin_label.pickle")
# Edge list with attribute dicts; only consumed by commented-out code below.
flow_list = list(flow.edges(data=True))
 27 | def normalization(data):
 28 |     _range = np.max(abs(data))
 29 |     return data / _range
# Check-in labels divided by their largest absolute value; indexed by region
# index further down when building the combined region feature vector.
label_norm = normalization(check_in_label)
 31 | # print(label_norm)
 32 | # final_vec =[]
 33 | # connected_layer = nn.Linear(in_features = 200, out_features = 96)
 34 | # emb = nn.Embedding(200, 200)
 35 | # for key,value in reg_poi.items():
 36 | #     output = np.mean([connected_layer(emb(torch.tensor(uu)).float()).tolist() for uu in value],axis=0).tolist()
 37 | #     final_vec.append(output)
 38 | # print(np.array(final_vec).shape)
 39 | 
 40 | # file=open(r"../data/reg_poi_vec.pickle","wb")
 41 | # pickle.dump(final_vec,file) #storing_list
 42 | # file.close()
 43 | 
 44 | # println()
 45 | # reg_flow = {}
 46 | # for item in flow_list:
 47 | #     # print(item)
 48 | #     # print(item[2]['weight'])
 49 | #     # println()
 50 | #     r1 = item[0].split("_")[1]
 51 | #     r2 = item[1].split("_")[1]
 52 | #     if int(r1) not in reg_flow.keys():
 53 | #         reg_flow[int(r1)] = 0
 54 | #     if int(r2) not in reg_flow.keys():
 55 | #         reg_flow[int(r2)] = 0
 56 | #     reg_flow[int(r1)]+= item[2]['weight']
 57 | #     reg_flow[int(r2)]+= item[2]['weight']
 58 | 
 59 | 
 60 | # println()
 61 | 
 62 | reg_idx = [key for key in reg_poi.keys() if len(reg_poi[key])>0]
 63 | # print(reg_idx)
 64 | # prirntln()
 65 | file=open(r"../data/reg_poi_idx_1.pickle","wb")
 66 | pickle.dump(reg_idx,file) #storing_list
 67 | file.close()
 68 | 
 69 | 
 70 | 
 71 | max_len= 0
 72 | for key,value in reg_poi.items():
 73 |     if max_len< len(value):
 74 |         max_len = len(value)
 75 | # print("max_len:",max_len)
 76 | 
# Build one 512-dim feature row per region (region indices 0..171).
# reg_poi_t and reg_poi_list are not filled by the loop below.
reg_poi_t = {}
reg_poi_list = []

# Lookup table indexed by the number of POIs in a region (0 for empty regions).
# NOTE(review): the table has 11 rows, so a region holding more than 10 POIs
# would index out of range -- confirm the data never exceeds that.
embedding_cat = torch.nn.Embedding(11, 96)
# Projects the concatenated 3 x 96 feature vector to 512 dims.
linear = nn.Linear(96*3, 512)
# Used after the transformer pass to project 512-dim rows to 96 dims.
linear_trans = nn.Linear(512, 96)
# The three modules above keep their random initial weights; nothing in this
# script trains them.
region_com_list = []
region_poi_gram_dict  = {}
for iii in range(172):
    if iii not in reg_idx:
        # Region without POIs: use an all-zero POI vector.
        tmp_1 = np.array([0.0]*96)
        region_poi_gram_dict[iii] = tmp_1.tolist()
        tmp_2 = embedding_cat(torch.tensor(0))
        # tmp_3 = torch.squeeze(reg_spatial[iii],0).tolist()
        # Concatenate: POI vector | POI-count embedding | label repeated 96 times.
        com = np.concatenate((tmp_1,tmp_2.tolist(),[label_norm[iii]]*96),axis = 0)
        com_reshape = linear(torch.tensor(com).float()).tolist()
        region_com_list.append(com_reshape)
    else:
        # Region with POIs: average the skip-gram vectors of its POIs.
        tmp_g = []
        for sub_poi in reg_poi[iii]:
            tmp_g.append(poi_skip_vec[sub_poi].tolist())
        # assumes every POI vector is 96-dim so the concatenation matches
        # the 96*3 input of `linear` -- TODO confirm
        tmp_1 = np.mean(tmp_g, axis =0)
        region_poi_gram_dict[iii] = tmp_1.tolist()
        tmp_2 = embedding_cat(torch.tensor(len(reg_poi[iii])))
        # tmp_3 = torch.squeeze(reg_spatial[iii],0).tolist()
        # Same layout as the empty-region branch.
        com = np.concatenate((tmp_1,tmp_2.tolist(),[label_norm[iii]]*96),axis = 0)
        com_reshape = linear(torch.tensor(com).float()).tolist()
        region_com_list.append(com_reshape)
109 |         
# Per-region mean POI vectors, in the order the regions were inserted into
# region_poi_gram_dict.
region_poi_gram = list(region_poi_gram_dict.values())
# The context manager closes the file even if tensor creation or pickling fails.
with open(r"../data/reg_poi_vec_2.pickle", "wb") as file:
    pickle.dump(torch.tensor(region_poi_gram), file)
117 | # println()        
118 |   
# Stack the per-region 512-dim rows into one array and pass them through a
# freshly constructed (untrained) 6-layer transformer encoder.
region_com_array = np.array(region_com_list)
from torch import nn
# NOTE(review): the layer is built without `batch_first`; if the library
# default is sequence-first, the size-1 axis added by unsqueeze below is the
# sequence axis and the regions are processed as independent batch items
# (no attention across regions) -- confirm this is intended.
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8 )
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
# NOTE(review): `.eval()` is never called on the encoder, so any dropout that
# is enabled by default stays active and the output is not deterministic.
# Add a leading axis of size 1 in front of the (regions, 512) array.
src =  torch.unsqueeze(torch.tensor(region_com_array),0)
out = transformer_encoder(src.float())
# Drop the leading size-1 axis again.
out_ = torch.squeeze(out,0)
# Project each row to 96 dims; rebuilding the tensor from Python lists also
# drops any autograd history.
out_ = torch.tensor([linear_trans(item).tolist() for item in out_])
132 | # print(out_.size())
133 | 
134 | # reg_poi_vec = {}
135 | # for idx,vec in zip(reg_idx,out_):
136 | #     reg_poi_vec[idx] = vec
137 | 
# Store the projected region representations; the context manager closes the
# file even if pickling fails.
with open(r"../data/reg_com_poi_cat_spatial.pickle", "wb") as file:
    pickle.dump(out_, file)
141 | 
142 | 
143 |          
144 |         
# Stop here on purpose: every output of this script has been written, and the
# statements further down are leftovers from an older variant that must not
# run. An explicit exit replaces the previous call to an undefined name, which
# only halted the script by raising NameError.
raise SystemExit(0)
146 | 
147 | 
148 | # reg_poi_={}
149 | # s = 0
150 | # emb = nn.Embedding(50, 512)
151 | # embedding_spatial = torch.nn.Embedding(15, 512)  # spatial
152 | # for key,value in reg_poi.items():
153 | #     # print("value:",value)
154 | #     if value!=[]:
155 | #         reg_poi_[key]=[]
156 | #         # print("value:",value)
157 | #         if len(value)>s:
158 | #             s = len(value)
159 | #         for item in value:
160 | #             reg_poi_[key].append(emb(torch.tensor(item)).tolist())
161 | # spa_vec= embedding_spatial(torch.tensor(reg_spatial[idx]))
162 | # reg_poi_t = {}
163 | # reg_poi_list = []
164 | # for iii in range(172):
165 | # # for key,value in reg_poi_.items():
166 | #     if iii not in reg_poi_.keys():
167 | #         # print("&&&:", np.array([0.0]*512).shape)
168 | #         reg_poi_t[key] = np.array([0.0]*512)
169 | #         # spa_vec= embedding_spatial(torch.tensor(reg_spatial[iii])).tolist()
170 | #         # ci = np.concatenate((spa_vec,[0.0]*512),axis = 0)
171 | #         reg_poi_list.append(np.array([0.0]*512))
172 | #         # reg_poi_list.append(ci)
173 | #     else:
174 | #         # print("value:",value)
175 | #         tp = np.mean(reg_poi_[key],axis=0)
176 | #         # spa_vec= embedding_spatial(torch.tensor(reg_spatial[iii])).tolist()
177 | #         # ci = np.concatenate((spa_vec,tp.tolist()),axis = 0)
178 | #         # reg_poi_list.append(np.array([0.0]*512))
179 | #         reg_poi_list.append(tp)
180 | #         # print("***:",tp.shape)
181 | #         reg_poi_t[key] = tp
182 |         # reg_poi_list.append(tp)
183 | # print(np.array(reg_poi_list).shape)
184 | # for key,value in reg_poi.items():
185 | #     # print("value:",value)
186 | #     if value!=[]:
187 | #         reg_poi_[key]=[]
188 | #         # print("value:",value)
189 | #         if len(value)>s:
190 | #             s = len(value)
191 | #         for item in value:
192 | #             reg_poi_[key].append(emb(torch.tensor(item)).tolist())
193 | # # spa_vec= embedding_spatial(torch.tensor(reg_spatial[idx]))
194 | # # reg_poi_t = {}
195 | # reg_poi_list = []
196 | # for iii in range(172):
197 | # # for key,value in reg_poi_.items():
198 | #     if iii not in reg_poi_.keys():
199 | #         # print("&&&:", np.array([0.0]*512).shape)
200 | #         # reg_poi_t[key] = np.array([0.0]*512)
201 | #         spa_vec= embedding_spatial(torch.tensor(reg_spatial[iii])).tolist()
202 | #         ci = np.concatenate((spa_vec,[0.0]*512),axis = 0)
203 | #         # reg_poi_list.append(np.array([0.0]*512))
204 | #         reg_poi_list.append(ci)
205 | #     else:
206 | #         # print("value:",value)
207 | #         tp = np.mean(reg_poi_[key],axis=0)
208 | #         spa_vec= embedding_spatial(torch.tensor(reg_spatial[iii])).tolist()
209 | #         ci = np.concatenate((spa_vec,tp.tolist()),axis = 0)
210 | #         # reg_poi_list.append(np.array([0.0]*512))
211 | #         reg_poi_list.append(ci)
212 | #         # print("***:",tp.shape)
213 | #         # reg_poi_t[key] = tp
214 | #         # reg_poi_list.append(tp)
215 | # # print(np.array(reg_poi_list).shape)
216 | # fully_layer = nn.Linear(1024,512)
217 | # reg_poi_list_ = fully_layer(torch.tensor(np.array(reg_poi_list)).float())
# --- Legacy section -------------------------------------------------------
# The script is halted before this point (the statement that ends the
# previous section terminates execution), so nothing below runs.
# NOTE(review): if it were reached it would fail: `reg_poi_` is only defined
# in commented-out code, and `reg_poi_list` is still the empty list created
# earlier. It would also overwrite ../data/reg_poi_idx_1.pickle, which the
# active part of the script has already written.
reg_poi_list_ = torch.tensor(np.array(reg_poi_list)).float()
reg_poi_list_tensor = torch.unsqueeze(reg_poi_list_,0)
print(reg_poi_list_tensor.size())

reg_idx= [key for key in reg_poi_.keys()]
from torch import nn
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8 )
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
# src = torch.rand(1, 172, 512)
src = reg_poi_list_tensor
out = transformer_encoder(src)
# print(out.size())
out_ = torch.squeeze(out,0)
print(out_.size())
print(reg_idx)
print(len(reg_idx))
# reg_poi_vec = {}
# for idx,vec in zip(reg_idx,out_):
#     reg_poi_vec[idx] = vec

# Dump the encoder output.
file=open(r"../data/reg_poi_vec_1.pickle","wb")
pickle.dump(out_,file) #storing_list
file.close()

# Dump the region index list (same path as the dump near the top of the script).
file=open(r"../data/reg_poi_idx_1.pickle","wb")
pickle.dump(reg_idx,file) #storing_list
file.close()
245 |         
246 | 
247 | 
248 | 
249 | 
250 | 
251 | 
252 |     
253 |     
254 |     
255 |     
256 |     
257 |     
258 |     
259 |     
260 |     
261 |     
262 |     
263 |     
264 |     
265 |     
266 |     
267 |     
268 |     
269 |     
270 |     
271 |     
272 |     
273 |     
274 |     
275 |     
276 | 
277 | 


--------------------------------------------------------------------------------
/code/model_gcn.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import torch
  4 | from torch_geometric.nn import MessagePassing
  5 | from torch_geometric.utils import add_self_loops, degree, softmax
  6 | from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool, GlobalAttention, Set2Set
  7 | import torch.nn.functional as F
  8 | # from loader import BioDataset
  9 | # from dataloader import DataLoaderFinetune
 10 | from torch_scatter import scatter_add
 11 | from torch_geometric.nn.inits import glorot, zeros
 12 | import dill
 13 | import os
 14 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # 下面老是报错 shape 不一致
 15 | class GINConv(MessagePassing):
 16 |     """
 17 |     Extension of GIN aggregation to incorporate edge information by concatenation.
 18 | 
 19 |     Args:
 20 |         emb_dim (int): dimensionality of embeddings for nodes and edges.
 21 |         input_layer (bool): whethe the GIN conv is applied to input layer or not. (Input node labels are uniform...)
 22 | 
 23 |     See https://arxiv.org/abs/1810.00826
 24 |     """
 25 |     def __init__(self, emb_dim, aggr = "add", input_layer = False):
 26 |         super(GINConv, self).__init__()
 27 |         # multi-layer perceptron
 28 |         self.mlp = torch.nn.Sequential(torch.nn.Linear(2*emb_dim, 2*emb_dim), torch.nn.BatchNorm1d(2*emb_dim), torch.nn.ReLU(), torch.nn.Linear(2*emb_dim, emb_dim))
 29 | 
 30 |         ### Mapping 0/1 edge features to embedding
 31 |         self.edge_encoder = torch.nn.Linear(4, emb_dim)
 32 | 
 33 |         ### Mapping uniform input features to embedding.
 34 |         self.input_layer = input_layer
 35 |         if self.input_layer:
 36 |             self.input_node_embeddings = torch.nn.Embedding(2, emb_dim)
 37 |             torch.nn.init.xavier_uniform_(self.input_node_embeddings.weight.data)
 38 | 
 39 |         self.aggr = aggr
 40 | 
 41 |     def forward(self, x, edge_index, edge_attr):
 42 |         #add self loops in the edge space
 43 |         edge_index = add_self_loops(edge_index, num_nodes = x.size(0))
 44 | 
 45 |         #add features corresponding to self-loop edges.
 46 |         self_loop_attr = torch.zeros(x.size(0), 4)
 47 |         self_loop_attr[:,1] = 1 # attribute for self-loop edge
 48 |         self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype)
 49 |         edge_attr = torch.cat((edge_attr, self_loop_attr), dim = 0)
 50 | 
 51 |         edge_embeddings = self.edge_encoder(edge_attr.float())
 52 | 
 53 |         if self.input_layer:
 54 |             x = self.input_node_embeddings(x.to(torch.int64).view(-1,))
 55 | 
 56 |         # return self.propagate(self.aggr, edge_index, x=x, edge_attr=edge_embeddings)
 57 |         return self.propagate(edge_index[0], x=x, edge_attr=edge_embeddings)
 58 | 
 59 |     def message(self, x_j, edge_attr):
 60 |         return torch.cat([x_j, edge_attr], dim = 1)
 61 | 
 62 |     def update(self, aggr_out):
 63 |         return self.mlp(aggr_out)
 64 | 
 65 | 
 66 | class GCNConv(MessagePassing):
 67 | 
 68 |     def __init__(self, emb_dim, aggr = "add", input_layer = False):
 69 |         super(GCNConv, self).__init__()
 70 | 
 71 |         self.emb_dim = emb_dim
 72 |         self.linear = torch.nn.Linear(emb_dim, emb_dim)
 73 |         self.linear_1 = torch.nn.Linear(1, emb_dim)
 74 |         ### Mapping 0/1 edge features to embedding
 75 |         self.edge_encoder = torch.nn.Linear(4, emb_dim)
 76 | 
 77 |         ### Mapping uniform input features to embedding.
 78 |         self.input_layer = input_layer
 79 |         if self.input_layer:
 80 |             # self.input_node_embeddings = torch.nn.Embedding(emb_dim, emb_dim)
 81 |             self.input_node_embeddings = torch.nn.Linear(emb_dim,emb_dim)
 82 |             torch.nn.init.xavier_uniform_(self.input_node_embeddings.weight.data)
 83 | 
 84 |         self.aggr = aggr
 85 | 
 86 |     def norm(self, edge_index, num_nodes, dtype):
 87 |         ### assuming that self-loops have been already added in edge_index
 88 |         # print("edge_index:", edge_index)
 89 |         # println()
 90 |         edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
 91 |                                      device=edge_index[0].device)
 92 |         row, col = edge_index
 93 |         
 94 |         deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
 95 |         deg_inv_sqrt = deg.pow(-0.5)
 96 |         deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
 97 | 
 98 |         return deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
 99 | 
100 | 
101 |     # def forward(self, x, edge_index, edge_attr):
102 |     def forward(self, x, edge_index):
103 |         #add self loops in the edge space
104 |         # print("edge_index:", edge_index.size())
105 |         edge_index = add_self_loops(edge_index, num_nodes = x.size(0))
106 |         # print("edge_attr:", edge_attr.size())
107 |         # ad_s = edge_index.size()[1]
108 |         # add features corresponding to self-loop edges.
109 |         # self_loop_attr = torch.zeros(x.size(0), 4)
110 |         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
111 |         self_loop_attr = torch.zeros(edge_index[0].size()[1], 4)
112 |         # print("edge_index[0].size()[1]:",edge_index[0].size()[1])
113 |         # print("self_loop_attr:",self_loop_attr.size())
114 |         self_loop_attr[:,1] = 1 # attribute for self-loop edge
115 |         # self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype)
116 |         # edge_attr = torch.cat((edge_attr, self_loop_attr), dim = 0)
117 |         edge_attr = self_loop_attr.cuda()
118 |         # print("edge_attr:", edge_attr)
119 |         # print("edge_index:", edge_index)
120 |         # println()
121 |         edge_embeddings = self.edge_encoder(edge_attr.float())
122 |         if self.input_layer:
123 |             x = self.input_node_embeddings(x.float())
124 | 
125 |         norm = self.norm(edge_index[0].long(), x.size(0), x.dtype)
126 |         x = self.linear(x)
127 |         # norm = self.linear_1(norm.view(-1, 1))
128 |         # print("edge_index[0]:", edge_index[0].size(), x.size(), edge_embeddings.size(), norm.size())
129 |         return self.propagate(edge_index[0], x=x, edge_attr=edge_embeddings, norm = norm)
130 | 
131 |     def message(self, x_j, edge_attr, norm):
132 |         # print("edge_attr:", edge_attr.size())
133 |         # print("x_j:", x_j.size())
134 |         # print("norm:", norm.size())
135 |         tmp = x_j + edge_attr
136 |         # print("***:", tmp.size())
137 |         # print("norm:", norm.size())
138 |         # print("&&:", norm.view(-1, 1)*(x_j + edge_attr))
139 |         # println()
140 |         return norm.view(-1, 1) * (x_j + edge_attr)
141 | 
142 | 
class GATConv(MessagePassing):
    """
    Multi-head attention convolution that adds an edge embedding to each
    neighbour embedding before the attention scores are computed.

    Args:
        emb_dim (int): dimensionality of node embeddings (per head).
        heads (int): number of attention heads; head outputs are averaged.
        negative_slope (float): slope of the leaky ReLU applied to the scores.
        aggr (str): stored on the instance as ``self.aggr``.
        input_layer (bool): when True, integer node labels are first mapped
            through a 2-entry embedding table.

    NOTE(review): ``forward`` requires ``edge_attr``, but ``GNN.forward`` in
    this file calls its layers with ``(x, edge_index)`` only, so this layer
    cannot currently be driven through ``GNN``.
    """
    def __init__(self, emb_dim, heads=2, negative_slope=0.2, aggr = "add", input_layer = False):
        super(GATConv, self).__init__()

        self.aggr = aggr

        self.emb_dim = emb_dim
        self.heads = heads
        self.negative_slope = negative_slope

        # projects every node to `heads` separate emb_dim-sized vectors
        self.weight_linear = torch.nn.Linear(emb_dim, heads * emb_dim)
        # attention vector applied to the concatenated [x_i, x_j] per head
        self.att = torch.nn.Parameter(torch.Tensor(1, heads, 2 * emb_dim))

        self.bias = torch.nn.Parameter(torch.Tensor(emb_dim))

        ### Mapping 0/1 edge features to embedding
        self.edge_encoder = torch.nn.Linear(4, heads * emb_dim)

        ### Mapping uniform input features to embedding.
        self.input_layer = input_layer
        if self.input_layer:
            self.input_node_embeddings = torch.nn.Embedding(2, emb_dim)
            torch.nn.init.xavier_uniform_(self.input_node_embeddings.weight.data)

        # att and bias are allocated uninitialised above; fill them here
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the attention vector (glorot) and the bias (zeros)."""
        glorot(self.att)
        zeros(self.bias)

    def forward(self, x, edge_index, edge_attr):
        """Run one attention convolution over ``x``."""
        #add self loops in the edge space
        
        edge_index = add_self_loops(edge_index, num_nodes = x.size(0))

        #add features corresponding to self-loop edges.
        self_loop_attr = torch.zeros(x.size(0), 4)
        self_loop_attr[:,1] = 1 # attribute for self-loop edge
        self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype)
        edge_attr = torch.cat((edge_attr, self_loop_attr), dim = 0)

        edge_embeddings = self.edge_encoder(edge_attr)

        if self.input_layer:
            x = self.input_node_embeddings(x.to(torch.int64).view(-1,))

        # reshape to one emb_dim-sized vector per head
        x = self.weight_linear(x).view(-1, self.heads, self.emb_dim)
        # NOTE(review): this passes the aggregation name as the first
        # argument, whereas GINConv and GCNConv in this file call
        # `self.propagate(edge_index[0], ...)`. Both styles cannot match the
        # same MessagePassing.propagate signature -- confirm which
        # torch_geometric version is targeted before using this layer.
        return self.propagate(self.aggr, edge_index, x=x, edge_attr=edge_embeddings)

    def message(self, edge_index, x_i, x_j, edge_attr):
        """Weight each (neighbour + edge) embedding by its attention score."""
        edge_attr = edge_attr.view(-1, self.heads, self.emb_dim)
        # in-place add on the gathered neighbour features
        x_j += edge_attr

        # per-head score: dot product of att with the concatenated [x_i, x_j]
        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)

        alpha = F.leaky_relu(alpha, self.negative_slope)
        # scores are normalised over groups given by edge_index[0]
        alpha = softmax(alpha, edge_index[0])

        return x_j * alpha.view(-1, self.heads, 1)

    def update(self, aggr_out):
        """Average the heads and add the bias."""
        aggr_out = aggr_out.mean(dim=1)
        aggr_out = aggr_out + self.bias

        return aggr_out
208 | 
209 | 
class GraphSAGEConv(MessagePassing):
    """
    GraphSAGE-style convolution: aggregates (neighbour + edge) embeddings and
    L2-normalises the result.

    Args:
        emb_dim (int): dimensionality of node and edge embeddings.
        aggr (str): neighbourhood aggregation scheme (default ``"mean"``).
        input_layer (bool): when True, integer node labels are first mapped
            through a 2-entry embedding table.

    NOTE(review): ``forward`` requires ``edge_attr``, but ``GNN.forward`` in
    this file calls its layers with ``(x, edge_index)`` only, so this layer
    cannot currently be driven through ``GNN``.
    """
    def __init__(self, emb_dim, aggr = "mean", input_layer = False):
        # Hand the aggregation scheme to MessagePassing so it is actually
        # applied by propagate().
        super(GraphSAGEConv, self).__init__(aggr=aggr)

        self.emb_dim = emb_dim
        self.linear = torch.nn.Linear(emb_dim, emb_dim)

        # 4-dim edge attributes -> emb_dim
        self.edge_encoder = torch.nn.Linear(4, emb_dim)

        # optional embedding of the (uniform) input node labels
        self.input_layer = input_layer
        if self.input_layer:
            self.input_node_embeddings = torch.nn.Embedding(2, emb_dim)
            torch.nn.init.xavier_uniform_(self.input_node_embeddings.weight.data)

    def forward(self, x, edge_index, edge_attr):
        """Run one convolution over ``x`` using ``edge_index`` / ``edge_attr``."""
        # add_self_loops returns (edge_index, edge_weight) in the API the
        # other layers of this file are written against
        edge_index, _ = add_self_loops(edge_index, num_nodes = x.size(0))

        # One extra attribute row per node for its self-loop. The attributes
        # have 4 columns, so the self-loop flag lives in column 1 like in the
        # other layers (the previous column index 7 was out of range).
        self_loop_attr = torch.zeros(x.size(0), 4)
        self_loop_attr[:, 1] = 1
        self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype)
        edge_attr = torch.cat((edge_attr, self_loop_attr), dim = 0)

        edge_embeddings = self.edge_encoder(edge_attr)

        if self.input_layer:
            x = self.input_node_embeddings(x.to(torch.int64).view(-1,))

        x = self.linear(x)

        # same calling convention as GINConv / GCNConv in this file
        return self.propagate(edge_index, x=x, edge_attr=edge_embeddings)

    def message(self, x_j, edge_attr):
        """Message = neighbour embedding + edge embedding."""
        return x_j + edge_attr

    def update(self, aggr_out):
        """L2-normalise the aggregated messages along the feature axis."""
        return F.normalize(aggr_out, p = 2, dim = -1)
252 | 
253 | 
class GNN(torch.nn.Module):
    """
    Stack of message-passing layers producing node representations.

    Args:
        num_layer (int): the number of GNN layers (at least 2)
        emb_dim (int): dimensionality of embeddings
        JK (str): how layer outputs are combined: "last" (output of the final
            layer) or "sum" (element-wise sum of all layer outputs)
        drop_ratio (float): dropout rate
        gnn_type: gin, gat, graphsage, gcn

    See https://arxiv.org/abs/1810.00826
    JK-net: https://arxiv.org/abs/1806.03536

    Output:
        node representations

    Raises:
        ValueError: if ``num_layer`` < 2 or ``gnn_type`` is unknown (at
            construction), or if ``JK`` is not supported (in ``forward``).
    """
    def __init__(self, num_layer, emb_dim, JK = "last", drop_ratio = 0, gnn_type = "gin"):
        super(GNN, self).__init__()
        self.num_layer = num_layer
        self.drop_ratio = drop_ratio
        self.JK = JK

        if self.num_layer < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")

        ###List of message-passing GNN convs
        self.gnns = torch.nn.ModuleList()
        for layer in range(num_layer):
            # only the first layer maps the raw inputs
            input_layer = layer == 0

            if gnn_type == "gin":
                self.gnns.append(GINConv(emb_dim, aggr = "add", input_layer = input_layer))
            elif gnn_type == "gcn":
                self.gnns.append(GCNConv(emb_dim, input_layer = input_layer))
            elif gnn_type == "gat":
                self.gnns.append(GATConv(emb_dim, input_layer = input_layer))
            elif gnn_type == "graphsage":
                self.gnns.append(GraphSAGEConv(emb_dim, input_layer = input_layer))
            else:
                # previously an unknown type silently produced a model with no
                # layers, which only failed later inside forward()
                raise ValueError("Invalid GNN type: {!r}.".format(gnn_type))

    def forward(self, x, edge_index):
        """Return node representations for ``x`` on the graph ``edge_index``."""
        h_list = [x]
        last_layer = self.num_layer - 1
        for layer in range(self.num_layer):
            # small constant offset added to every layer output
            h = self.gnns[layer](h_list[layer], edge_index) + 1e-6
            if layer != last_layer:
                # no ReLU after the final layer
                h = F.relu(h)
            h = F.dropout(h, self.drop_ratio, training = self.training)
            h_list.append(h)

        if self.JK == "last":
            return h_list[-1]
        if self.JK == "sum":
            # Sum the outputs of all layers (the raw input h_list[0] is
            # excluded). torch.stack leaves the individual tensors, including
            # the caller's ``x``, untouched, and the full per-node matrix is
            # returned rather than only its first row.
            return torch.stack(h_list[1:], dim = 0).sum(dim = 0)
        raise ValueError("Unsupported JK mode: {!r} (expected 'last' or 'sum').".format(self.JK))
320 | 
321 | 
class GNN_graphpred(torch.nn.Module):
    """
    Graph-level prediction head on top of ``GNN``.

    The prediction is a linear map of the pooled node representations
    concatenated with the representation of each graph's centre node.

    Args:
        num_layer (int): the number of GNN layers
        emb_dim (int): dimensionality of embeddings
        num_tasks (int): number of tasks in multi-task learning scenario
        drop_ratio (float): dropout rate
        JK (str): passed through to ``GNN``
        graph_pooling (str): sum, mean, max or attention
        gnn_type: passed through to ``GNN``

    See https://arxiv.org/abs/1810.00826
    JK-net: https://arxiv.org/abs/1806.03536

    Raises:
        ValueError: if ``num_layer`` < 2 or ``graph_pooling`` is unknown.
    """
    def __init__(self, num_layer, emb_dim, num_tasks, JK = "last", drop_ratio = 0, graph_pooling = "mean", gnn_type = "gin"):
        super(GNN_graphpred, self).__init__()
        self.num_layer = num_layer
        self.drop_ratio = drop_ratio
        self.JK = JK
        self.emb_dim = emb_dim
        self.num_tasks = num_tasks

        if self.num_layer < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")

        self.gnn = GNN(num_layer, emb_dim, JK, drop_ratio, gnn_type = gnn_type)

        #Different kind of graph pooling
        if graph_pooling == "sum":
            self.pool = global_add_pool
        elif graph_pooling == "mean":
            self.pool = global_mean_pool
        elif graph_pooling == "max":
            self.pool = global_max_pool
        elif graph_pooling == "attention":
            self.pool = GlobalAttention(gate_nn = torch.nn.Linear(emb_dim, 1))
        else:
            raise ValueError("Invalid graph pooling type.")

        # input is [pooled graph rep | centre node rep], hence 2 * emb_dim
        self.graph_pred_linear = torch.nn.Linear(2*self.emb_dim, self.num_tasks)

    def from_pretrained(self, model_file):
        """Load pretrained weights for the inner GNN from ``model_file``."""
        # map_location keeps every tensor on the storage it was deserialised to (CPU)
        self.gnn.load_state_dict(torch.load(model_file, map_location=lambda storage, loc: storage))

    def forward(self, data):
        """Return per-graph predictions for a batch ``data``.

        ``data`` must expose ``x``, ``edge_index``, ``batch`` and
        ``center_node_idx``.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch
        # GNN.forward in this file takes (x, edge_index) only; passing
        # edge_attr as a third argument raised a TypeError.
        node_representation = self.gnn(x, edge_index)

        pooled = self.pool(node_representation, batch)
        center_node_rep = node_representation[data.center_node_idx]

        graph_rep = torch.cat([pooled, center_node_rep], dim = 1)

        return self.graph_pred_linear(graph_rep)
377 | 
378 | 
379 | if __name__ == "__main__":
380 |     pass
381 | 
382 | 
383 | 
384 | 


--------------------------------------------------------------------------------
/code/train_edit.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os.path as osp
  3 | import random
  4 | from time import perf_counter as t
  5 | import yaml
  6 | import numpy as np
  7 | from yaml import SafeLoader
  8 | from scipy.linalg import fractional_matrix_power, inv
  9 | from torch.utils.data import random_split
 10 | import torch
 11 | import torch_geometric.transforms as T
 12 | import torch.nn.functional as F
 13 | import torch.nn as nn
 14 | from layers import GCNConv
 15 | import networkx as nx
 16 | import matplotlib.pyplot as plt
 17 | from torch_geometric.datasets import Planetoid, CitationFull, Amazon, Coauthor, GitHub, FacebookPagePage, LastFMAsia, DeezerEurope
 18 | from torch_geometric.utils import dropout_adj
 19 | from model import Encoder, Model, drop_feature
 20 | from utils import normalize_adj_tensor, normalize_adj_tensor_sp, edge2adj
 21 | from attack import PGD_attack_graph
 22 | from eval import label_classification
 23 | import pickle
 24 | from torch_geometric.nn import global_mean_pool, global_add_pool
 25 | from model_gcn import GNN
 26 | import torch.optim as optim
 27 | import os
 28 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # for remindering of the minmatch of shape
 29 | import warnings
 30 | 
 31 | def fxn():
 32 |     warnings.warn("deprecated", DeprecationWarning)
 33 | 
 34 | with warnings.catch_warnings():
 35 |     warnings.simplefilter("ignore")
 36 |     fxn()
 37 | 
 38 | def load_data(file):
 39 |         data_load_file = []
 40 |         file_1 = open(file, "rb")
 41 |         data_load_file = pickle.load(file_1)
 42 |         return data_load_file
 43 | hy = load_data("./data/hy_new_aaai_2.pickle")
 44 | 
 45 | class vgae(nn.Module):
 46 |     def __init__(self, gnn, emb_dim):
 47 |         super(vgae, self).__init__()
 48 |         self.encoder = gnn
 49 |         self.encoder_mean = nn.Sequential(nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim))
 50 |         # make sure std is positive
 51 |         self.encoder_std = nn.Sequential(nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim), nn.Softplus())
 52 |         # only reconstruct first 7-dim, please refer to https://github.com/snap-stanford/pretrain-gnns/issues/30
 53 |         self.decoder = nn.Sequential(nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, 4), nn.Sigmoid())
 54 |         self.decoder_edge = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 1))
 55 | 
 56 |         self.bceloss = nn.BCELoss(reduction='none')
 57 |         self.pool = global_mean_pool
 58 |         self.add_pool = global_add_pool
 59 |         self.sigmoid = nn.Sigmoid()
 60 |         self.softplus = nn.Softplus()
 61 |         self.softmax = nn.Softmax(dim=1)
 62 | 
 63 |         # reconstruct 4-class & 3-class edge_attr for 1st & 2nd dimension
 64 |         self.decoder_1 = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 4))
 65 |         self.decoder_2 = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 4))
 66 |         self.crossentropyloss = nn.CrossEntropyLoss(reduction='none')
 67 | 
 68 |     def forward_encoder(self, x, edge_index):
 69 |         x = self.encoder(x, edge_index)
 70 |         x_mean = self.encoder_mean(x)
 71 |         x_std = self.encoder_std(x)
 72 |         gaussian_noise = torch.randn(x_mean.shape).to(x.device)
 73 |         x = gaussian_noise * x_std + x_mean
 74 |         return x.detach(), x_mean, x_std
 75 | 
 76 |     # def forward_decoder(self, x, edge_index, edge_index_neg):
 77 |     def forward_decoder(self, x, edge_index):
 78 |         eleWise_mul = x[edge_index[0]] * x[edge_index[1]]
 79 |         edge_attr_pred = self.decoder(eleWise_mul)
 80 |         edge_pos = self.sigmoid( self.decoder_edge(eleWise_mul) ).squeeze()
 81 |         # edge_neg = self.sigmoid( self.decoder_edge(x[edge_index_neg[0]] * x[edge_index_neg[1]]) ).squeeze()
 82 |         # return edge_attr_pred, edge_pos, edge_neg
 83 |         return edge_pos
 84 | 
 85 |     def loss_vgae(self, edge_attr_pred, edge_attr, edge_pos_pred, edge_neg_pred, edge_index_batch, edge_index_neg_batch, x_mean, x_std, batch, reward=None):
 86 |         # evaluate p(A|Z)
 87 |         num_edge, _ = edge_attr_pred.shape
 88 |         loss_rec = self.bceloss(edge_attr_pred.reshape(-1), edge_attr[:, :4].reshape(-1))
 89 |         loss_rec = loss_rec.reshape((num_edge, -1)).sum(dim=1)
 90 | 
 91 |         loss_edge_pos = self.bceloss(edge_pos_pred, torch.ones(edge_pos_pred.shape).to(edge_pos_pred.device))
 92 |         loss_edge_neg = self.bceloss(edge_neg_pred, torch.zeros(edge_neg_pred.shape).to(edge_neg_pred.device))
 93 |         loss_pos = loss_rec + loss_edge_pos
 94 |         loss_pos = self.pool(loss_pos, edge_index_batch)
 95 |         loss_neg = self.pool(loss_edge_neg, edge_index_neg_batch)
 96 |         loss_rec = loss_pos + loss_neg
 97 |         #print('loss_pos + loss_neg', loss_pos, loss_neg)
 98 |         if not reward is None:
 99 |             loss_rec = loss_rec * reward
100 |             #print("reward:", reward)
101 |             #print("loss_rec:", loss_rec)
102 | 
103 |         # evaluate p(Z|X,A)
104 |         kl_divergence = - 0.5 * (1 + 2 * torch.log(x_std+ 1e-6) - x_mean**2 - x_std**2).sum(dim=1)
105 |         kl_ones = torch.ones(kl_divergence.shape).to(kl_divergence.device)
106 |         kl_divergence = self.pool(kl_divergence, batch)
107 |         kl_double_norm = 1 / self.add_pool(kl_ones, batch)
108 |         kl_divergence = kl_divergence * kl_double_norm
109 |         loss = (loss_rec + kl_divergence).mean()
110 |         '''
111 |         # link prediction for sanity check
112 |         from sklearn.metrics import roc_auc_score
113 |         from sklearn.metrics import average_precision_score
114 |         print(roc_auc_score(edge_attr.cpu().numpy(), edge_attr_pred.detach().cpu().numpy()), average_precision_score(edge_attr.cpu().numpy(), edge_attr_pred.detach().cpu().numpy()))
115 |         '''
116 |         return loss, (loss_edge_pos.mean()+loss_edge_neg.mean()).item()/2
117 | 
118 |     def generate(self, data):
119 |         x, _, _ = self.forward_encoder(data.x, data.edge_index)
120 |         eleWise_mul = torch.einsum('nd,md->nmd', x, x)
121 |         # calculate softmax probability
122 |         prob = self.decoder_edge(eleWise_mul).squeeze()
123 |         # print("prob:", prob.size())
124 |         # pritnl()
125 |         prob = torch.exp(prob)
126 |         prob[torch.isinf(prob)] = 1e10
127 |         prob[list(range(x.shape[0])), list(range(x.shape[0]))] = 0
128 |         prob = torch.einsum('nm,n->nm', prob, 1 / prob.sum(dim=1))
129 | 
130 |         # sparsify
131 |         
132 |         prob[prob < 1e-1] = 0
133 |         prob[prob.sum(dim=1) == 0] = 1
134 |         prob[list(range(x.shape[0])), list(range(x.shape[0]))] = 0
135 |         prob = torch.einsum('nm,n->nm', prob, 1 / prob.sum(dim=1))
136 | 
137 |         # predict 4-class & 3-class edge_attr for 1st & 2nd dimension
138 |         edge_attr_prob_1 = self.softmax(self.decoder_1(eleWise_mul))
139 |         edge_attr_rand_1 = torch.rand((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1]))
140 |         edge_attr_pred_1 = torch.zeros((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1]), dtype=torch.int64)
141 |         for n in range(3):
142 |             edge_attr_pred_1[edge_attr_rand_1 >= edge_attr_prob_1[:, :, n]] = n + 1
143 |             edge_attr_rand_1 -= edge_attr_prob_1[:, :, n]
144 | 
145 |         edge_attr_prob_2 = self.softmax(self.decoder_2(eleWise_mul))
146 |         edge_attr_rand_2 = torch.rand((edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1]))
147 |         edge_attr_pred_2 = torch.zeros((edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1]), dtype=torch.int64)
148 |         for n in range(2):
149 |             edge_attr_pred_2[edge_attr_rand_2 >= edge_attr_prob_2[:, :, n]] = n + 1
150 |             edge_attr_rand_2 -= edge_attr_prob_2[:, :, n]
151 | 
152 |         edge_attr_pred = torch.cat((edge_attr_pred_1.reshape((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1], 1)),
153 |                                     edge_attr_pred_2.reshape(
154 |                                         (edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1], 1)),edge_attr_pred_2.reshape(
155 |                                         (edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1], 1)),edge_attr_pred_2.reshape(
156 |                                         (edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1], 1))), dim=2)
157 |         
158 | 
159 |         return prob, edge_attr_pred
160 | 
def train(model: Model, x, edge_index, eps, model_1, optimizer_1,lamb, alpha, beta, steps, node_ratio):
    """Run one optimisation step of the contrastive model and the VGAE.

    Besides its arguments the function reads the module-level globals
    ``optimizer``, ``device``, ``hy`` and the four ``drop_*_rate_*`` values.

    Args:
        model: contrastive model, called as ``model(features, dense_adj)``;
            ``model.loss(a, b, batch_size=0)`` returns ``(loss, similarity)``.
        x: node features of the sampled sub-graph.
        edge_index: ``(2, E)`` edge list of the sampled sub-graph.
        eps: weight of the adversarial term; the PGD attack runs only when
            ``eps > 0``.
        model_1: VGAE providing ``forward_encoder`` / ``forward_decoder``.
        optimizer_1: optimiser holding ``model_1``'s parameters.
        lamb: weight of the similarity-margin regulariser.
        alpha, beta, steps, node_ratio: passed through to ``PGD_attack_graph``.

    Returns:
        ``(loss1, loss2, loss_vage)`` as Python floats.
    """
    optimizer.zero_grad()
    # The VGAE optimiser is stepped below, so its gradients must be cleared as
    # well; otherwise they keep accumulating from one call to the next.
    optimizer_1.zero_grad()

    adj = edge2adj(x, edge_index)
    edge_index_1 = dropout_adj(edge_index, p=drop_edge_rate_1)[0]
    edge_index_2 = dropout_adj(edge_index, p=drop_edge_rate_2)[0]
    # x_1 is overwritten by the VGAE sample below.  The call is kept because
    # drop_feature presumably draws from the RNG and removing it would shift
    # every later random draw -- TODO confirm and drop.
    x_1 = drop_feature(x, drop_feature_rate_1)
    x_2 = drop_feature(x, drop_feature_rate_2)
    adj_2 = edge2adj(x_2, edge_index_2)

    # learning to sample: view 1 = VGAE latent sample + VGAE-weighted adjacency
    x_1, _, _ = model_1.forward_encoder(x, edge_index)
    # NOTE(review): the decoder is fed the raw features ``x`` here, whereas
    # train_edit_auto.py passes the latent sample ``x_1`` -- confirm which one
    # is intended.
    edge_pos_pred = model_1.forward_decoder(x, edge_index)
    s = torch.sparse_coo_tensor(edge_index, edge_pos_pred, (adj.size()[0], adj.size()[1]))
    adj_1 = s.to_dense()

    if eps > 0:
        adj_3, x_3 = PGD_attack_graph(model, edge_index_1, edge_index, x_1, x, steps, node_ratio, alpha, beta)
    z = model(x, adj)
    z_1 = model(x_1, adj_1)
    z_2 = model(x_2, adj_2)

    # Split node positions by name suffix: "...s" spatial, "...p" POI, rest flow.
    # enumerate() replaces the quadratic node_list.index(item) lookups.
    poi_view = []
    spatial_view = []
    flow_view = []
    for position, node_name in enumerate(hy.nodes):
        if node_name.endswith("s"):
            spatial_view.append(position)
        elif node_name.endswith("p"):
            poi_view.append(position)
        else:
            flow_view.append(position)

    # cross-view contrastive learning
    # The projections are re-created (freshly initialised) on every call and
    # are not registered with any optimiser.
    linear = nn.Linear(len(spatial_view), 180).to(device)
    linear_1 = nn.Linear(len(flow_view), 180).to(device)
    # Direct indexing replaces the tolist()/numpy round trip.  That round trip
    # also cut the autograd link to z_2, so the explicit detach() keeps the
    # training behaviour identical.
    # NOTE(review): drop detach() if the cross-view loss is meant to update
    # the encoder.
    poi_view_tensor = z_2[poi_view].detach()
    spatial_view_tensor = z_2[spatial_view].detach()
    flow_view_tensor = z_2[flow_view].detach()
    flow_out = linear_1(flow_view_tensor.view(128, len(flow_view)).float())
    flow_trans = flow_out.view(180, 128)
    spatial_out = linear(spatial_view_tensor.view(128, len(spatial_view)).float())
    spatial_trans = spatial_out.view(180, 128).float()
    loss_v1, simi_v1 = model.loss(flow_trans.float(), spatial_trans.float(), batch_size=0)
    loss_v2, simi_v2 = model.loss(flow_trans.float(), poi_view_tensor.float(), batch_size=0)
    loss_v3, simi_v3 = model.loss(spatial_trans.float(), poi_view_tensor.float(), batch_size=0)

    # adaptive weight for each cross-view loss (.item() makes them constants)
    model_fs = nn.Sequential(nn.Linear(360, 1), nn.ReLU()).to(device)
    flow_spatial = torch.cat((flow_trans, spatial_trans), 0).to(device)
    flow_poi = torch.cat((flow_trans, poi_view_tensor), 0).to(device)
    spatial_poi = torch.cat((spatial_trans, poi_view_tensor), 0).to(device)
    fs_w = model_fs(flow_spatial.view(128, -1).float()).mean()
    fp_w = model_fs(flow_poi.view(128, -1).float()).mean()
    sp_w = model_fs(spatial_poi.view(128, -1).float()).mean()
    loss_view = fs_w.item() * loss_v1 + fp_w.item() * loss_v2 + sp_w.item() * loss_v3

    loss1, simi1 = model.loss(z_1, z_2, batch_size=0)
    loss2, simi2 = model.loss(z_1, z, batch_size=0)
    loss3, simi3 = model.loss(z_2, z, batch_size=0)
    # reward: 1 where the (z_2, z) loss is above its mean, 0.01 elsewhere
    loss3 = loss3 - loss3.mean()
    loss3[loss3 > 0] = 1
    loss3[loss3 <= 0] = 0.01  # weaken the reward for low cl loss
    reward = loss3
    loss1 = loss1.mean() + lamb * torch.clamp(simi1 * 2 - simi2.detach() - simi3.detach(), 0).mean()

    if eps > 0:
        z_3 = model(x_3, adj_3)
        loss2, _ = model.loss(z_1, z_3)
        loss2 = loss2.mean()
        loss = (loss1 + eps * loss2 + loss_view.mean())
    else:
        loss = loss1 + loss_view.mean()
        loss2 = loss1

    # Two backward passes: the plain loss, then the reward-weighted loss; both
    # accumulate into the same .grad buffers before the optimisers step.
    loss.backward(retain_graph=True)
    loss_vage = (reward * loss).mean()
    loss_vage.backward(retain_graph=True)
    optimizer.step()
    optimizer_1.step()

    return loss1.item(), loss2.item(), loss_vage.item()
278 | 
def test(model: Model, x, edge_index, model_1,y, final=False, task ="node"):
    """Embed the full graph twice, dump both embeddings and classify them.

    The first embedding uses the original features and adjacency, the second
    one the VGAE sample and the VGAE-weighted adjacency.  Both tensors are
    pickled under ``./data/`` before being scored.  Reads the module-level
    global ``device``.

    Args:
        model: trained contrastive model (switched to eval mode here).
        x: node features.
        edge_index: ``(2, E)`` edge list.
        model_1: VGAE providing ``forward_encoder`` / ``forward_decoder``.
        y: node labels handed to ``label_classification``.
        final, task: accepted for interface compatibility; not used.

    Returns:
        ``(result, result_vgae)``: the two ``label_classification`` results.
    """
    model.eval()
    adj = edge2adj(x, edge_index)
    x = x.to(device)
    adj = adj.to(device)
    edge_index = edge_index.to(device)

    # Evaluation needs no autograd graph; no_grad also keeps gradient history
    # out of the pickled tensors.
    with torch.no_grad():
        z = model(x, adj)

        x_1, _, _ = model_1.forward_encoder(x, edge_index)
        x_1 = x_1.to(device)
        edge_pos_pred = model_1.forward_decoder(x, edge_index)
        s = torch.sparse_coo_tensor(edge_index, edge_pos_pred.to(device), (adj.size()[0], adj.size()[1]))
        adj_1 = s.to_dense().to(device)
        z_1 = model(x_1, adj_1)

    # context managers close the files even if pickling fails
    with open(r"./data/tmp_vector.pickle", "wb") as file:
        pickle.dump(z, file)

    print("test z_1:", z_1.size())

    with open(r"./data/tmp_vector_vgae_2.pickle", "wb") as file:
        pickle.dump(z_1, file)

    return label_classification(z, y, ratio=0.1),label_classification(z_1, y, ratio=0.1)
311 | 
if __name__ == '__main__':
    # Script entry point.  Everything assigned here stays at module level on
    # purpose: train() and test() read ``optimizer``, ``device`` and the
    # ``drop_*_rate_*`` values as globals.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='Cora')
    parser.add_argument('--gpu_id', type=int, default=0)
    parser.add_argument('--config', type=str, default='config.yaml')
    parser.add_argument('--log', type=str, default='results/Cora/')
    parser.add_argument('--seed', type=int, default=39788)
    parser.add_argument('--eps', type=float, default=0.5)
    parser.add_argument('--alpha', type=float, default=0.1)
    parser.add_argument('--beta', type=float, default=0.1)
    parser.add_argument('--lamb', type=float, default=0.0)
    args = parser.parse_args()

    # NOTE(review): gpu_id is validated but not used when ``device`` is chosen.
    assert args.gpu_id in range(0, 8)

    # close the config file after parsing; an empty file yields an empty dict
    with open(args.config) as config_file:
        config = yaml.load(config_file, Loader=SafeLoader) or {}
    if args.dataset in config:
        config = config[args.dataset]
    else:
        # built-in defaults for datasets without a section in the config file
        config = {
            'learning_rate': 0.001,
            'num_hidden': 256,
            'num_proj_hidden': 256,
            'activation': 'prelu',
            'base_model': 'GCNConv',
            'num_layers': 2,
            'drop_edge_rate_1': 0.3,
            'drop_edge_rate_2': 0.4,
            'drop_feature_rate_1': 0.1,
            'drop_feature_rate_2': 0.0,
            'tau': 0.4,
            'num_epochs': 1000,
            'weight_decay': 1e-5,
            'drop_scheme': 'degree',
        }

    # The config value wins; the command-line value is the fallback.  Plain
    # config["seed"] raised KeyError with the built-in defaults above.
    seed = config.get("seed", args.seed)
    torch.manual_seed(seed)
    random.seed(12345)
    np.random.seed(seed)

    learning_rate = config['learning_rate']
    num_hidden = config['num_hidden']
    num_proj_hidden = config['num_proj_hidden']
    activation = ({'relu': F.relu, 'prelu': nn.PReLU(), 'rrelu': nn.RReLU()})[config['activation']]
    base_model = GCNConv
    num_layers = config['num_layers']

    drop_edge_rate_1 = config['drop_edge_rate_1']
    drop_edge_rate_2 = config['drop_edge_rate_2']
    drop_feature_rate_1 = config['drop_feature_rate_1']
    drop_feature_rate_2 = config['drop_feature_rate_2']
    tau = config['tau']
    num_epochs = config['num_epochs']
    weight_decay = config['weight_decay']
    # same rule as for the seed: config first, command line as fallback
    eps = config.get("eps", args.eps)
    lamb = config.get("lamb", args.lamb)
    alpha = config.get("alpha", args.alpha)
    beta = config.get("beta", args.beta)

    # number of nodes drawn for the sub-graph of every epoch
    sample_size = 1388

    def get_dataset(path, name):
        """Return the torch_geometric dataset ``name`` stored under ``path``."""
        assert name in ['Cora', 'CiteSeer', "AmazonC", "AmazonP", 'CoauthorC', 'CoauthorP',\
                        "DBLP", "PubMed", "GitHub", "Facebook", "LastFMAsia", "DeezerEurope"]
        if name =="DBLP":
            name = "dblp"
        if name == "AmazonC":
            return Amazon(path, "Computers", T.NormalizeFeatures())
        if name == "AmazonP":
            return Amazon(path, "Photo", T.NormalizeFeatures())
        if name == 'CoauthorC':
            return Coauthor(root=path, name='cs', transform=T.NormalizeFeatures())
        if name == 'CoauthorP':
            return Coauthor(root=path, name='physics', transform=T.NormalizeFeatures())
        if name == "GitHub":
            return GitHub(root=path,transform=T.NormalizeFeatures())
        if name == "Facebook":
            return FacebookPagePage(root=path,transform=T.NormalizeFeatures())
        if name == "LastFMAsia":
            return LastFMAsia(root=path,transform=T.NormalizeFeatures())
        if name == "DeezerEurope":
            return DeezerEurope(root=path,transform=T.NormalizeFeatures())

        return (CitationFull if name == 'dblp' else Planetoid)(
            path,
            name,
            "public",
            T.NormalizeFeatures())

    path = osp.join(osp.expanduser('~'), 'datasets', args.dataset)
    dataset = get_dataset(path, args.dataset)
    data = dataset.data

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    encoder = Encoder(data.num_features, num_hidden, activation,
                      base_model=base_model, k=num_layers).to(device)
    model = Model(encoder, num_hidden, num_proj_hidden, tau).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    start = t()
    prev = start
    # networkx copy of the edge list, used to sample sub-graphs
    G = nx.Graph()
    G.add_edges_from(list(zip(data.edge_index.numpy()[0],data.edge_index.numpy()[1])))

    # VGAE that learns the sampled view
    gnn_generative_1 = GNN(3, 96, JK="last", drop_ratio=0, gnn_type= "gcn")
    model_generative_1 = vgae(gnn_generative_1, 96)
    model_generative_1.to(device)
    optimizer_generative_1 = optim.Adam(model_generative_1.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # set training mode
    model.train()
    model_generative_1.train()
    for epoch in range(1, num_epochs + 1):
        # uncomment to increase the eps every T epochs
        #if epoch%20 ==0:
        #    eps = eps*1.1

        # sample a sub-graph from the original one and relabel its nodes 0..k-1
        S = G.subgraph(np.random.permutation(G.number_of_nodes())[:sample_size])
        x = data.x[np.array(S.nodes())].to(device)
        S = nx.relabel.convert_node_labels_to_integers(S, first_label=0, ordering='default')
        edge_index = np.array(S.edges()).T
        # add the reversed copy of every edge
        edge_index = torch.LongTensor(np.hstack([edge_index,edge_index[::-1]])).to(device)

        loss1, loss2, loss3 = train(model, x, edge_index, eps, model_generative_1,optimizer_generative_1, lamb, alpha, beta, 5, 0.2)

        now = t()
        print(f'(T) | Epoch={epoch:03d}, loss1={loss1:.4f}, loss2={loss2:.4f}'
              f' this epoch {now - prev:.4f}, total {now - start:.4f}')
        prev = now

    print("=== Final ===")
    results,results_1 = test(model, data.x, data.edge_index, model_generative_1,data.y, final=True)
    print(results,results_1)
    # create the log directory first, so a missing folder cannot discard the
    # result of the whole training run
    os.makedirs(args.log, exist_ok=True)
    with open(osp.join(args.log, "progress.csv"), "w") as f:
        f.write(str(results))


--------------------------------------------------------------------------------
/code/train_edit_auto.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os.path as osp
  3 | import random
  4 | from time import perf_counter as t
  5 | import yaml
  6 | import numpy as np
  7 | from yaml import SafeLoader
  8 | from scipy.linalg import fractional_matrix_power, inv
  9 | from torch.utils.data import random_split
 10 | import torch
 11 | import torch_geometric.transforms as T
 12 | import torch.nn.functional as F
 13 | import torch.nn as nn
 14 | from layers import GCNConv
 15 | import networkx as nx
 16 | import matplotlib.pyplot as plt
 17 | from torch_geometric.datasets import Planetoid, CitationFull, Amazon, Coauthor, GitHub, FacebookPagePage, LastFMAsia, DeezerEurope
 18 | from torch_geometric.utils import dropout_adj
 19 | from model import Encoder, Model, drop_feature
 20 | from utils import normalize_adj_tensor, normalize_adj_tensor_sp, edge2adj
 21 | from attack import PGD_attack_graph
 22 | from eval import label_classification
 23 | import pickle
 24 | from torch_geometric.nn import global_mean_pool, global_add_pool
 25 | from model_gcn import GNN
 26 | import torch.optim as optim
 27 | import os
 28 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # for remindering of the minmatch of shape
 29 | import warnings
 30 | warnings.filterwarnings('ignore')
 31 | def load_data(file):
 32 |         data_load_file = []
 33 |         file_1 = open(file, "rb")
 34 |         data_load_file = pickle.load(file_1)
 35 |         return data_load_file
# Unpickled once at import time (presumably a networkx graph -- confirm);
# train() iterates hy.nodes and uses the name suffixes to split nodes into views.
hy = load_data("./data/hy_new_aaai_2.pickle")
 37 | # hy = load_data("./data/hy_aaai_chi_1.pickle")
 38 | # print(len(list(hy.nodes())))
 39 | # println
 40 | 
 41 | class vgae(nn.Module):
 42 |     def __init__(self, gnn, emb_dim):
 43 |         super(vgae, self).__init__()
 44 |         self.encoder = gnn
 45 |         self.encoder_mean = nn.Sequential(nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim))
 46 |         # make sure std is positive
 47 |         self.encoder_std = nn.Sequential(nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim), nn.Softplus())
 48 |         # only reconstruct first 7-dim, please refer to https://github.com/snap-stanford/pretrain-gnns/issues/30
 49 |         self.decoder = nn.Sequential(nn.ReLU(inplace=True), nn.Linear(emb_dim, emb_dim), nn.ReLU(inplace=True), nn.Linear(emb_dim, 4), nn.Sigmoid())
 50 |         self.decoder_edge = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 1))
 51 | 
 52 |         self.bceloss = nn.BCELoss(reduction='none')
 53 |         self.pool = global_mean_pool
 54 |         self.add_pool = global_add_pool
 55 |         self.sigmoid = nn.Sigmoid()
 56 |         self.softplus = nn.Softplus()
 57 |         self.softmax = nn.Softmax(dim=1)
 58 | 
 59 |         # reconstruct 4-class & 3-class edge_attr for 1st & 2nd dimension
 60 |         self.decoder_1 = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 4))
 61 |         self.decoder_2 = nn.Sequential(nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, 4))
 62 |         self.crossentropyloss = nn.CrossEntropyLoss(reduction='none')
 63 | 
 64 |     def forward_encoder(self, x, edge_index):
 65 |         x = self.encoder(x, edge_index)
 66 |         x_mean = self.encoder_mean(x)
 67 |         x_std = self.encoder_std(x)
 68 |         gaussian_noise = torch.randn(x_mean.shape).to(x.device)
 69 |         x = gaussian_noise * x_std + x_mean
 70 |         return x.detach(), x_mean, x_std
 71 | 
 72 |     # def forward_decoder(self, x, edge_index, edge_index_neg):
 73 |     def forward_decoder(self, x, edge_index):
 74 |         eleWise_mul = x[edge_index[0]] * x[edge_index[1]]
 75 |         edge_attr_pred = self.decoder(eleWise_mul)
 76 |         edge_pos = self.sigmoid( self.decoder_edge(eleWise_mul) ).squeeze()
 77 |         # edge_neg = self.sigmoid( self.decoder_edge(x[edge_index_neg[0]] * x[edge_index_neg[1]]) ).squeeze()
 78 |         # return edge_attr_pred, edge_pos, edge_neg
 79 |         return edge_pos
 80 | 
 81 |     def loss_vgae(self, edge_pos_pred, edge_index_batch, x_mean, x_std, reward=None):
 82 |         # evaluate p(A|Z)
 83 |         # num_edge, _ = edge_attr_pred.shape
 84 |         # loss_rec = self.bceloss(edge_attr_pred.reshape(-1), edge_attr[:, :4].reshape(-1))
 85 |         # loss_rec = loss_rec.reshape((num_edge, -1)).sum(dim=1)
 86 | 
 87 |         loss_edge_pos = self.bceloss(edge_pos_pred, torch.ones(edge_pos_pred.shape).to(edge_pos_pred.device))
 88 |         # loss_edge_neg = self.bceloss(edge_neg_pred, torch.zeros(edge_neg_pred.shape).to(edge_neg_pred.device))
 89 |         # loss_pos = loss_rec + loss_edge_pos
 90 |         loss_pos = loss_edge_pos
 91 |         loss_pos_cat = torch.cat((loss_pos, loss_pos), 0).view(2, -1)
 92 |         # print("loss_pos:", loss_pos_cat.size())
 93 |         # print("edge_index_batch:", edge_index_batch.size())
 94 |         # println()
 95 |         loss_pos = self.pool(loss_pos_cat, edge_index_batch)
 96 |         # loss_neg = self.pool(loss_edge_neg, edge_index_neg_batch)
 97 |         # loss_rec = loss_pos + loss_neg
 98 |         loss_rec = loss_pos
 99 |         #print('loss_pos + loss_neg', loss_pos, loss_neg)
100 |         if not reward is None:
101 |             loss_rec = loss_rec * reward
102 |             #print("reward:", reward)
103 |             #print("loss_rec:", loss_rec)
104 | 
105 |         # evaluate p(Z|X,A)
106 |         kl_divergence = - 0.5 * (1 + 2 * torch.log(x_std+ 1e-6) - x_mean**2 - x_std**2).sum(dim=1)
107 |         kl_ones = torch.ones(kl_divergence.shape).to(kl_divergence.device)
108 |         # kl_divergence = self.pool(kl_divergence, batch)
109 |         # kl_double_norm = 1 / self.add_pool(kl_ones, batch)
110 |         # kl_divergence = kl_divergence * kl_double_norm
111 |         # print("loss_rec:",loss_rec.mean())
112 |         # print("kl_divergence:",kl_divergence.size())
113 |         # println()
114 |         loss = (loss_rec.mean(axis=1) + kl_divergence).mean()
115 |         '''
116 |         # link prediction for sanity check
117 |         from sklearn.metrics import roc_auc_score
118 |         from sklearn.metrics import average_precision_score
119 |         print(roc_auc_score(edge_attr.cpu().numpy(), edge_attr_pred.detach().cpu().numpy()), average_precision_score(edge_attr.cpu().numpy(), edge_attr_pred.detach().cpu().numpy()))
120 |         '''
121 |         return loss, loss_edge_pos.mean().item()
122 |         # return loss, (loss_edge_pos.mean()+loss_edge_neg.mean()).item()/2
123 | 
124 |     def generate(self, data):
125 |         x, _, _ = self.forward_encoder(data.x, data.edge_index)
126 |         eleWise_mul = torch.einsum('nd,md->nmd', x, x)
127 |         # calculate softmax probability
128 |         prob = self.decoder_edge(eleWise_mul).squeeze()
129 |         # print("prob:", prob.size())
130 |         # pritnl()
131 |         prob = torch.exp(prob)
132 |         prob[torch.isinf(prob)] = 1e10
133 |         prob[list(range(x.shape[0])), list(range(x.shape[0]))] = 0
134 |         prob = torch.einsum('nm,n->nm', prob, 1 / prob.sum(dim=1))
135 | 
136 |         # sparsify
137 |         
138 |         prob[prob < 1e-1] = 0
139 |         prob[prob.sum(dim=1) == 0] = 1
140 |         prob[list(range(x.shape[0])), list(range(x.shape[0]))] = 0
141 |         prob = torch.einsum('nm,n->nm', prob, 1 / prob.sum(dim=1))
142 | 
143 |         # predict 4-class & 3-class edge_attr for 1st & 2nd dimension
144 |         edge_attr_prob_1 = self.softmax(self.decoder_1(eleWise_mul))
145 |         edge_attr_rand_1 = torch.rand((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1]))
146 |         edge_attr_pred_1 = torch.zeros((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1]), dtype=torch.int64)
147 |         for n in range(3):
148 |             edge_attr_pred_1[edge_attr_rand_1 >= edge_attr_prob_1[:, :, n]] = n + 1
149 |             edge_attr_rand_1 -= edge_attr_prob_1[:, :, n]
150 | 
151 |         edge_attr_prob_2 = self.softmax(self.decoder_2(eleWise_mul))
152 |         edge_attr_rand_2 = torch.rand((edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1]))
153 |         edge_attr_pred_2 = torch.zeros((edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1]), dtype=torch.int64)
154 |         for n in range(2):
155 |             edge_attr_pred_2[edge_attr_rand_2 >= edge_attr_prob_2[:, :, n]] = n + 1
156 |             edge_attr_rand_2 -= edge_attr_prob_2[:, :, n]
157 | 
158 |         edge_attr_pred = torch.cat((edge_attr_pred_1.reshape((edge_attr_prob_1.shape[0], edge_attr_prob_1.shape[1], 1)),
159 |                                     edge_attr_pred_2.reshape(
160 |                                         (edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1], 1)),edge_attr_pred_2.reshape(
161 |                                         (edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1], 1)),edge_attr_pred_2.reshape(
162 |                                         (edge_attr_prob_2.shape[0], edge_attr_prob_2.shape[1], 1))), dim=2)
163 |         
164 | 
165 |         return prob, edge_attr_pred
166 | 
def train(model: Model, x, edge_index, eps, model_1, optimizer_1, model_2, optimizer_2, lamb, alpha, beta, steps, node_ratio):
    """Run one optimisation step of the contrastive model and both VGAE samplers.

    Two graph views are generated by the VGAE models (``model_1`` and
    ``model_2``): each encodes ``x`` into latent node features and decodes a
    weight for every edge in ``edge_index``, which is scattered into a dense
    adjacency matrix.  The contrastive ``model`` embeds the original graph and
    both generated views; when ``eps > 0`` an adversarial view produced by
    ``PGD_attack_graph`` is embedded as well.

    Besides its arguments, this function reads the module-level names
    ``optimizer`` (optimiser of ``model``), ``drop_edge_rate_1``, ``device``
    and ``hy`` (the object whose ``nodes`` are split into views by name
    suffix).

    Args:
        model: Contrastive model; called as ``model(x, adj)`` and provides
            ``model.loss``.
        x: Node feature matrix.
        edge_index: Edge list used as sparse indices of the adjacency matrix.
        eps: Weight of the adversarial loss; the attack is skipped when it is
            not positive.
        model_1, model_2: VGAE view generators exposing ``forward_encoder``,
            ``forward_decoder`` and (for ``model_2``) ``loss_vgae``.
        optimizer_1, optimizer_2: Optimisers of ``model_1`` and ``model_2``.
        lamb: Weight of the similarity regulariser added to the main loss.
        alpha, beta, steps, node_ratio: Forwarded unchanged to
            ``PGD_attack_graph``.

    Returns:
        Tuple ``(loss1, loss2, loss_vgae)`` of Python floats.
    """
    # Reset the gradients of all three optimisers; otherwise the VGAE
    # gradients would keep accumulating from one call to the next.
    optimizer.zero_grad()
    optimizer_1.zero_grad()
    optimizer_2.zero_grad()

    adj = edge2adj(x, edge_index)
    dense_shape = (adj.size()[0], adj.size()[1])
    # Edge-dropped graph; only consumed by the PGD attack below.
    edge_index_1 = dropout_adj(edge_index, p=drop_edge_rate_1)[0]

    # --- learning to sample: first generated view -------------------------
    x_1, _, _ = model_1.forward_encoder(x, edge_index)
    edge_pred_1 = model_1.forward_decoder(x_1, edge_index)
    adj_1 = torch.sparse_coo_tensor(edge_index, edge_pred_1, dense_shape).to_dense()

    # --- learning to sample: second generated view ------------------------
    # The statistics and edge predictions of this view feed the VGAE loss.
    x_2, x_mean, x_std = model_2.forward_encoder(x, edge_index)
    edge_pos_pred = model_2.forward_decoder(x_2, edge_index)
    adj_2 = torch.sparse_coo_tensor(edge_index, edge_pos_pred, dense_shape).to_dense()

    if eps > 0:
        print("x_1:", x_1.size())
        print("x:", x.size())
        # Features before/after the attack are dumped for later inspection.
        with open(r"./data/tmp_case_before.pickle", "wb") as file:
            pickle.dump(x, file)
        adj_3, x_3 = PGD_attack_graph(model, edge_index_1, edge_index, x_1, x, steps, node_ratio, alpha, beta)
        print("x_3:", x_3.size())
        with open(r"./data/tmp_case_after.pickle", "wb") as file:
            pickle.dump(x_3, file)

    z = model(x, adj)
    z_1 = model(x_1, adj_1)
    z_2 = model(x_2, adj_2)

    # --- cross-view contrastive learning ----------------------------------
    # Nodes are assigned to a view by the last character of their name:
    # "s" -> spatial, "p" -> POI, anything else -> flow.
    # Assumes node names are unique, so the position equals list.index(name).
    poi_view = []
    spatial_view = []
    flow_view = []
    for position, name in enumerate(hy.nodes):
        if name.endswith("s"):
            spatial_view.append(position)
        elif name.endswith("p"):
            poi_view.append(position)
        else:
            flow_view.append(position)

    reg_num = 180
    linear = nn.Linear(len(spatial_view), reg_num).to(device)
    linear_1 = nn.Linear(len(flow_view), reg_num).to(device)
    # NOTE(review): the rows of z_2 are copied through .tolist(), so these
    # tensors are detached from z_2, and the linear layers are created anew on
    # every call without being registered in any optimiser.  The cross-view
    # term therefore changes the loss value but sends no gradient to `model`.
    poi_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in poi_view]), requires_grad=True).to(device)
    spatial_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in spatial_view]), requires_grad=True).to(device)
    flow_view_tensor = torch.tensor(np.array([z_2[item].tolist() for item in flow_view]), requires_grad=True).to(device)
    # 128 is the hard-coded embedding width; the spatial and flow views are
    # mapped onto `reg_num` rows so they can be compared with the POI view.
    flow_out = linear_1(flow_view_tensor.view(128, len(flow_view)).float())
    flow_trans = flow_out.view(reg_num, 128)
    spatial_out = linear(spatial_view_tensor.view(128, len(spatial_view)).float())
    spatial_trans = spatial_out.view(reg_num, 128).float()

    loss_v1, _ = model.loss(flow_trans.float(), spatial_trans.float(), batch_size=0)
    loss_v2, _ = model.loss(flow_trans.float(), poi_view_tensor.float(), batch_size=0)
    loss_v3, _ = model.loss(spatial_trans.float(), poi_view_tensor.float(), batch_size=0)

    # Adaptive scalar weight for each pair of views.
    model_fs = nn.Sequential(nn.Linear(reg_num * 2, 1), nn.ReLU()).to(device)
    flow_spatial = torch.cat((flow_trans, spatial_trans), 0).to(device)
    flow_poi = torch.cat((flow_trans, poi_view_tensor), 0).to(device)
    spatial_poi = torch.cat((spatial_trans, poi_view_tensor), 0).to(device)
    fs_w = model_fs(flow_spatial.view(128, -1).float()).mean()
    fp_w = model_fs(flow_poi.view(128, -1).float()).mean()
    sp_w = model_fs(spatial_poi.view(128, -1).float()).mean()
    loss_view = fs_w.item() * loss_v1 + fp_w.item() * loss_v2 + sp_w.item() * loss_v3

    # --- contrastive losses between the generated views and the original --
    loss1, simi1 = model.loss(z_1, z_2, batch_size=0)
    _, simi2 = model.loss(z_1, z, batch_size=0)
    loss3, simi3 = model.loss(z_2, z, batch_size=0)

    # Reward for the VGAE: 1 where the loss is above its mean, 0.01 elsewhere
    # (weaken the reward for low contrastive loss).
    loss3 = loss3 - loss3.mean()
    loss3[loss3 > 0] = 1
    loss3[loss3 <= 0] = 0.01
    reward = loss3.mean().item()

    loss1 = loss1.mean() + lamb * torch.clamp(simi1 * 2 - simi2.detach() - simi3.detach(), 0).mean()

    if eps > 0:
        z_3 = model(x_3, adj_3)
        loss2, _ = model.loss(z_1, z_3)
        loss2 = loss2.mean()
        loss = (loss1 + eps * loss2 + 0.05 * loss_view.mean())
    else:
        loss = loss1 + 0.05 * loss_view.mean()
        loss2 = loss1

    # --- VGAE loss of the second generator, scaled by the reward ----------
    loss_3, link_loss_2 = model_2.loss_vgae(edge_pos_pred, edge_index, x_mean, x_std, reward=reward)
    loss_vage = loss_3 + link_loss_2

    # Both losses share model_2's forward graph, so it must survive the first
    # backward pass; it can be released after the second one.
    loss.backward(retain_graph=True)
    loss_vage.backward()

    optimizer.step()
    optimizer_1.step()
    # model_2 receives gradients from both losses and has to be updated too.
    optimizer_2.step()

    return loss1.item(), loss2.item(), loss_vage.item()
342 | 
def test(model: Model, x, edge_index, model_1, y, final=False, task="node"):
    """Embed the full graph through the VGAE-generated view and evaluate it.

    The VGAE ``model_1`` encodes ``x`` into latent node features and decodes
    one weight per edge of ``edge_index``; the resulting dense adjacency and
    the latent features are passed through ``model``.  The embedding is
    pickled to ``./data/tmp_vector_chi_3.pickle`` and scored with
    ``label_classification``.

    Reads the module-level name ``device``.

    Args:
        model: Contrastive model, switched to eval mode here.
        x: Node feature matrix.
        edge_index: Edge list of the graph.
        model_1: VGAE view generator.
        y: Node labels forwarded to ``label_classification``.
        final: Unused; kept for call compatibility.
        task: Unused; kept for call compatibility.

    Returns:
        Whatever ``label_classification(z_1, y, ratio=0.1)`` returns.
    """
    model.eval()
    adj = edge2adj(x, edge_index)
    x = x.to(device)
    adj = adj.to(device)
    edge_index_dev = edge_index.to(device)

    # Pure inference: no autograd graph is needed for the embedding.
    with torch.no_grad():
        x_1, _, _ = model_1.forward_encoder(x, edge_index_dev)
        x_1 = x_1.to(device)
        # Decode from the latent features, exactly as train() does.
        edge_pos_pred = model_1.forward_decoder(x_1, edge_index_dev)
        s = torch.sparse_coo_tensor(edge_index_dev, edge_pos_pred.to(device), (adj.size()[0], adj.size()[1]))
        adj_1 = s.to_dense().to(device)
        z_1 = model(x_1, adj_1)
    print("test z_1:", z_1.size())

    # Persist the embedding for downstream analysis.
    with open(r"./data/tmp_vector_chi_3.pickle", "wb") as file:
        pickle.dump(z_1, file)

    return label_classification(z_1, y, ratio=0.1)
376 | 
if __name__ == '__main__':
    # Script entry point: parse options, build the models, train on random
    # subgraphs and write the final evaluation result to the log directory.
    # train() and test() read several names defined here as module globals
    # (optimizer, device, drop_edge_rate_1, ...).
    import os  # local import: only needed to create the log directory

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='Cora')
    parser.add_argument('--gpu_id', type=int, default=0)
    parser.add_argument('--config', type=str, default='config.yaml')
    parser.add_argument('--log', type=str, default='results/Cora/')
    parser.add_argument('--seed', type=int, default=39788)
    parser.add_argument('--eps', type=float, default=0.5)
    parser.add_argument('--alpha', type=float, default=0.1)
    parser.add_argument('--beta', type=float, default=0.1)
    parser.add_argument('--lamb', type=float, default=0.05)
    args = parser.parse_args()

    assert args.gpu_id in range(0, 8)

    # An empty YAML file loads as None; treat it as "no dataset sections".
    with open(args.config) as config_file:
        config = yaml.load(config_file, Loader=SafeLoader) or {}
    if args.dataset in config:
        config = config[args.dataset]
    else:
        # Defaults used when the config file has no section for the dataset.
        config = {
            'learning_rate': 0.001,
            'num_hidden': 256,
            'num_proj_hidden': 256,
            'activation': 'prelu',
            'base_model': 'GCNConv',
            'num_layers': 2,
            'drop_edge_rate_1': 0.3,
            'drop_edge_rate_2': 0.4,
            'drop_feature_rate_1': 0.1,
            'drop_feature_rate_2': 0.0,
            'tau': 0.4,
            'num_epochs': 1000,
            'weight_decay': 1e-5,
            'drop_scheme': 'degree',
        }

    # The config value wins when present; otherwise fall back to the
    # command-line option so the built-in defaults above do not raise KeyError.
    seed = config.get("seed", args.seed)
    torch.manual_seed(seed)
    random.seed(12345)
    np.random.seed(seed)

    learning_rate = config['learning_rate']
    num_hidden = config['num_hidden']
    num_proj_hidden = config['num_proj_hidden']
    activation = ({'relu': F.relu, 'prelu': nn.PReLU(), 'rrelu': nn.RReLU()})[config['activation']]
    base_model = GCNConv
    num_layers = config['num_layers']

    drop_edge_rate_1 = config['drop_edge_rate_1']
    drop_edge_rate_2 = config['drop_edge_rate_2']
    drop_feature_rate_1 = config['drop_feature_rate_1']
    drop_feature_rate_2 = config['drop_feature_rate_2']
    tau = config['tau']
    num_epochs = config['num_epochs']
    weight_decay = config['weight_decay']
    # Attack / regularisation hyper-parameters: config first, CLI as fallback.
    eps = config.get("eps", args.eps)
    lamb = config.get("lamb", args.lamb)
    alpha = config.get("alpha", args.alpha)
    beta = config.get("beta", args.beta)

    # Number of nodes sampled per epoch.
    sample_size = 1388  # new york(1388)
    # sample_size = 2234 #chicago

    def get_dataset(path, name):
        """Return the dataset called `name`, stored under `path`, with normalised features."""
        assert name in ['Cora', 'CiteSeer', "AmazonC", "AmazonP", 'CoauthorC', 'CoauthorP',
                        "DBLP", "PubMed", "GitHub", "Facebook", "LastFMAsia", "DeezerEurope"]
        if name == "DBLP":
            name = "dblp"
        if name == "AmazonC":
            return Amazon(path, "Computers", T.NormalizeFeatures())
        if name == "AmazonP":
            return Amazon(path, "Photo", T.NormalizeFeatures())
        if name == 'CoauthorC':
            return Coauthor(root=path, name='cs', transform=T.NormalizeFeatures())
        if name == 'CoauthorP':
            return Coauthor(root=path, name='physics', transform=T.NormalizeFeatures())
        if name == "GitHub":
            return GitHub(root=path, transform=T.NormalizeFeatures())
        if name == "Facebook":
            return FacebookPagePage(root=path, transform=T.NormalizeFeatures())
        if name == "LastFMAsia":
            return LastFMAsia(root=path, transform=T.NormalizeFeatures())
        if name == "DeezerEurope":
            return DeezerEurope(root=path, transform=T.NormalizeFeatures())

        return (CitationFull if name == 'dblp' else Planetoid)(
            path,
            name,
            "public",
            T.NormalizeFeatures())

    path = osp.join(osp.expanduser('~'), 'datasets', args.dataset)
    dataset = get_dataset(path, args.dataset)
    data = dataset.data

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    encoder = Encoder(data.num_features, num_hidden, activation,
                      base_model=base_model, k=num_layers).to(device)
    model = Model(encoder, num_hidden, num_proj_hidden, tau).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    start = t()
    prev = start
    # Undirected graph built from the edge list; used to sample subgraphs.
    G = nx.Graph()
    G.add_edges_from(list(zip(data.edge_index.numpy()[0], data.edge_index.numpy()[1])))

    # Two independent VGAE view generators, each with its own optimiser.
    gnn_generative_1 = GNN(3, 96, JK="last", drop_ratio=0, gnn_type="gcn")
    model_generative_1 = vgae(gnn_generative_1, 96)
    model_generative_1.to(device)
    optimizer_generative_1 = optim.Adam(model_generative_1.parameters(), lr=learning_rate, weight_decay=weight_decay)
    gnn_generative_2 = GNN(3, 96, JK="last", drop_ratio=0, gnn_type="gcn")
    model_generative_2 = vgae(gnn_generative_2, 96)
    model_generative_2.to(device)
    optimizer_generative_2 = optim.Adam(model_generative_2.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Put every trained model into training mode.
    model.train()
    model_generative_1.train()
    model_generative_2.train()

    for epoch in range(1, num_epochs + 1):
        # uncomment to increase the eps every T epochs
        # if epoch%20 ==0:
        #    eps = eps*1.1

        # Sample a random node-induced subgraph of the original graph.
        S = G.subgraph(np.random.permutation(G.number_of_nodes())[:sample_size])
        x = data.x[np.array(S.nodes())].to(device)
        S = nx.relabel.convert_node_labels_to_integers(S, first_label=0, ordering='default')
        edge_index = np.array(S.edges()).T
        # Append the reversed edges so both directions are present.
        edge_index = torch.LongTensor(np.hstack([edge_index, edge_index[::-1]])).to(device)

        loss1, loss2, loss3 = train(model, x, edge_index, eps, model_generative_1, optimizer_generative_1, model_generative_2, optimizer_generative_2, lamb, alpha, beta, 5, 0.2)

        now = t()
        print(f'(T) | Epoch={epoch:03d}, loss1={loss1:.4f}, loss2={loss2:.4f}'
              f' this epoch {now - prev:.4f}, total {now - start:.4f}')
        prev = now

    print("=== Final ===")
    results_1 = test(model, data.x, data.edge_index, model_generative_1, data.y, final=True)
    print(results_1)
    # Make sure the log directory exists before writing the result file.
    os.makedirs(args.log, exist_ok=True)
    with open(osp.join(args.log, "progress.csv"), "w") as f:
        f.write(str(results_1))


--------------------------------------------------------------------------------