├── README.md ├── clear_data ├── r_dict.pickle ├── region2poi.pickle └── rs_ratio.pickle ├── conf ├── model_data ├── mh_cd.json └── popus_count.npy ├── model_layers.py ├── remvc.py ├── remvc_data.py ├── remvc_flow.py ├── remvc_poi.py └── remvc_tasks.py /README.md: -------------------------------------------------------------------------------- 1 | # ReMVC 2 | 3 | please download data from https://drive.google.com/drive/folders/1X5M_-aNIXFfKZDlP6e5PugvQUsf3M3Ia?usp=sharing 4 | -------------------------------------------------------------------------------- /clear_data/r_dict.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Liang-NTU/ReMVC/9160f14ba51f85920738d13f500b84d36c530d1a/clear_data/r_dict.pickle -------------------------------------------------------------------------------- /clear_data/region2poi.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Liang-NTU/ReMVC/9160f14ba51f85920738d13f500b84d36c530d1a/clear_data/region2poi.pickle -------------------------------------------------------------------------------- /clear_data/rs_ratio.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Liang-NTU/ReMVC/9160f14ba51f85920738d13f500b84d36c530d1a/clear_data/rs_ratio.pickle -------------------------------------------------------------------------------- /conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | GPU_DEVICE = 1 3 | EXTRACTOR = MLP 4 | TYPE = avg 5 | FUNC = relu 6 | EMB = 16 7 | MUTUAL = 1.0 8 | REG = 0.0001 -------------------------------------------------------------------------------- /model_data/mh_cd.json: -------------------------------------------------------------------------------- 1 | {"0": 0, "1": 0, "2": 1, "3": 1, "4": 1, "5": 2, "6": 2, "7": 2, "8": 2, "9": 3, "10": 4, "11": 5, "12": 5, "13": 6, "14": 1, "15": 1, "16": 7, "17": 8, "18": 9, "19": 9, "20": 2, "21": 2, "22": 2, "23": 0, "24": 10, "25": 11, "26": 10, "27": 12, "28": 13, "29": 0, "30": 2, "31": 1, "32": 0, "33": 14, "34": 7, "35": 10, "36": 15, "37": 0, "38": 0, "39": 16, "40": 16, "41": 10, "42": 15, "43": 1, "44": 11, "45": 13, "46": 13, "47": 13, "48": 3, "49": 15, "50": 0, "51": 13, "52": 13, "53": 5, "54": 17, "55": 1, "56": 2, "57": 8, "58": 14, "59": 14, "60": 17, "61": 18, "62": 7, "63": 7, "64": 4, "65": 19, "66": 19, "67": 3, "68": 3, "69": 16, "70": 16, "71": 20, "72": 21, "73": 21, "74": 21, "75": 22, "76": 20, "77": 12, "78": 10, "79": 21, "80": 8, "81": 0, "82": 1, "83": 1, "84": 1, "85": 1, "86": 1, "87": 8, "88": 19, "89": 2, "90": 12, "91": 23, "92": 2, "93": 2, "94": 23, "95": 14, "96": 19, "97": 19, "98": 19, "99": 19, "100": 5, "101": 5, "102": 23, "103": 17, "104": 17, "105": 19, "106": 7, "107": 17, "108": 17, "109": 18, "110": 16, "111": 1, "112": 24, "113": 11, "114": 11, "115": 24, "116": 23, "117": 11, "118": 20, "119": 11, "120": 11, "121": 1, "122": 1, "123": 23, "124": 10, "125": 9, "126": 18, "127": 12, "128": 23, "129": 10, "130": 10, "131": 10, "132": 9, "133": 14, "134": 8, "135": 24, "136": 24, "137": 9, "138": 25, "139": 24, "140": 26, "141": 12, "142": 16, "143": 1, "144": 14, "145": 6, "146": 12, "147": 4, "148": 16, "149": 8, "150": 18, "151": 16, "152": 16, "153": 9, "154": 9, "155": 12, "156": 27, "157": 22, "158": 22, "159": 23, "160": 23, "161": 23, "162": 15, "163": 13, "164": 13, "165": 17, "166": 
19, "167": 16, "168": 16, "169": 23, "170": 23, "171": 4, "172": 25, "173": 25, "174": 21, "175": 21, "176": 4, "177": 4, "178": 3, "179": 7, "180": 5, "181": 17, "182": 17, "183": 16, "184": 25, "185": 18, "186": 18, "187": 3, "188": 4, "189": 21, "190": 21, "191": 21, "192": 21, "193": 14, "194": 14, "195": 20, "196": 20, "197": 23, "198": 10, "199": 10, "200": 10, "201": 10, "202": 23, "203": 2, "204": 2, "205": 5, "206": 5, "207": 6, "208": 6, "209": 8, "210": 26, "211": 25, "212": 25, "213": 12, "214": 12, "215": 12, "216": 21, "217": 23, "218": 23, "219": 12, "220": 26, "221": 23, "222": 20, "223": 20, "224": 15, "225": 18, "226": 21, "227": 21, "228": 15, "229": 18, "230": 18, "231": 3, "232": 4, "233": 7, "234": 5, "235": 20, "236": 19, "237": 3, "238": 13, "239": 26, "240": 0, "241": 26, "242": 24, "243": 24, "244": 3, "245": 4, "246": 21, "247": 21, "248": 12, "249": 12, "250": 12, "251": 25, "252": 16, "253": 27, "254": 28, "255": 15, "256": 27, "257": 28, "258": 27, "259": 20, "260": 22, "261": 5, "262": 9, "263": 18, "264": 6, "265": 0, "266": 13, "267": 6, "268": 6, "269": 6} -------------------------------------------------------------------------------- /model_data/popus_count.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Liang-NTU/ReMVC/9160f14ba51f85920738d13f500b84d36c530d1a/model_data/popus_count.npy -------------------------------------------------------------------------------- /model_layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | 12 | import math 13 | 14 | from configparser import ConfigParser 15 | config = ConfigParser() 16 | config.read('conf', encoding='UTF-8') 17 | _type = config['DEFAULT'].get("TYPE") 18 | func = config['DEFAULT'].get("FUNC") 19 | 20 | class SAEncoder(nn.Module): 21 | 22 | def __init__(self, d_input, d_model, n_head): 23 | super(SAEncoder, self).__init__() 24 | 25 | self.d_input = d_input 26 | self.d_model = d_model 27 | self.n_head = n_head 28 | 29 | self.linear_k = nn.Linear(self.d_input, self.d_model * self.n_head) 30 | self.linear_v = nn.Linear(self.d_input, self.d_model * self.n_head) 31 | self.linear_q = nn.Linear(self.d_input, self.d_model * self.n_head) 32 | 33 | self.relu = nn.ReLU() 34 | self.softmax = nn.Softmax() 35 | 36 | def fusion(self, v, f_type): 37 | if f_type == "concat": 38 | output = v.view(-1, self.d_model * self.n_head) 39 | if f_type == "avg": 40 | output = torch.mean(v, dim=0) 41 | return output 42 | 43 | def forward(self, x): 44 | q = self.linear_q(x) 45 | k = self.linear_k(x) 46 | v = self.linear_v(x) 47 | 48 | q_ = q.view(self.n_head, self.d_model) 49 | k_ = k.view(self.n_head, self.d_model) 50 | v_ = v.view(self.n_head, self.d_model) 51 | 52 | head, d_tensor = k_.size() 53 | score = (q_.matmul(k_.transpose(0, 1))) / math.sqrt(d_tensor) 54 | score = self.softmax(score) 55 | 56 | v_ = self.relu(v_) 57 | v = score.matmul(v_) 58 | 59 | output = self.fusion(v, _type) 60 | return output 61 | 62 | 63 | class CroSAEncoder(nn.Module): 64 | 65 | def __init__(self, d_input_query, d_input_kv, d_model, n_head): 66 | super(SAEncoder, self).__init__() 67 | 68 | self.d_input_query = 
d_input_query 69 | self.d_input_kv = d_input_kv 70 | self.d_model = d_model 71 | self.n_head = n_head 72 | 73 | self.linear_k = nn.Linear(self.d_input_kv, self.d_model * self.n_head) 74 | self.linear_v = nn.Linear(self.d_input_kv, self.d_model * self.n_head) 75 | self.linear_q = nn.Linear(self.d_input_query, self.d_model * self.n_head) 76 | 77 | self.relu = nn.ReLU() 78 | self.softmax = nn.Softmax() 79 | 80 | def fusion(self, v, f_type): 81 | if f_type == "concat": 82 | output = v.view(-1, self.d_model * self.n_head) 83 | if f_type == "avg": 84 | output = torch.mean(v, dim=0) 85 | return output 86 | 87 | def forward(self, q, kv): 88 | q = self.linear_q(q) 89 | k = self.linear_k(kv) 90 | v = self.linear_v(kv) 91 | 92 | q_ = q.view(self.n_head, self.d_model) 93 | k_ = k.view(self.n_head, self.d_model) 94 | v_ = v.view(self.n_head, self.d_model) 95 | 96 | head, d_tensor = k_.size() 97 | score = (q_.matmul(k_.transpose(0, 1))) / math.sqrt(d_tensor) 98 | score = self.softmax(score) 99 | 100 | v_ = self.relu(v_) 101 | v = score.matmul(v_) 102 | 103 | output = self.fusion(v, _type) 104 | if func == "relu": 105 | output = self.relu(output) 106 | 107 | return output -------------------------------------------------------------------------------- /remvc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | from configparser import ConfigParser 12 | 13 | import sys 14 | import os 15 | import time 16 | import json 17 | import pickle 18 | 19 | from remvc_tasks import lu_classify, predict_popus 20 | from remvc_flow import FLOW_SSL 21 | from remvc_poi import POI_SSL 22 | from remvc_data import SSLData 23 | 24 | FType = torch.FloatTensor 25 | LType = torch.LongTensor 26 | 27 | from configparser import ConfigParser 28 | config = ConfigParser() 29 | config.read('conf', encoding='UTF-8') 30 | GPU_DEVICE = config['DEFAULT'].get("GPU_DEVICE") 31 | device = torch.device("cuda:"+GPU_DEVICE if torch.cuda.is_available() else "cpu") 32 | extractor = config['DEFAULT'].get("EXTRACTOR") 33 | 34 | size = int(config['DEFAULT'].get("EMB")) 35 | mutual_reg = float(config['DEFAULT'].get("MUTUAL")) 36 | poi_reg = float(config['DEFAULT'].get("REG")) 37 | 38 | fw = open("model_result/" + extractor + "_emb_" + str(size), "w") 39 | 40 | class Model_SSL(): 41 | 42 | def __init__(self): 43 | super(Model_SSL,self).__init__() 44 | 45 | self.ssl_data = SSLData() 46 | self.poi_model = POI_SSL(self.ssl_data, neg_size=10, emb_size=size, attention_size=16, temp=0.08, extractor=extractor).to(device) 47 | self.flow_model = FLOW_SSL(self.ssl_data, neg_size=150, emb_size=size, temp=0.08, time_zone=48, extractor=extractor).to(device) 48 | 49 | self.epoch = 200 50 | self.learning_rate = 0.001 51 | 52 | self.mutual_reg = mutual_reg 53 | self.poi_reg = poi_reg 54 | 55 | self.mutual_neg_size = 5 56 | self.emb_size = size 57 | self.init_basic_conf() 58 | 59 | self.opt = Adam(lr=self.learning_rate, params=[{"params":self.poi_model.poi_net.parameters()},\ 60 | {"params":self.flow_model.pickup_net.parameters()}, \ 61 | {"params":self.flow_model.dropoff_net.parameters()}, {"params":self.mutual_net.parameters()}], weight_decay=1e-5) 62 | 63 | def init_basic_conf(self): 64 | self.mutual_net = 
torch.nn.Sequential( 65 | nn.Linear(self.emb_size*2, 1)).to(device) 66 | 67 | def forward(self, base_poi_emb, base_flow_emb, neg_poi_emb, neg_flow_emb): 68 | pos_emb = torch.cat([base_poi_emb, base_flow_emb]) 69 | pos_scores = self.mutual_net(pos_emb) 70 | pos_label = torch.Tensor([1 for _ in range(pos_scores.size(0))]).type(FType).to(device) 71 | 72 | weights = torch.ones(neg_poi_emb.size()[0]) 73 | _indexs = torch.multinomial(weights, self.mutual_neg_size) 74 | neg_poi_emb = neg_poi_emb[_indexs] 75 | base_flow_emb = base_flow_emb.repeat(self.mutual_neg_size, 1) 76 | neg_emb_p = torch.cat([neg_poi_emb, base_flow_emb], dim=1) 77 | 78 | weights = torch.ones(neg_flow_emb.size()[0]) 79 | _indexs = torch.multinomial(weights, self.mutual_neg_size) 80 | neg_flow_emb = neg_flow_emb[_indexs] 81 | base_poi_emb = base_poi_emb.repeat(self.mutual_neg_size, 1) 82 | neg_emb_f = torch.cat([base_poi_emb, neg_flow_emb], dim=1) 83 | 84 | neg_emb = torch.cat([neg_emb_p, neg_emb_f], dim=0) 85 | neg_scores = self.mutual_net(neg_emb).squeeze() 86 | neg_label = torch.Tensor([0 for _ in range(neg_scores.size(0))]).type(FType).to(device) 87 | 88 | scores = torch.cat([pos_scores, neg_scores]) 89 | labels = torch.cat([pos_label, neg_label]) 90 | 91 | loss = -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum() 92 | 93 | return loss 94 | 95 | def model_train(self): 96 | for epoch in range(self.epoch): 97 | self.loss = 0.0 98 | 99 | for region_id in self.ssl_data.sampling_pool: 100 | poi_loss, base_poi_emb, neg_poi_emb = self.poi_model.model_train(region_id) 101 | flow_loss, base_flow_emb, neg_flow_emb = self.flow_model.model_train(region_id) 102 | mutual_loss = self.forward(base_poi_emb, base_flow_emb, neg_poi_emb, neg_flow_emb) 103 | 104 | loss = flow_loss + self.poi_reg * poi_loss + self.mutual_reg * mutual_loss 105 | 106 | self.opt.zero_grad() 107 | self.loss += loss 108 | loss.backward() 109 | self.opt.step() 110 | 111 | print("=============================> iter epoch", epoch) 112 | print("avg loss = " + str(self.loss)) 113 | 114 | if epoch >= 150: 115 | self.test() 116 | fw.write("=============================> iter epoch " + str(epoch) + "\n") 117 | fw.write("avg loss = " + str(self.loss) + "\n") 118 | 119 | def get_emb(self): 120 | output_flow = self.flow_model.get_emb() 121 | output_poi = self.poi_model.get_emb() 122 | output = np.concatenate((output_flow, output_poi), axis=1) 123 | return output 124 | 125 | def test(self): 126 | output = self.get_emb() 127 | lu_classify(output, fw, _type="con") 128 | predict_popus(output, fw) 129 | fw.flush() 130 | 131 | if __name__ == '__main__': 132 | model = Model_SSL() 133 | model.model_train() -------------------------------------------------------------------------------- /remvc_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | from configparser import ConfigParser 12 | 13 | import sys 14 | import os 15 | import time 16 | import json 17 | import pickle 18 | 19 | from remvc_tasks import lu_classify, predict_popus 20 | 21 | FType = torch.FloatTensor 22 | LType = torch.LongTensor 23 | 24 | 25 | region_dict_path = "./clear_data/training_dict.pickle" 26 | region2poi_path = 
"./clear_data/region2poi.pickle" 27 | rs_ratio_path = "./clear_data/rs_ratio.pickle" 28 | 29 | from configparser import ConfigParser 30 | config = ConfigParser() 31 | config.read('conf', encoding='UTF-8') 32 | GPU_DEVICE = config['DEFAULT'].get("GPU_DEVICE") 33 | device = torch.device("cuda:"+GPU_DEVICE if torch.cuda.is_available() else "cpu") 34 | 35 | class SSLData: 36 | 37 | def __init__(self): 38 | 39 | self.region_dict = pickle.load(open(region_dict_path, "rb")) 40 | self.sampling_pool = [_i for _i in range(len(self.region_dict))] 41 | 42 | region2poi = pickle.load(open(region2poi_path, "rb")) 43 | self.level_c2p = region2poi["level_c2p"] 44 | self.level_p2c = region2poi["level_p2c"] 45 | self.poi_num = len(region2poi["node_dict"]) 46 | 47 | self.rs_ratio = pickle.load(open(rs_ratio_path, "rb")) 48 | 49 | def get_region(self, idx): 50 | pois = self.region_dict[idx]["poi"] 51 | 52 | poi_set = [] 53 | for poi in pois: 54 | _id = poi[1] 55 | l_vector = [poi[2].index(1), poi[3].index(1)] 56 | poi_set.append([_id, l_vector]) 57 | 58 | pickup_matrix = self.region_dict[idx]["pickup_matrix"] 59 | dropoff_matrix = self.region_dict[idx]["dropoff_matrix"] 60 | 61 | pickup_matrix = pickup_matrix / pickup_matrix.sum() 62 | where_are_NaNs = np.isnan(pickup_matrix) 63 | pickup_matrix[where_are_NaNs] = 0 64 | 65 | dropoff_matrix = dropoff_matrix / dropoff_matrix.sum() 66 | where_are_NaNs = np.isnan(dropoff_matrix) 67 | dropoff_matrix[where_are_NaNs] = 0 68 | 69 | flow_matrix = [pickup_matrix, dropoff_matrix] 70 | 71 | return poi_set, flow_matrix -------------------------------------------------------------------------------- /remvc_flow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | from configparser import ConfigParser 12 | from sklearn.metrics.pairwise import cosine_similarity 13 | 14 | import sys 15 | import os 16 | import time 17 | import json 18 | import pickle 19 | 20 | from remvc_tasks import lu_classify, predict_popus 21 | from remvc_data import SSLData 22 | from model_layers import SAEncoder 23 | 24 | FType = torch.FloatTensor 25 | LType = torch.LongTensor 26 | 27 | from configparser import ConfigParser 28 | config = ConfigParser() 29 | config.read('conf', encoding='UTF-8') 30 | GPU_DEVICE = config['DEFAULT'].get("GPU_DEVICE") 31 | device = torch.device("cuda:"+GPU_DEVICE if torch.cuda.is_available() else "cpu") 32 | 33 | rs_type = "random" 34 | 35 | class Flatten(torch.nn.Module): 36 | def forward(self, input): 37 | return input.view(input.size(0), -1) 38 | 39 | class FLOW_SSL(torch.nn.Module): 40 | 41 | def __init__(self, ssl_data, neg_size, emb_size, temp, time_zone, extractor): 42 | super(FLOW_SSL,self).__init__() 43 | 44 | self.ssl_data = ssl_data 45 | self.init_basic_conf(neg_size, emb_size, temp, time_zone, extractor) 46 | 47 | def init_basic_conf(self, neg_size, emb_size, temp, time_zone, extractor): 48 | self.neg_size = neg_size 49 | self.emb_size = emb_size 50 | self.temp = temp 51 | self.time_zone = time_zone 52 | 53 | self.extractor = extractor 54 | 55 | if self.extractor == "CNN": 56 | self.pickup_net = torch.nn.Sequential( 57 | nn.Conv1d(in_channels=self.time_zone, out_channels=4, kernel_size=7), 58 | 
Flatten(), 59 | nn.Linear(1056, self.emb_size) 60 | ).to(device) 61 | 62 | self.dropoff_net = torch.nn.Sequential( 63 | nn.Conv1d(in_channels=self.time_zone, out_channels=4, kernel_size=7), 64 | Flatten(), 65 | nn.Linear(1056, self.emb_size) 66 | ).to(device) 67 | 68 | if self.extractor == "MLP": 69 | self.pickup_net = torch.nn.Sequential( 70 | nn.Linear(270, self.emb_size), 71 | nn.ReLU(), 72 | ).to(device) 73 | 74 | self.dropoff_net = torch.nn.Sequential( 75 | nn.Linear(270, self.emb_size), 76 | nn.ReLU(), 77 | ).to(device) 78 | 79 | if self.extractor == "SA": 80 | self.pickup_net = SAEncoder(d_input=270, d_model=16, n_head=3).to(device) 81 | self.dropoff_net = SAEncoder(d_input=270, d_model=16, n_head=3).to(device) 82 | 83 | 84 | def gaussian_noise(self, matrix, mean=0, sigma=0.03): 85 | matrix = matrix.copy() 86 | noise = np.random.normal(mean, sigma, matrix.shape) 87 | mask_overflow_upper = matrix+noise >= 1.0 88 | mask_overflow_lower = matrix+noise < 0 89 | noise[mask_overflow_upper] = 1.0 90 | noise[mask_overflow_lower] = 0 91 | matrix += noise 92 | return matrix 93 | 94 | def positive_sampling(self, region_id): 95 | pos_flow_sets = [] 96 | _, flow_matrix = self.ssl_data.get_region(region_id) 97 | pickup_matrix, dropoff_matrix = flow_matrix 98 | 99 | for sigma in [0.0001, 0.0001, 0.0001, 0.0001]: 100 | pickup_matrix = self.gaussian_noise(pickup_matrix, sigma=sigma) 101 | dropoff_matrix = self.gaussian_noise(dropoff_matrix, sigma=sigma) 102 | pos_flow_sets.append([pickup_matrix, dropoff_matrix]) 103 | 104 | return pos_flow_sets 105 | 106 | def negative_sampling(self, region_id): 107 | sampling_pool = [] 108 | for _id in self.ssl_data.sampling_pool: 109 | if _id == region_id: 110 | continue 111 | sampling_pool.append(_id) 112 | 113 | p = self.ssl_data.rs_ratio["model_flow"][region_id] 114 | neg_region_ids = np.random.choice(sampling_pool, self.neg_size, replace=False, p=p) 115 | 116 | neg_flow_sets = [] 117 | for neg_region_id in neg_region_ids: 118 | _, flow_matrix = self.ssl_data.get_region(neg_region_id) 119 | neg_flow_sets.append(flow_matrix) 120 | 121 | 122 | return neg_flow_sets 123 | 124 | def agg_region_emb(self, flow_matrix): 125 | pickup_matrix = flow_matrix[0] 126 | dropoff_matrix = flow_matrix[1] 127 | 128 | if self.extractor == "CNN": 129 | pickup_matrix = torch.from_numpy(pickup_matrix).type(FType).to(device) 130 | pickup_matrix = pickup_matrix.unsqueeze(0) 131 | pickup_emb = self.pickup_net(pickup_matrix) 132 | 133 | dropoff_matrix = torch.from_numpy(dropoff_matrix).type(FType).to(device) 134 | dropoff_matrix = dropoff_matrix.unsqueeze(0) 135 | dropoff_emb = self.dropoff_net(dropoff_matrix) 136 | 137 | if self.extractor == "MLP": 138 | pickup_matrix = np.sum(pickup_matrix, axis=0) 139 | pickup_matrix = torch.from_numpy(pickup_matrix).type(FType).to(device) 140 | pickup_emb = self.pickup_net(pickup_matrix) 141 | 142 | dropoff_matrix = np.sum(dropoff_matrix, axis=0) 143 | dropoff_matrix = torch.from_numpy(dropoff_matrix).type(FType).to(device) 144 | dropoff_emb = self.dropoff_net(dropoff_matrix) 145 | 146 | if self.extractor == "SA": 147 | pickup_matrix = np.sum(pickup_matrix, axis=0) 148 | pickup_matrix = torch.from_numpy(pickup_matrix).type(FType).to(device) 149 | pickup_emb = self.pickup_net(pickup_matrix) 150 | 151 | dropoff_matrix = np.sum(dropoff_matrix, axis=0) 152 | dropoff_matrix = torch.from_numpy(dropoff_matrix).type(FType).to(device) 153 | dropoff_emb = self.dropoff_net(dropoff_matrix) 154 | 155 | # region_emb = torch.cat([pickup_emb, dropoff_emb], 
dim=1).squeeze() 156 | region_emb = (pickup_emb + dropoff_emb) / 2 157 | region_emb = region_emb.squeeze() 158 | 159 | return region_emb 160 | 161 | def forward(self, flow_matrix, pos_flow_sets, neg_flow_sets): 162 | base_region_emb = self.agg_region_emb(flow_matrix) 163 | 164 | pos_region_emb_list = [] 165 | for pos_flow_matrix in pos_flow_sets: 166 | pos_region_emb = self.agg_region_emb(pos_flow_matrix) 167 | pos_region_emb_list.append(pos_region_emb.unsqueeze(0)) 168 | pos_region_emb = torch.cat(pos_region_emb_list, dim=0) 169 | 170 | neg_region_emb_list = [] 171 | for neg_flow_matrix in neg_flow_sets: 172 | neg_region_emb = self.agg_region_emb(neg_flow_matrix) 173 | neg_region_emb_list.append(neg_region_emb.unsqueeze(0)) 174 | neg_region_emb = torch.cat(neg_region_emb_list, dim=0) 175 | 176 | pos_scores = torch.matmul(pos_region_emb, base_region_emb) 177 | pos_label = torch.Tensor([1 for _ in range(pos_scores.size(0))]).type(FType).to(device) 178 | 179 | neg_scores = torch.matmul(neg_region_emb, base_region_emb) 180 | neg_label = torch.Tensor([0 for _ in range(neg_scores.size(0))]).type(FType).to(device) 181 | 182 | scores = torch.cat([pos_scores, neg_scores]) 183 | labels = torch.cat([pos_label, neg_label]) 184 | scores /= self.temp 185 | 186 | loss = -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum() 187 | return loss, base_region_emb, neg_region_emb 188 | 189 | def model_train(self, region_id): 190 | 191 | _, flow_matrix = self.ssl_data.get_region(region_id) 192 | pos_flow_sets = self.positive_sampling(region_id) 193 | neg_flow_sets = self.negative_sampling(region_id) 194 | 195 | flow_loss, base_region_emb, neg_region_emb = self.forward(flow_matrix, pos_flow_sets, neg_flow_sets) 196 | 197 | return flow_loss, base_region_emb, neg_region_emb 198 | 199 | def get_emb(self): 200 | output = [] 201 | for region_id in self.ssl_data.sampling_pool: 202 | _, flow_matrix = self.ssl_data.get_region(region_id) 203 | region_emb = self.agg_region_emb(flow_matrix) 204 | 205 | output.append(region_emb.detach().cpu().numpy()) 206 | return np.array(output) -------------------------------------------------------------------------------- /remvc_poi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | from configparser import ConfigParser 12 | from sklearn.metrics.pairwise import cosine_similarity 13 | 14 | import sys 15 | import os 16 | import time 17 | import json 18 | import pickle 19 | 20 | from remvc_tasks import lu_classify, predict_popus 21 | from remvc_data import SSLData 22 | from model_layers import SAEncoder 23 | 24 | FType = torch.FloatTensor 25 | LType = torch.LongTensor 26 | 27 | from configparser import ConfigParser 28 | config = ConfigParser() 29 | config.read('conf', encoding='UTF-8') 30 | GPU_DEVICE = config['DEFAULT'].get("GPU_DEVICE") 31 | device = torch.device("cuda:"+GPU_DEVICE if torch.cuda.is_available() else "cpu") 32 | 33 | 34 | class Flatten(torch.nn.Module): 35 | def forward(self, input): 36 | return input.view(input.size(0), -1) 37 | 38 | class POI_SSL(torch.nn.Module): 39 | 40 | def __init__(self, ssl_data, neg_size, emb_size, attention_size, temp, extractor): 41 | super(POI_SSL,self).__init__() 42 | 43 
| self.ssl_data = ssl_data 44 | self.init_basic_conf(neg_size, emb_size, attention_size, temp, extractor) 45 | 46 | def init_basic_conf(self, neg_size, emb_size, attention_size, temp, extractor): 47 | self.neg_size = neg_size 48 | self.emb_size = emb_size 49 | self.attention_size = attention_size 50 | self.bin_num = 10 51 | 52 | self.poi_num = self.ssl_data.poi_num 53 | self.temp = temp 54 | 55 | self.extractor = extractor 56 | 57 | self.W_poi = None 58 | 59 | if self.extractor == "CNN": 60 | self.poi_net = torch.nn.Sequential( 61 | nn.Conv1d(in_channels=1, out_channels=4, kernel_size=7), 62 | Flatten(), 63 | nn.Linear(964, self.emb_size) 64 | ).to(device) 65 | 66 | if self.extractor == "MLP": 67 | self.poi_net = torch.nn.Sequential( 68 | nn.Linear(247, self.emb_size), 69 | nn.ReLU(), 70 | ).to(device) 71 | 72 | if self.extractor == "SA": 73 | self.poi_net = SAEncoder(d_input=247, d_model=16, n_head=3).to(device) 74 | 75 | 76 | def generate_attention(self, W_parents, W_children, mask): 77 | _input = torch.cat([W_parents, W_children], dim=2) 78 | output = self.tree_att_net(_input) 79 | pre_attention = torch.matmul(output, self.v_attention) 80 | 81 | pre_attention = pre_attention + mask 82 | attention = torch.softmax(pre_attention, dim=1) 83 | 84 | return attention 85 | 86 | def tree_gcn(self): 87 | parentsList, childrenList, maskList = [], [], [] 88 | 89 | for i, p2c_i in enumerate(self.ssl_data.level_p2c[::-1]): 90 | children = list(p2c_i.values()) 91 | max_n_children = max(len(x) for x in children) 92 | mask = [] 93 | 94 | for k in children: 95 | cur_mask = [0.0] * len(k) 96 | if len(k) < max_n_children: 97 | cur_mask += [-10**13] * (max_n_children - len(k)) 98 | k += [0] * (max_n_children - len(k)) 99 | mask.append(cur_mask) 100 | 101 | parents = [] 102 | for p in p2c_i.keys(): 103 | parents.append([p] * max_n_children) 104 | 105 | children = torch.Tensor(children).type(LType) 106 | parents = torch.Tensor(parents).type(LType) 107 | mask = torch.Tensor(mask).type(FType) 108 | 109 | parentsList.append(parents) 110 | childrenList.append(children) 111 | maskList.append(mask) 112 | 113 | W_emb_temp = self.W_poi.clone() + 0. 
114 | for i, (parents, children, mask) in enumerate(zip(parentsList, childrenList, maskList)): 115 | W_parents = self.W_poi[parents] 116 | if i == 0: 117 | W_children = self.W_poi[children] 118 | else: 119 | W_children = W_emb_temp[children] 120 | 121 | tempAttention = self.generate_attention(W_parents, W_children, mask) 122 | tempEmb = (W_children * tempAttention[:,:,None]).sum(dim=1) 123 | 124 | W_emb_temp = torch.index_copy(W_emb_temp, 0, parents[:, 0], tempEmb) 125 | 126 | parentsList, childrenList, maskList = [], [], [] 127 | for i, c2p_i in enumerate(self.ssl_data.level_c2p[::-1]): 128 | parents = list(c2p_i.values()) 129 | max_n_parents = max(len(x) for x in parents) 130 | mask = [] 131 | 132 | for k in parents: 133 | cur_mask = [0.0] * len(k) 134 | if len(k) < max_n_parents: 135 | cur_mask += [-10**13] * (max_n_parents - len(k)) 136 | k += [0] * (max_n_parents - len(k)) 137 | mask.append(cur_mask) 138 | 139 | children = [] 140 | for c in c2p_i.keys(): 141 | children.append([c] * max_n_parents) 142 | 143 | children = torch.Tensor(children).type(LType) 144 | parents = torch.Tensor(parents).type(LType) 145 | mask = torch.Tensor(mask).type(FType) 146 | 147 | parentsList.append(parents) 148 | childrenList.append(children) 149 | maskList.append(mask) 150 | 151 | for i, (parents, children, mask) in enumerate(zip(parentsList, childrenList, maskList)): 152 | W_children, W_parents = W_emb_temp[children], W_emb_temp[parents] 153 | 154 | tempAttention = self.generate_attention(W_children, W_parents, mask) 155 | # tempEmb = (W_parents * tempAttention[:,:,None]).sum(axis=1) 156 | tempEmb = (W_parents * tempAttention[:,:,None]).sum(dim=1) 157 | 158 | # W_emb_temp[children[:, 0]] = tempEmb 159 | W_emb_temp = torch.index_copy(W_emb_temp, 0, children[:, 0], tempEmb) 160 | 161 | self.W_emb_temp = W_emb_temp 162 | 163 | return W_emb_temp 164 | 165 | def location_attention(self, loc_emb_one, loc_emb_two): 166 | _input = torch.cat([loc_emb_one, loc_emb_two], axis=1) 167 | 168 | output = self.location_att_net(_input) 169 | pre_attention = torch.matmul(output, self.l_attention) 170 | 171 | attention = torch.softmax(pre_attention, dim=0) 172 | return attention 173 | 174 | def agg_region_emb(self, poi_set, W_emb_temp): 175 | p_node_dict = self.ssl_data.region_dict[0]["node_dict"] 176 | poi_f = np.zeros(len(p_node_dict)) 177 | for poi in poi_set: 178 | poi_id = poi[0] 179 | poi_f[poi_id] += 1 180 | 181 | if np.sum(poi_f) != 0: 182 | poi_f = poi_f / np.sum(poi_f) 183 | poi_f = torch.Tensor(poi_f).type(FType).to(device) 184 | 185 | if self.extractor == "CNN": 186 | poi_f = poi_f.unsqueeze(0) 187 | poi_f = poi_f.unsqueeze(0) 188 | temp_emb = self.poi_net(poi_f) 189 | 190 | if self.extractor == "MLP": 191 | temp_emb = self.poi_net(poi_f) 192 | 193 | if self.extractor == "SA": 194 | temp_emb = self.poi_net(poi_f) 195 | 196 | region_emb = temp_emb 197 | region_emb = region_emb.squeeze() 198 | 199 | return region_emb 200 | 201 | def add_aug(self, poi_set, _ratio): 202 | add_poi_set = [] 203 | for poi in poi_set: 204 | add_poi_set.append(poi) 205 | ratio = random.random() 206 | if ratio < _ratio: 207 | add_poi_set.append(poi) 208 | return add_poi_set 209 | 210 | def delete_aug(self, poi_set, _ratio): 211 | de_poi_set = [] 212 | for poi in poi_set: 213 | ratio = random.random() 214 | if ratio > _ratio: 215 | de_poi_set.append(poi) 216 | if not de_poi_set: 217 | de_poi_set = [poi_set[0]] 218 | return de_poi_set 219 | 220 | def replace_aug(self, poi_set, _ratio): 221 | replace_poi_set = [] 222 | for poi in poi_set: 223 
| new_poi = poi 224 | ratio = random.random() 225 | if ratio < _ratio: 226 | new_poi[0] = random.randint(0, self.ssl_data.poi_num-1) 227 | replace_poi_set.append(new_poi) 228 | return replace_poi_set 229 | 230 | def positive_sampling(self, region_id): 231 | pos_poi_sets = [] 232 | poi_set, _ = self.ssl_data.get_region(region_id) 233 | 234 | de_poi_set = [] 235 | for ratio in [0.1]: 236 | de_poi_set.append(self.delete_aug(poi_set, ratio)) 237 | 238 | add_poi_set = [] 239 | for ratio in [0.1]: 240 | add_poi_set.append(self.add_aug(poi_set, ratio)) 241 | 242 | re_poi_set = [] 243 | for ratio in [0.1]: 244 | re_poi_set.append(self.replace_aug(poi_set, ratio)) 245 | 246 | pos_poi_sets = de_poi_set + add_poi_set + re_poi_set 247 | 248 | return pos_poi_sets 249 | 250 | def negative_sampling(self, region_id): 251 | sampling_pool = [] 252 | for _id in self.ssl_data.sampling_pool: 253 | if _id == region_id: 254 | continue 255 | sampling_pool.append(_id) 256 | 257 | p = self.ssl_data.rs_ratio["model_poi"][region_id] 258 | neg_region_ids = np.random.choice(sampling_pool, self.neg_size, replace=False, p=p) 259 | 260 | neg_poi_sets = [] 261 | for neg_region_id in neg_region_ids: 262 | poi_set, _ = self.ssl_data.get_region(neg_region_id) 263 | neg_poi_sets.append(poi_set) 264 | 265 | return neg_poi_sets 266 | 267 | def forward(self, poi_set, pos_poi_sets, neg_poi_sets): 268 | 269 | # W_emb_temp = self.tree_gcn() 270 | W_emb_temp = self.W_poi 271 | 272 | base_region_emb = self.agg_region_emb(poi_set, W_emb_temp) 273 | 274 | pos_region_emb_list = [] 275 | for pos_poi_set in pos_poi_sets: 276 | pos_region_emb = self.agg_region_emb(pos_poi_set, W_emb_temp) 277 | pos_region_emb_list.append(pos_region_emb.unsqueeze(0)) 278 | pos_region_emb = torch.cat(pos_region_emb_list, dim=0) 279 | 280 | neg_region_emb_list = [] 281 | for neg_poi_set in neg_poi_sets: 282 | neg_region_emb = self.agg_region_emb(neg_poi_set, W_emb_temp) 283 | neg_region_emb_list.append(neg_region_emb.unsqueeze(0)) 284 | neg_region_emb = torch.cat(neg_region_emb_list, dim=0) 285 | 286 | pos_scores = torch.matmul(pos_region_emb, base_region_emb) 287 | pos_label = torch.Tensor([1 for _ in range(pos_scores.size(0))]).type(FType).to(device) 288 | 289 | neg_scores = torch.matmul(neg_region_emb, base_region_emb) 290 | neg_label = torch.Tensor([0 for _ in range(neg_scores.size(0))]).type(FType).to(device) 291 | 292 | scores = torch.cat([pos_scores, neg_scores]) 293 | labels = torch.cat([pos_label, neg_label]) 294 | scores /= self.temp 295 | 296 | loss = -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum() 297 | 298 | return loss, base_region_emb, neg_region_emb 299 | 300 | def model_train(self, region_id): 301 | poi_set, _ = self.ssl_data.get_region(region_id) 302 | pos_poi_sets = self.positive_sampling(region_id) 303 | neg_poi_sets = self.negative_sampling(region_id) 304 | 305 | poi_loss, base_region_emb, neg_region_emb = self.forward(poi_set, pos_poi_sets, neg_poi_sets) 306 | 307 | return poi_loss, base_region_emb, neg_region_emb 308 | 309 | def get_emb(self): 310 | output = [] 311 | for region_id in self.ssl_data.sampling_pool: 312 | poi_set, _ = self.ssl_data.get_region(region_id) 313 | region_emb = self.agg_region_emb(poi_set, self.W_poi) 314 | 315 | output.append(region_emb.detach().cpu().numpy()) 316 | return np.array(output) 317 | -------------------------------------------------------------------------------- /remvc_tasks.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 
import pandas as pd 4 | import numpy as np 5 | 6 | from sklearn.cluster import KMeans 7 | from sklearn.feature_extraction.text import TfidfTransformer 8 | from sklearn import linear_model 9 | import matplotlib.pyplot as plt 10 | 11 | from sklearn.model_selection import LeaveOneOut, KFold 12 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error 13 | from sklearn.metrics import normalized_mutual_info_score 14 | from sklearn.metrics import adjusted_rand_score 15 | from sklearn.metrics import f1_score 16 | 17 | 18 | popus = np.load("./model_data/popus_count.npy") 19 | cd = json.load(open("./model_data/mh_cd.json")) 20 | 21 | cd_labels = np.zeros((len(cd))) 22 | for i in range(len(cd)): 23 | cd_labels[i] = cd[str(i)] 24 | 25 | n_clusters = 29 26 | 27 | def regression(X_train, y_train, X_test, alpha): 28 | reg = linear_model.Ridge(alpha=alpha) 29 | reg.fit(X_train, y_train) 30 | 31 | y_pred = reg.predict(X_test) 32 | return y_pred 33 | 34 | def kf_predict(X, Y): 35 | 36 | kf = KFold(n_splits=5) 37 | y_preds = [] 38 | y_truths = [] 39 | for train_index, test_index in kf.split(X): 40 | X_train, X_test = X[train_index], X[test_index] 41 | y_train, y_test = Y[train_index], Y[test_index] 42 | y_pred = regression(X_train, y_train, X_test, 1) 43 | y_preds.append(y_pred) 44 | y_truths.append(y_test) 45 | 46 | return np.concatenate(y_preds), np.concatenate(y_truths) 47 | 48 | def compute_metrics(y_pred, y_test): 49 | y_pred[y_pred<0] = 0 50 | 51 | mae = mean_absolute_error(y_test, y_pred) 52 | mse = mean_squared_error(y_test, y_pred) 53 | r2 = r2_score(y_test, y_pred) 54 | return mae, np.sqrt(mse), r2 55 | 56 | def predict_popus(emb, fw): 57 | y_pred, y_test = kf_predict(emb, popus) 58 | mae, rmse, r2 = compute_metrics(y_pred, y_test) 59 | 60 | print("MAE: ", mae) 61 | print("RMSE: ", rmse) 62 | print("R2: ", r2) 63 | 64 | fw.write("MAE: " + str(mae) + "\n") 65 | fw.write("RMSE: " + str(rmse) + "\n") 66 | fw.write("R2: " + str(r2) + "\n") 67 | 68 | return mae, rmse, r2 69 | 70 | 71 | def F_meansure(cd_labels, emb_labels): 72 | zones = len(cd_labels) 73 | 74 | labels = [] 75 | preds = [] 76 | for _i in range(zones): 77 | for _j in range(_i+1, zones): 78 | cd_i, cd_j = cd_labels[_i], cd_labels[_j] 79 | emb_i, emb_j = emb_labels[_i], emb_labels[_j] 80 | 81 | if cd_i == cd_j: 82 | labels.append(1) 83 | else: 84 | labels.append(0) 85 | 86 | if emb_i == emb_j: 87 | preds.append(1) 88 | else: 89 | preds.append(0) 90 | 91 | bins = np.array([0,0.5,1]) 92 | tn, fp, fn, tp = plt.hist2d(labels, preds, bins=bins, cmap='Blues')[0].flatten() 93 | 94 | precision = tp / (tp + fp) 95 | recall = tp / (tp + fn) 96 | beta = 0.5 97 | f_beta = ((beta**2 + 1) * precision * recall) / (beta**2 * precision + recall) 98 | return f_beta 99 | 100 | 101 | def lu_classify(emb, fw, _type="all"): 102 | kmeans = KMeans(n_clusters=n_clusters, random_state=3) 103 | emb_labels = kmeans.fit_predict(emb) 104 | 105 | nmi = normalized_mutual_info_score(cd_labels, emb_labels) 106 | print("emb nmi: {:.3f}".format(nmi)) 107 | ars = adjusted_rand_score(cd_labels, emb_labels) 108 | print("emb ars: {:.3f}".format(ars)) 109 | f_score = F_meansure(cd_labels, emb_labels) 110 | print("emb f_score: {:.3f}".format(f_score)) 111 | 112 | fw.write("emb nmi: " + str(nmi) + "\n") 113 | fw.write("emb ars: " + str(ars) + "\n") 114 | fw.write("emb f_score: " + str(f_score) + "\n") 115 | 116 | np.save(open("./model_result/clusters_" + _type + ".npy","wb"), emb_labels) 
--------------------------------------------------------------------------------
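The three training modules share one objective: POI_SSL.forward and FLOW_SSL.forward score augmented (positive) and sampled (negative) region views against a base region embedding, Model_SSL.forward in remvc.py scores concatenated (POI, flow) pairs with a small discriminator, and all of them reduce the temperature-scaled scores with -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum(). The sketch below isolates that objective with synthetic tensors so it can be run on its own; it is not part of the repository, and the shapes, sizes and temperature are illustrative only.

# Illustrative sketch -- not part of the ReMVC repository.
# Reproduces the contrastive reduction used in FLOW_SSL.forward, POI_SSL.forward
# and Model_SSL.forward with synthetic tensors; dimensions are examples only.
import torch
import torch.nn.functional as F

def contrastive_loss(base_emb, pos_emb, neg_emb, temp=0.08):
    # base_emb: (d,); pos_emb: (P, d) augmented views; neg_emb: (N, d) sampled regions
    pos_scores = pos_emb.matmul(base_emb)                 # (P,)
    neg_scores = neg_emb.matmul(base_emb)                 # (N,)
    scores = torch.cat([pos_scores, neg_scores]) / temp   # temperature scaling
    labels = torch.cat([torch.ones_like(pos_scores),
                        torch.zeros_like(neg_scores)])
    # Average negative log-softmax mass assigned to the positive entries:
    # positives are pulled toward the base embedding, negatives pushed away.
    return -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum()

if __name__ == "__main__":
    d = 16                        # EMB in conf
    base = torch.randn(d)
    pos = torch.randn(4, d)       # e.g. the four noisy flow views from positive_sampling
    neg = torch.randn(150, d)     # e.g. neg_size of FLOW_SSL
    print(contrastive_loss(base, pos, neg).item())

Because the reduction divides by the number of positives, each positive view contributes its own negative log-probability over the full candidate set, independent of how many negatives are drawn.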
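End to end, remvc.py is the entry point: it builds SSLData, a POI_SSL and a FLOW_SSL encoder, jointly optimises them together with the mutual (cross-view) discriminator, and evaluates the concatenated embeddings with lu_classify and predict_popus. A typical invocation looks like the sketch below; it assumes the Google Drive data has been downloaded so that ./clear_data/training_dict.pickle exists alongside the pickles in this listing, and that a model_result/ directory has been created, since remvc.py opens its log file there.

# Illustrative usage sketch -- not part of the repository.
# Hyper-parameters (GPU_DEVICE, EXTRACTOR, EMB, MUTUAL, REG) are read from the
# ./conf file at import time.
from remvc import Model_SSL

model = Model_SSL()     # loads the clear_data/ pickles and builds both encoders
model.model_train()     # 200 epochs; evaluates every epoch from epoch 150 onwards
emb = model.get_emb()   # (num_regions, 2 * EMB): flow and POI embeddings concatenated

The printed MAE/RMSE/R2 and NMI/ARS/F-score come from remvc_tasks.py and are also written to the model_result log file.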