├── README.md ├── clear_data ├── r_dict.pickle ├── region2poi.pickle └── rs_ratio.pickle ├── conf ├── model_data ├── mh_cd.json └── popus_count.npy ├── model_layers.py ├── remvc.py ├── remvc_data.py ├── remvc_flow.py ├── remvc_poi.py └── remvc_tasks.py /README.md: -------------------------------------------------------------------------------- 1 | # ReMVC 2 | 3 | please download data from https://drive.google.com/drive/folders/1X5M_-aNIXFfKZDlP6e5PugvQUsf3M3Ia?usp=sharing 4 | -------------------------------------------------------------------------------- /clear_data/r_dict.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Liang-NTU/ReMVC/9160f14ba51f85920738d13f500b84d36c530d1a/clear_data/r_dict.pickle -------------------------------------------------------------------------------- /clear_data/region2poi.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Liang-NTU/ReMVC/9160f14ba51f85920738d13f500b84d36c530d1a/clear_data/region2poi.pickle -------------------------------------------------------------------------------- /clear_data/rs_ratio.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Liang-NTU/ReMVC/9160f14ba51f85920738d13f500b84d36c530d1a/clear_data/rs_ratio.pickle -------------------------------------------------------------------------------- /conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | GPU_DEVICE = 1 3 | EXTRACTOR = MLP 4 | TYPE = avg 5 | FUNC = relu 6 | EMB = 16 7 | MUTUAL = 1.0 8 | REG = 0.0001 -------------------------------------------------------------------------------- /model_data/mh_cd.json: -------------------------------------------------------------------------------- 1 | {"0": 0, "1": 0, "2": 1, "3": 1, "4": 1, "5": 2, "6": 2, "7": 2, "8": 2, "9": 3, "10": 4, "11": 5, "12": 5, "13": 6, "14": 1, "15": 1, "16": 7, "17": 8, "18": 9, "19": 9, "20": 2, "21": 2, "22": 2, "23": 0, "24": 10, "25": 11, "26": 10, "27": 12, "28": 13, "29": 0, "30": 2, "31": 1, "32": 0, "33": 14, "34": 7, "35": 10, "36": 15, "37": 0, "38": 0, "39": 16, "40": 16, "41": 10, "42": 15, "43": 1, "44": 11, "45": 13, "46": 13, "47": 13, "48": 3, "49": 15, "50": 0, "51": 13, "52": 13, "53": 5, "54": 17, "55": 1, "56": 2, "57": 8, "58": 14, "59": 14, "60": 17, "61": 18, "62": 7, "63": 7, "64": 4, "65": 19, "66": 19, "67": 3, "68": 3, "69": 16, "70": 16, "71": 20, "72": 21, "73": 21, "74": 21, "75": 22, "76": 20, "77": 12, "78": 10, "79": 21, "80": 8, "81": 0, "82": 1, "83": 1, "84": 1, "85": 1, "86": 1, "87": 8, "88": 19, "89": 2, "90": 12, "91": 23, "92": 2, "93": 2, "94": 23, "95": 14, "96": 19, "97": 19, "98": 19, "99": 19, "100": 5, "101": 5, "102": 23, "103": 17, "104": 17, "105": 19, "106": 7, "107": 17, "108": 17, "109": 18, "110": 16, "111": 1, "112": 24, "113": 11, "114": 11, "115": 24, "116": 23, "117": 11, "118": 20, "119": 11, "120": 11, "121": 1, "122": 1, "123": 23, "124": 10, "125": 9, "126": 18, "127": 12, "128": 23, "129": 10, "130": 10, "131": 10, "132": 9, "133": 14, "134": 8, "135": 24, "136": 24, "137": 9, "138": 25, "139": 24, "140": 26, "141": 12, "142": 16, "143": 1, "144": 14, "145": 6, "146": 12, "147": 4, "148": 16, "149": 8, "150": 18, "151": 16, "152": 16, "153": 9, "154": 9, "155": 12, "156": 27, "157": 22, "158": 22, "159": 23, "160": 23, "161": 23, "162": 15, "163": 13, "164": 13, "165": 17, "166": 
19, "167": 16, "168": 16, "169": 23, "170": 23, "171": 4, "172": 25, "173": 25, "174": 21, "175": 21, "176": 4, "177": 4, "178": 3, "179": 7, "180": 5, "181": 17, "182": 17, "183": 16, "184": 25, "185": 18, "186": 18, "187": 3, "188": 4, "189": 21, "190": 21, "191": 21, "192": 21, "193": 14, "194": 14, "195": 20, "196": 20, "197": 23, "198": 10, "199": 10, "200": 10, "201": 10, "202": 23, "203": 2, "204": 2, "205": 5, "206": 5, "207": 6, "208": 6, "209": 8, "210": 26, "211": 25, "212": 25, "213": 12, "214": 12, "215": 12, "216": 21, "217": 23, "218": 23, "219": 12, "220": 26, "221": 23, "222": 20, "223": 20, "224": 15, "225": 18, "226": 21, "227": 21, "228": 15, "229": 18, "230": 18, "231": 3, "232": 4, "233": 7, "234": 5, "235": 20, "236": 19, "237": 3, "238": 13, "239": 26, "240": 0, "241": 26, "242": 24, "243": 24, "244": 3, "245": 4, "246": 21, "247": 21, "248": 12, "249": 12, "250": 12, "251": 25, "252": 16, "253": 27, "254": 28, "255": 15, "256": 27, "257": 28, "258": 27, "259": 20, "260": 22, "261": 5, "262": 9, "263": 18, "264": 6, "265": 0, "266": 13, "267": 6, "268": 6, "269": 6} -------------------------------------------------------------------------------- /model_data/popus_count.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Liang-NTU/ReMVC/9160f14ba51f85920738d13f500b84d36c530d1a/model_data/popus_count.npy -------------------------------------------------------------------------------- /model_layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | 12 | import math 13 | 14 | from configparser import ConfigParser 15 | config = ConfigParser() 16 | config.read('conf', encoding='UTF-8') 17 | _type = config['DEFAULT'].get("TYPE") 18 | func = config['DEFAULT'].get("FUNC") 19 | 20 | class SAEncoder(nn.Module): 21 | 22 | def __init__(self, d_input, d_model, n_head): 23 | super(SAEncoder, self).__init__() 24 | 25 | self.d_input = d_input 26 | self.d_model = d_model 27 | self.n_head = n_head 28 | 29 | self.linear_k = nn.Linear(self.d_input, self.d_model * self.n_head) 30 | self.linear_v = nn.Linear(self.d_input, self.d_model * self.n_head) 31 | self.linear_q = nn.Linear(self.d_input, self.d_model * self.n_head) 32 | 33 | self.relu = nn.ReLU() 34 | self.softmax = nn.Softmax() 35 | 36 | def fusion(self, v, f_type): 37 | if f_type == "concat": 38 | output = v.view(-1, self.d_model * self.n_head) 39 | if f_type == "avg": 40 | output = torch.mean(v, dim=0) 41 | return output 42 | 43 | def forward(self, x): 44 | q = self.linear_q(x) 45 | k = self.linear_k(x) 46 | v = self.linear_v(x) 47 | 48 | q_ = q.view(self.n_head, self.d_model) 49 | k_ = k.view(self.n_head, self.d_model) 50 | v_ = v.view(self.n_head, self.d_model) 51 | 52 | head, d_tensor = k_.size() 53 | score = (q_.matmul(k_.transpose(0, 1))) / math.sqrt(d_tensor) 54 | score = self.softmax(score) 55 | 56 | v_ = self.relu(v_) 57 | v = score.matmul(v_) 58 | 59 | output = self.fusion(v, _type) 60 | return output 61 | 62 | 63 | class CroSAEncoder(nn.Module): 64 | 65 | def __init__(self, d_input_query, d_input_kv, d_model, n_head): 66 | super(SAEncoder, self).__init__() 67 | 68 | self.d_input_query = 
d_input_query 69 | self.d_input_kv = d_input_kv 70 | self.d_model = d_model 71 | self.n_head = n_head 72 | 73 | self.linear_k = nn.Linear(self.d_input_kv, self.d_model * self.n_head) 74 | self.linear_v = nn.Linear(self.d_input_kv, self.d_model * self.n_head) 75 | self.linear_q = nn.Linear(self.d_input_query, self.d_model * self.n_head) 76 | 77 | self.relu = nn.ReLU() 78 | self.softmax = nn.Softmax() 79 | 80 | def fusion(self, v, f_type): 81 | if f_type == "concat": 82 | output = v.view(-1, self.d_model * self.n_head) 83 | if f_type == "avg": 84 | output = torch.mean(v, dim=0) 85 | return output 86 | 87 | def forward(self, q, kv): 88 | q = self.linear_q(q) 89 | k = self.linear_k(kv) 90 | v = self.linear_v(kv) 91 | 92 | q_ = q.view(self.n_head, self.d_model) 93 | k_ = k.view(self.n_head, self.d_model) 94 | v_ = v.view(self.n_head, self.d_model) 95 | 96 | head, d_tensor = k_.size() 97 | score = (q_.matmul(k_.transpose(0, 1))) / math.sqrt(d_tensor) 98 | score = self.softmax(score) 99 | 100 | v_ = self.relu(v_) 101 | v = score.matmul(v_) 102 | 103 | output = self.fusion(v, _type) 104 | if func == "relu": 105 | output = self.relu(output) 106 | 107 | return output -------------------------------------------------------------------------------- /remvc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | from configparser import ConfigParser 12 | 13 | import sys 14 | import os 15 | import time 16 | import json 17 | import pickle 18 | 19 | from remvc_tasks import lu_classify, predict_popus 20 | from remvc_flow import FLOW_SSL 21 | from remvc_poi import POI_SSL 22 | from remvc_data import SSLData 23 | 24 | FType = torch.FloatTensor 25 | LType = torch.LongTensor 26 | 27 | from configparser import ConfigParser 28 | config = ConfigParser() 29 | config.read('conf', encoding='UTF-8') 30 | GPU_DEVICE = config['DEFAULT'].get("GPU_DEVICE") 31 | device = torch.device("cuda:"+GPU_DEVICE if torch.cuda.is_available() else "cpu") 32 | extractor = config['DEFAULT'].get("EXTRACTOR") 33 | 34 | size = int(config['DEFAULT'].get("EMB")) 35 | mutual_reg = float(config['DEFAULT'].get("MUTUAL")) 36 | poi_reg = float(config['DEFAULT'].get("REG")) 37 | 38 | fw = open("model_result/" + extractor + "_emb_" + str(size), "w") 39 | 40 | class Model_SSL(): 41 | 42 | def __init__(self): 43 | super(Model_SSL,self).__init__() 44 | 45 | self.ssl_data = SSLData() 46 | self.poi_model = POI_SSL(self.ssl_data, neg_size=10, emb_size=size, attention_size=16, temp=0.08, extractor=extractor).to(device) 47 | self.flow_model = FLOW_SSL(self.ssl_data, neg_size=150, emb_size=size, temp=0.08, time_zone=48, extractor=extractor).to(device) 48 | 49 | self.epoch = 200 50 | self.learning_rate = 0.001 51 | 52 | self.mutual_reg = mutual_reg 53 | self.poi_reg = poi_reg 54 | 55 | self.mutual_neg_size = 5 56 | self.emb_size = size 57 | self.init_basic_conf() 58 | 59 | self.opt = Adam(lr=self.learning_rate, params=[{"params":self.poi_model.poi_net.parameters()},\ 60 | {"params":self.flow_model.pickup_net.parameters()}, \ 61 | {"params":self.flow_model.dropoff_net.parameters()}, {"params":self.mutual_net.parameters()}], weight_decay=1e-5) 62 | 63 | def init_basic_conf(self): 64 | self.mutual_net = 
torch.nn.Sequential( 65 | nn.Linear(self.emb_size*2, 1)).to(device) 66 | 67 | def forward(self, base_poi_emb, base_flow_emb, neg_poi_emb, neg_flow_emb): 68 | pos_emb = torch.cat([base_poi_emb, base_flow_emb]) 69 | pos_scores = self.mutual_net(pos_emb) 70 | pos_label = torch.Tensor([1 for _ in range(pos_scores.size(0))]).type(FType).to(device) 71 | 72 | weights = torch.ones(neg_poi_emb.size()[0]) 73 | _indexs = torch.multinomial(weights, self.mutual_neg_size) 74 | neg_poi_emb = neg_poi_emb[_indexs] 75 | base_flow_emb = base_flow_emb.repeat(self.mutual_neg_size, 1) 76 | neg_emb_p = torch.cat([neg_poi_emb, base_flow_emb], dim=1) 77 | 78 | weights = torch.ones(neg_flow_emb.size()[0]) 79 | _indexs = torch.multinomial(weights, self.mutual_neg_size) 80 | neg_flow_emb = neg_flow_emb[_indexs] 81 | base_poi_emb = base_poi_emb.repeat(self.mutual_neg_size, 1) 82 | neg_emb_f = torch.cat([base_poi_emb, neg_flow_emb], dim=1) 83 | 84 | neg_emb = torch.cat([neg_emb_p, neg_emb_f], dim=0) 85 | neg_scores = self.mutual_net(neg_emb).squeeze() 86 | neg_label = torch.Tensor([0 for _ in range(neg_scores.size(0))]).type(FType).to(device) 87 | 88 | scores = torch.cat([pos_scores, neg_scores]) 89 | labels = torch.cat([pos_label, neg_label]) 90 | 91 | loss = -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum() 92 | 93 | return loss 94 | 95 | def model_train(self): 96 | for epoch in range(self.epoch): 97 | self.loss = 0.0 98 | 99 | for region_id in self.ssl_data.sampling_pool: 100 | poi_loss, base_poi_emb, neg_poi_emb = self.poi_model.model_train(region_id) 101 | flow_loss, base_flow_emb, neg_flow_emb = self.flow_model.model_train(region_id) 102 | mutual_loss = self.forward(base_poi_emb, base_flow_emb, neg_poi_emb, neg_flow_emb) 103 | 104 | loss = flow_loss + self.poi_reg * poi_loss + self.mutual_reg * mutual_loss 105 | 106 | self.opt.zero_grad() 107 | self.loss += loss 108 | loss.backward() 109 | self.opt.step() 110 | 111 | print("=============================> iter epoch", epoch) 112 | print("avg loss = " + str(self.loss)) 113 | 114 | if epoch >= 150: 115 | self.test() 116 | fw.write("=============================> iter epoch " + str(epoch) + "\n") 117 | fw.write("avg loss = " + str(self.loss) + "\n") 118 | 119 | def get_emb(self): 120 | output_flow = self.flow_model.get_emb() 121 | output_poi = self.poi_model.get_emb() 122 | output = np.concatenate((output_flow, output_poi), axis=1) 123 | return output 124 | 125 | def test(self): 126 | output = self.get_emb() 127 | lu_classify(output, fw, _type="con") 128 | predict_popus(output, fw) 129 | fw.flush() 130 | 131 | if __name__ == '__main__': 132 | model = Model_SSL() 133 | model.model_train() -------------------------------------------------------------------------------- /remvc_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | from configparser import ConfigParser 12 | 13 | import sys 14 | import os 15 | import time 16 | import json 17 | import pickle 18 | 19 | from remvc_tasks import lu_classify, predict_popus 20 | 21 | FType = torch.FloatTensor 22 | LType = torch.LongTensor 23 | 24 | 25 | region_dict_path = "./clear_data/training_dict.pickle" 26 | region2poi_path = 
"./clear_data/region2poi.pickle" 27 | rs_ratio_path = "./clear_data/rs_ratio.pickle" 28 | 29 | from configparser import ConfigParser 30 | config = ConfigParser() 31 | config.read('conf', encoding='UTF-8') 32 | GPU_DEVICE = config['DEFAULT'].get("GPU_DEVICE") 33 | device = torch.device("cuda:"+GPU_DEVICE if torch.cuda.is_available() else "cpu") 34 | 35 | class SSLData: 36 | 37 | def __init__(self): 38 | 39 | self.region_dict = pickle.load(open(region_dict_path, "rb")) 40 | self.sampling_pool = [_i for _i in range(len(self.region_dict))] 41 | 42 | region2poi = pickle.load(open(region2poi_path, "rb")) 43 | self.level_c2p = region2poi["level_c2p"] 44 | self.level_p2c = region2poi["level_p2c"] 45 | self.poi_num = len(region2poi["node_dict"]) 46 | 47 | self.rs_ratio = pickle.load(open(rs_ratio_path, "rb")) 48 | 49 | def get_region(self, idx): 50 | pois = self.region_dict[idx]["poi"] 51 | 52 | poi_set = [] 53 | for poi in pois: 54 | _id = poi[1] 55 | l_vector = [poi[2].index(1), poi[3].index(1)] 56 | poi_set.append([_id, l_vector]) 57 | 58 | pickup_matrix = self.region_dict[idx]["pickup_matrix"] 59 | dropoff_matrix = self.region_dict[idx]["dropoff_matrix"] 60 | 61 | pickup_matrix = pickup_matrix / pickup_matrix.sum() 62 | where_are_NaNs = np.isnan(pickup_matrix) 63 | pickup_matrix[where_are_NaNs] = 0 64 | 65 | dropoff_matrix = dropoff_matrix / dropoff_matrix.sum() 66 | where_are_NaNs = np.isnan(dropoff_matrix) 67 | dropoff_matrix[where_are_NaNs] = 0 68 | 69 | flow_matrix = [pickup_matrix, dropoff_matrix] 70 | 71 | return poi_set, flow_matrix -------------------------------------------------------------------------------- /remvc_flow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | from configparser import ConfigParser 12 | from sklearn.metrics.pairwise import cosine_similarity 13 | 14 | import sys 15 | import os 16 | import time 17 | import json 18 | import pickle 19 | 20 | from remvc_tasks import lu_classify, predict_popus 21 | from remvc_data import SSLData 22 | from model_layers import SAEncoder 23 | 24 | FType = torch.FloatTensor 25 | LType = torch.LongTensor 26 | 27 | from configparser import ConfigParser 28 | config = ConfigParser() 29 | config.read('conf', encoding='UTF-8') 30 | GPU_DEVICE = config['DEFAULT'].get("GPU_DEVICE") 31 | device = torch.device("cuda:"+GPU_DEVICE if torch.cuda.is_available() else "cpu") 32 | 33 | rs_type = "random" 34 | 35 | class Flatten(torch.nn.Module): 36 | def forward(self, input): 37 | return input.view(input.size(0), -1) 38 | 39 | class FLOW_SSL(torch.nn.Module): 40 | 41 | def __init__(self, ssl_data, neg_size, emb_size, temp, time_zone, extractor): 42 | super(FLOW_SSL,self).__init__() 43 | 44 | self.ssl_data = ssl_data 45 | self.init_basic_conf(neg_size, emb_size, temp, time_zone, extractor) 46 | 47 | def init_basic_conf(self, neg_size, emb_size, temp, time_zone, extractor): 48 | self.neg_size = neg_size 49 | self.emb_size = emb_size 50 | self.temp = temp 51 | self.time_zone = time_zone 52 | 53 | self.extractor = extractor 54 | 55 | if self.extractor == "CNN": 56 | self.pickup_net = torch.nn.Sequential( 57 | nn.Conv1d(in_channels=self.time_zone, out_channels=4, kernel_size=7), 58 | 
Flatten(), 59 | nn.Linear(1056, self.emb_size) 60 | ).to(device) 61 | 62 | self.dropoff_net = torch.nn.Sequential( 63 | nn.Conv1d(in_channels=self.time_zone, out_channels=4, kernel_size=7), 64 | Flatten(), 65 | nn.Linear(1056, self.emb_size) 66 | ).to(device) 67 | 68 | if self.extractor == "MLP": 69 | self.pickup_net = torch.nn.Sequential( 70 | nn.Linear(270, self.emb_size), 71 | nn.ReLU(), 72 | ).to(device) 73 | 74 | self.dropoff_net = torch.nn.Sequential( 75 | nn.Linear(270, self.emb_size), 76 | nn.ReLU(), 77 | ).to(device) 78 | 79 | if self.extractor == "SA": 80 | self.pickup_net = SAEncoder(d_input=270, d_model=16, n_head=3).to(device) 81 | self.dropoff_net = SAEncoder(d_input=270, d_model=16, n_head=3).to(device) 82 | 83 | 84 | def gaussian_noise(self, matrix, mean=0, sigma=0.03): 85 | matrix = matrix.copy() 86 | noise = np.random.normal(mean, sigma, matrix.shape) 87 | mask_overflow_upper = matrix+noise >= 1.0 88 | mask_overflow_lower = matrix+noise < 0 89 | noise[mask_overflow_upper] = 1.0 90 | noise[mask_overflow_lower] = 0 91 | matrix += noise 92 | return matrix 93 | 94 | def positive_sampling(self, region_id): 95 | pos_flow_sets = [] 96 | _, flow_matrix = self.ssl_data.get_region(region_id) 97 | pickup_matrix, dropoff_matrix = flow_matrix 98 | 99 | for sigma in [0.0001, 0.0001, 0.0001, 0.0001]: 100 | pickup_matrix = self.gaussian_noise(pickup_matrix, sigma=sigma) 101 | dropoff_matrix = self.gaussian_noise(dropoff_matrix, sigma=sigma) 102 | pos_flow_sets.append([pickup_matrix, dropoff_matrix]) 103 | 104 | return pos_flow_sets 105 | 106 | def negative_sampling(self, region_id): 107 | sampling_pool = [] 108 | for _id in self.ssl_data.sampling_pool: 109 | if _id == region_id: 110 | continue 111 | sampling_pool.append(_id) 112 | 113 | p = self.ssl_data.rs_ratio["model_flow"][region_id] 114 | neg_region_ids = np.random.choice(sampling_pool, self.neg_size, replace=False, p=p) 115 | 116 | neg_flow_sets = [] 117 | for neg_region_id in neg_region_ids: 118 | _, flow_matrix = self.ssl_data.get_region(neg_region_id) 119 | neg_flow_sets.append(flow_matrix) 120 | 121 | 122 | return neg_flow_sets 123 | 124 | def agg_region_emb(self, flow_matrix): 125 | pickup_matrix = flow_matrix[0] 126 | dropoff_matrix = flow_matrix[1] 127 | 128 | if self.extractor == "CNN": 129 | pickup_matrix = torch.from_numpy(pickup_matrix).type(FType).to(device) 130 | pickup_matrix = pickup_matrix.unsqueeze(0) 131 | pickup_emb = self.pickup_net(pickup_matrix) 132 | 133 | dropoff_matrix = torch.from_numpy(dropoff_matrix).type(FType).to(device) 134 | dropoff_matrix = dropoff_matrix.unsqueeze(0) 135 | dropoff_emb = self.dropoff_net(dropoff_matrix) 136 | 137 | if self.extractor == "MLP": 138 | pickup_matrix = np.sum(pickup_matrix, axis=0) 139 | pickup_matrix = torch.from_numpy(pickup_matrix).type(FType).to(device) 140 | pickup_emb = self.pickup_net(pickup_matrix) 141 | 142 | dropoff_matrix = np.sum(dropoff_matrix, axis=0) 143 | dropoff_matrix = torch.from_numpy(dropoff_matrix).type(FType).to(device) 144 | dropoff_emb = self.dropoff_net(dropoff_matrix) 145 | 146 | if self.extractor == "SA": 147 | pickup_matrix = np.sum(pickup_matrix, axis=0) 148 | pickup_matrix = torch.from_numpy(pickup_matrix).type(FType).to(device) 149 | pickup_emb = self.pickup_net(pickup_matrix) 150 | 151 | dropoff_matrix = np.sum(dropoff_matrix, axis=0) 152 | dropoff_matrix = torch.from_numpy(dropoff_matrix).type(FType).to(device) 153 | dropoff_emb = self.dropoff_net(dropoff_matrix) 154 | 155 | # region_emb = torch.cat([pickup_emb, dropoff_emb], 
dim=1).squeeze() 156 | region_emb = (pickup_emb + dropoff_emb) / 2 157 | region_emb = region_emb.squeeze() 158 | 159 | return region_emb 160 | 161 | def forward(self, flow_matrix, pos_flow_sets, neg_flow_sets): 162 | base_region_emb = self.agg_region_emb(flow_matrix) 163 | 164 | pos_region_emb_list = [] 165 | for pos_flow_matrix in pos_flow_sets: 166 | pos_region_emb = self.agg_region_emb(pos_flow_matrix) 167 | pos_region_emb_list.append(pos_region_emb.unsqueeze(0)) 168 | pos_region_emb = torch.cat(pos_region_emb_list, dim=0) 169 | 170 | neg_region_emb_list = [] 171 | for neg_flow_matrix in neg_flow_sets: 172 | neg_region_emb = self.agg_region_emb(neg_flow_matrix) 173 | neg_region_emb_list.append(neg_region_emb.unsqueeze(0)) 174 | neg_region_emb = torch.cat(neg_region_emb_list, dim=0) 175 | 176 | pos_scores = torch.matmul(pos_region_emb, base_region_emb) 177 | pos_label = torch.Tensor([1 for _ in range(pos_scores.size(0))]).type(FType).to(device) 178 | 179 | neg_scores = torch.matmul(neg_region_emb, base_region_emb) 180 | neg_label = torch.Tensor([0 for _ in range(neg_scores.size(0))]).type(FType).to(device) 181 | 182 | scores = torch.cat([pos_scores, neg_scores]) 183 | labels = torch.cat([pos_label, neg_label]) 184 | scores /= self.temp 185 | 186 | loss = -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum() 187 | return loss, base_region_emb, neg_region_emb 188 | 189 | def model_train(self, region_id): 190 | 191 | _, flow_matrix = self.ssl_data.get_region(region_id) 192 | pos_flow_sets = self.positive_sampling(region_id) 193 | neg_flow_sets = self.negative_sampling(region_id) 194 | 195 | flow_loss, base_region_emb, neg_region_emb = self.forward(flow_matrix, pos_flow_sets, neg_flow_sets) 196 | 197 | return flow_loss, base_region_emb, neg_region_emb 198 | 199 | def get_emb(self): 200 | output = [] 201 | for region_id in self.ssl_data.sampling_pool: 202 | _, flow_matrix = self.ssl_data.get_region(region_id) 203 | region_emb = self.agg_region_emb(flow_matrix) 204 | 205 | output.append(region_emb.detach().cpu().numpy()) 206 | return np.array(output) -------------------------------------------------------------------------------- /remvc_poi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | from torch.optim import SGD, Adam, ASGD, RMSprop 8 | from torch.utils.data import DataLoader 9 | from torch.nn.functional import log_softmax, softmax 10 | import torch.nn.functional as F 11 | from configparser import ConfigParser 12 | from sklearn.metrics.pairwise import cosine_similarity 13 | 14 | import sys 15 | import os 16 | import time 17 | import json 18 | import pickle 19 | 20 | from remvc_tasks import lu_classify, predict_popus 21 | from remvc_data import SSLData 22 | from model_layers import SAEncoder 23 | 24 | FType = torch.FloatTensor 25 | LType = torch.LongTensor 26 | 27 | from configparser import ConfigParser 28 | config = ConfigParser() 29 | config.read('conf', encoding='UTF-8') 30 | GPU_DEVICE = config['DEFAULT'].get("GPU_DEVICE") 31 | device = torch.device("cuda:"+GPU_DEVICE if torch.cuda.is_available() else "cpu") 32 | 33 | 34 | class Flatten(torch.nn.Module): 35 | def forward(self, input): 36 | return input.view(input.size(0), -1) 37 | 38 | class POI_SSL(torch.nn.Module): 39 | 40 | def __init__(self, ssl_data, neg_size, emb_size, attention_size, temp, extractor): 41 | super(POI_SSL,self).__init__() 42 | 43 
| self.ssl_data = ssl_data 44 | self.init_basic_conf(neg_size, emb_size, attention_size, temp, extractor) 45 | 46 | def init_basic_conf(self, neg_size, emb_size, attention_size, temp, extractor): 47 | self.neg_size = neg_size 48 | self.emb_size = emb_size 49 | self.attention_size = attention_size 50 | self.bin_num = 10 51 | 52 | self.poi_num = self.ssl_data.poi_num 53 | self.temp = temp 54 | 55 | self.extractor = extractor 56 | 57 | self.W_poi = None 58 | 59 | if self.extractor == "CNN": 60 | self.poi_net = torch.nn.Sequential( 61 | nn.Conv1d(in_channels=1, out_channels=4, kernel_size=7), 62 | Flatten(), 63 | nn.Linear(964, self.emb_size) 64 | ).to(device) 65 | 66 | if self.extractor == "MLP": 67 | self.poi_net = torch.nn.Sequential( 68 | nn.Linear(247, self.emb_size), 69 | nn.ReLU(), 70 | ).to(device) 71 | 72 | if self.extractor == "SA": 73 | self.poi_net = SAEncoder(d_input=247, d_model=16, n_head=3).to(device) 74 | 75 | 76 | def generate_attention(self, W_parents, W_children, mask): 77 | _input = torch.cat([W_parents, W_children], dim=2) 78 | output = self.tree_att_net(_input) 79 | pre_attention = torch.matmul(output, self.v_attention) 80 | 81 | pre_attention = pre_attention + mask 82 | attention = torch.softmax(pre_attention, dim=1) 83 | 84 | return attention 85 | 86 | def tree_gcn(self): 87 | parentsList, childrenList, maskList = [], [], [] 88 | 89 | for i, p2c_i in enumerate(self.ssl_data.level_p2c[::-1]): 90 | children = list(p2c_i.values()) 91 | max_n_children = max(len(x) for x in children) 92 | mask = [] 93 | 94 | for k in children: 95 | cur_mask = [0.0] * len(k) 96 | if len(k) < max_n_children: 97 | cur_mask += [-10**13] * (max_n_children - len(k)) 98 | k += [0] * (max_n_children - len(k)) 99 | mask.append(cur_mask) 100 | 101 | parents = [] 102 | for p in p2c_i.keys(): 103 | parents.append([p] * max_n_children) 104 | 105 | children = torch.Tensor(children).type(LType) 106 | parents = torch.Tensor(parents).type(LType) 107 | mask = torch.Tensor(mask).type(FType) 108 | 109 | parentsList.append(parents) 110 | childrenList.append(children) 111 | maskList.append(mask) 112 | 113 | W_emb_temp = self.W_poi.clone() + 0. 
114 | for i, (parents, children, mask) in enumerate(zip(parentsList, childrenList, maskList)): 115 | W_parents = self.W_poi[parents] 116 | if i == 0: 117 | W_children = self.W_poi[children] 118 | else: 119 | W_children = W_emb_temp[children] 120 | 121 | tempAttention = self.generate_attention(W_parents, W_children, mask) 122 | tempEmb = (W_children * tempAttention[:,:,None]).sum(dim=1) 123 | 124 | W_emb_temp = torch.index_copy(W_emb_temp, 0, parents[:, 0], tempEmb) 125 | 126 | parentsList, childrenList, maskList = [], [], [] 127 | for i, c2p_i in enumerate(self.ssl_data.level_c2p[::-1]): 128 | parents = list(c2p_i.values()) 129 | max_n_parents = max(len(x) for x in parents) 130 | mask = [] 131 | 132 | for k in parents: 133 | cur_mask = [0.0] * len(k) 134 | if len(k) < max_n_parents: 135 | cur_mask += [-10**13] * (max_n_parents - len(k)) 136 | k += [0] * (max_n_parents - len(k)) 137 | mask.append(cur_mask) 138 | 139 | children = [] 140 | for c in c2p_i.keys(): 141 | children.append([c] * max_n_parents) 142 | 143 | children = torch.Tensor(children).type(LType) 144 | parents = torch.Tensor(parents).type(LType) 145 | mask = torch.Tensor(mask).type(FType) 146 | 147 | parentsList.append(parents) 148 | childrenList.append(children) 149 | maskList.append(mask) 150 | 151 | for i, (parents, children, mask) in enumerate(zip(parentsList, childrenList, maskList)): 152 | W_children, W_parents = W_emb_temp[children], W_emb_temp[parents] 153 | 154 | tempAttention = self.generate_attention(W_children, W_parents, mask) 155 | # tempEmb = (W_parents * tempAttention[:,:,None]).sum(axis=1) 156 | tempEmb = (W_parents * tempAttention[:,:,None]).sum(dim=1) 157 | 158 | # W_emb_temp[children[:, 0]] = tempEmb 159 | W_emb_temp = torch.index_copy(W_emb_temp, 0, children[:, 0], tempEmb) 160 | 161 | self.W_emb_temp = W_emb_temp 162 | 163 | return W_emb_temp 164 | 165 | def location_attention(self, loc_emb_one, loc_emb_two): 166 | _input = torch.cat([loc_emb_one, loc_emb_two], axis=1) 167 | 168 | output = self.location_att_net(_input) 169 | pre_attention = torch.matmul(output, self.l_attention) 170 | 171 | attention = torch.softmax(pre_attention, dim=0) 172 | return attention 173 | 174 | def agg_region_emb(self, poi_set, W_emb_temp): 175 | p_node_dict = self.ssl_data.region_dict[0]["node_dict"] 176 | poi_f = np.zeros(len(p_node_dict)) 177 | for poi in poi_set: 178 | poi_id = poi[0] 179 | poi_f[poi_id] += 1 180 | 181 | if np.sum(poi_f) != 0: 182 | poi_f = poi_f / np.sum(poi_f) 183 | poi_f = torch.Tensor(poi_f).type(FType).to(device) 184 | 185 | if self.extractor == "CNN": 186 | poi_f = poi_f.unsqueeze(0) 187 | poi_f = poi_f.unsqueeze(0) 188 | temp_emb = self.poi_net(poi_f) 189 | 190 | if self.extractor == "MLP": 191 | temp_emb = self.poi_net(poi_f) 192 | 193 | if self.extractor == "SA": 194 | temp_emb = self.poi_net(poi_f) 195 | 196 | region_emb = temp_emb 197 | region_emb = region_emb.squeeze() 198 | 199 | return region_emb 200 | 201 | def add_aug(self, poi_set, _ratio): 202 | add_poi_set = [] 203 | for poi in poi_set: 204 | add_poi_set.append(poi) 205 | ratio = random.random() 206 | if ratio < _ratio: 207 | add_poi_set.append(poi) 208 | return add_poi_set 209 | 210 | def delete_aug(self, poi_set, _ratio): 211 | de_poi_set = [] 212 | for poi in poi_set: 213 | ratio = random.random() 214 | if ratio > _ratio: 215 | de_poi_set.append(poi) 216 | if not de_poi_set: 217 | de_poi_set = [poi_set[0]] 218 | return de_poi_set 219 | 220 | def replace_aug(self, poi_set, _ratio): 221 | replace_poi_set = [] 222 | for poi in poi_set: 223 
| new_poi = poi 224 | ratio = random.random() 225 | if ratio < _ratio: 226 | new_poi[0] = random.randint(0, self.ssl_data.poi_num-1) 227 | replace_poi_set.append(new_poi) 228 | return replace_poi_set 229 | 230 | def positive_sampling(self, region_id): 231 | pos_poi_sets = [] 232 | poi_set, _ = self.ssl_data.get_region(region_id) 233 | 234 | de_poi_set = [] 235 | for ratio in [0.1]: 236 | de_poi_set.append(self.delete_aug(poi_set, ratio)) 237 | 238 | add_poi_set = [] 239 | for ratio in [0.1]: 240 | add_poi_set.append(self.add_aug(poi_set, ratio)) 241 | 242 | re_poi_set = [] 243 | for ratio in [0.1]: 244 | re_poi_set.append(self.replace_aug(poi_set, ratio)) 245 | 246 | pos_poi_sets = de_poi_set + add_poi_set + re_poi_set 247 | 248 | return pos_poi_sets 249 | 250 | def negative_sampling(self, region_id): 251 | sampling_pool = [] 252 | for _id in self.ssl_data.sampling_pool: 253 | if _id == region_id: 254 | continue 255 | sampling_pool.append(_id) 256 | 257 | p = self.ssl_data.rs_ratio["model_poi"][region_id] 258 | neg_region_ids = np.random.choice(sampling_pool, self.neg_size, replace=False, p=p) 259 | 260 | neg_poi_sets = [] 261 | for neg_region_id in neg_region_ids: 262 | poi_set, _ = self.ssl_data.get_region(neg_region_id) 263 | neg_poi_sets.append(poi_set) 264 | 265 | return neg_poi_sets 266 | 267 | def forward(self, poi_set, pos_poi_sets, neg_poi_sets): 268 | 269 | # W_emb_temp = self.tree_gcn() 270 | W_emb_temp = self.W_poi 271 | 272 | base_region_emb = self.agg_region_emb(poi_set, W_emb_temp) 273 | 274 | pos_region_emb_list = [] 275 | for pos_poi_set in pos_poi_sets: 276 | pos_region_emb = self.agg_region_emb(pos_poi_set, W_emb_temp) 277 | pos_region_emb_list.append(pos_region_emb.unsqueeze(0)) 278 | pos_region_emb = torch.cat(pos_region_emb_list, dim=0) 279 | 280 | neg_region_emb_list = [] 281 | for neg_poi_set in neg_poi_sets: 282 | neg_region_emb = self.agg_region_emb(neg_poi_set, W_emb_temp) 283 | neg_region_emb_list.append(neg_region_emb.unsqueeze(0)) 284 | neg_region_emb = torch.cat(neg_region_emb_list, dim=0) 285 | 286 | pos_scores = torch.matmul(pos_region_emb, base_region_emb) 287 | pos_label = torch.Tensor([1 for _ in range(pos_scores.size(0))]).type(FType).to(device) 288 | 289 | neg_scores = torch.matmul(neg_region_emb, base_region_emb) 290 | neg_label = torch.Tensor([0 for _ in range(neg_scores.size(0))]).type(FType).to(device) 291 | 292 | scores = torch.cat([pos_scores, neg_scores]) 293 | labels = torch.cat([pos_label, neg_label]) 294 | scores /= self.temp 295 | 296 | loss = -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum() 297 | 298 | return loss, base_region_emb, neg_region_emb 299 | 300 | def model_train(self, region_id): 301 | poi_set, _ = self.ssl_data.get_region(region_id) 302 | pos_poi_sets = self.positive_sampling(region_id) 303 | neg_poi_sets = self.negative_sampling(region_id) 304 | 305 | poi_loss, base_region_emb, neg_region_emb = self.forward(poi_set, pos_poi_sets, neg_poi_sets) 306 | 307 | return poi_loss, base_region_emb, neg_region_emb 308 | 309 | def get_emb(self): 310 | output = [] 311 | for region_id in self.ssl_data.sampling_pool: 312 | poi_set, _ = self.ssl_data.get_region(region_id) 313 | region_emb = self.agg_region_emb(poi_set, self.W_poi) 314 | 315 | output.append(region_emb.detach().cpu().numpy()) 316 | return np.array(output) 317 | -------------------------------------------------------------------------------- /remvc_tasks.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 
import pandas as pd 4 | import numpy as np 5 | 6 | from sklearn.cluster import KMeans 7 | from sklearn.feature_extraction.text import TfidfTransformer 8 | from sklearn import linear_model 9 | import matplotlib.pyplot as plt 10 | 11 | from sklearn.model_selection import LeaveOneOut, KFold 12 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error 13 | from sklearn.metrics import normalized_mutual_info_score 14 | from sklearn.metrics import adjusted_rand_score 15 | from sklearn.metrics import f1_score 16 | 17 | 18 | popus = np.load("./model_data/popus_count.npy") 19 | cd = json.load(open("./model_data/mh_cd.json")) 20 | 21 | cd_labels = np.zeros((len(cd))) 22 | for i in range(len(cd)): 23 | cd_labels[i] = cd[str(i)] 24 | 25 | n_clusters = 29 26 | 27 | def regression(X_train, y_train, X_test, alpha): 28 | reg = linear_model.Ridge(alpha=alpha) 29 | reg.fit(X_train, y_train) 30 | 31 | y_pred = reg.predict(X_test) 32 | return y_pred 33 | 34 | def kf_predict(X, Y): 35 | 36 | kf = KFold(n_splits=5) 37 | y_preds = [] 38 | y_truths = [] 39 | for train_index, test_index in kf.split(X): 40 | X_train, X_test = X[train_index], X[test_index] 41 | y_train, y_test = Y[train_index], Y[test_index] 42 | y_pred = regression(X_train, y_train, X_test, 1) 43 | y_preds.append(y_pred) 44 | y_truths.append(y_test) 45 | 46 | return np.concatenate(y_preds), np.concatenate(y_truths) 47 | 48 | def compute_metrics(y_pred, y_test): 49 | y_pred[y_pred<0] = 0 50 | 51 | mae = mean_absolute_error(y_test, y_pred) 52 | mse = mean_squared_error(y_test, y_pred) 53 | r2 = r2_score(y_test, y_pred) 54 | return mae, np.sqrt(mse), r2 55 | 56 | def predict_popus(emb, fw): 57 | y_pred, y_test = kf_predict(emb, popus) 58 | mae, rmse, r2 = compute_metrics(y_pred, y_test) 59 | 60 | print("MAE: ", mae) 61 | print("RMSE: ", rmse) 62 | print("R2: ", r2) 63 | 64 | fw.write("MAE: " + str(mae) + "\n") 65 | fw.write("RMSE: " + str(rmse) + "\n") 66 | fw.write("R2: " + str(r2) + "\n") 67 | 68 | return mae, rmse, r2 69 | 70 | 71 | def F_meansure(cd_labels, emb_labels): 72 | zones = len(cd_labels) 73 | 74 | labels = [] 75 | preds = [] 76 | for _i in range(zones): 77 | for _j in range(_i+1, zones): 78 | cd_i, cd_j = cd_labels[_i], cd_labels[_j] 79 | emb_i, emb_j = emb_labels[_i], emb_labels[_j] 80 | 81 | if cd_i == cd_j: 82 | labels.append(1) 83 | else: 84 | labels.append(0) 85 | 86 | if emb_i == emb_j: 87 | preds.append(1) 88 | else: 89 | preds.append(0) 90 | 91 | bins = np.array([0,0.5,1]) 92 | tn, fp, fn, tp = plt.hist2d(labels, preds, bins=bins, cmap='Blues')[0].flatten() 93 | 94 | precision = tp / (tp + fp) 95 | recall = tp / (tp + fn) 96 | beta = 0.5 97 | f_beta = ((beta**2 + 1) * precision * recall) / (beta**2 * precision + recall) 98 | return f_beta 99 | 100 | 101 | def lu_classify(emb, fw, _type="all"): 102 | kmeans = KMeans(n_clusters=n_clusters, random_state=3) 103 | emb_labels = kmeans.fit_predict(emb) 104 | 105 | nmi = normalized_mutual_info_score(cd_labels, emb_labels) 106 | print("emb nmi: {:.3f}".format(nmi)) 107 | ars = adjusted_rand_score(cd_labels, emb_labels) 108 | print("emb ars: {:.3f}".format(ars)) 109 | f_score = F_meansure(cd_labels, emb_labels) 110 | print("emb f_score: {:.3f}".format(f_score)) 111 | 112 | fw.write("emb nmi: " + str(nmi) + "\n") 113 | fw.write("emb ars: " + str(ars) + "\n") 114 | fw.write("emb f_score: " + str(f_score) + "\n") 115 | 116 | np.save(open("./model_result/clusters_" + _type + ".npy","wb"), emb_labels) 
--------------------------------------------------------------------------------
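The three training modules share one objective: POI_SSL.forward and FLOW_SSL.forward score augmented (positive) and sampled (negative) region views against a base region embedding, Model_SSL.forward in remvc.py scores concatenated (POI, flow) pairs with a small discriminator, and all of them reduce the temperature-scaled scores with -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum(). The sketch below isolates that objective with synthetic tensors so it can be run on its own; it is not part of the repository, and the shapes, sizes and temperature are illustrative only.

# Illustrative sketch -- not part of the ReMVC repository.
# Reproduces the contrastive reduction used in FLOW_SSL.forward, POI_SSL.forward
# and Model_SSL.forward with synthetic tensors; dimensions are examples only.
import torch
import torch.nn.functional as F

def contrastive_loss(base_emb, pos_emb, neg_emb, temp=0.08):
    # base_emb: (d,); pos_emb: (P, d) augmented views; neg_emb: (N, d) sampled regions
    pos_scores = pos_emb.matmul(base_emb)                 # (P,)
    neg_scores = neg_emb.matmul(base_emb)                 # (N,)
    scores = torch.cat([pos_scores, neg_scores]) / temp   # temperature scaling
    labels = torch.cat([torch.ones_like(pos_scores),
                        torch.zeros_like(neg_scores)])
    # Average negative log-softmax mass assigned to the positive entries:
    # positives are pulled toward the base embedding, negatives pushed away.
    return -(F.log_softmax(scores, dim=0) * labels).sum() / labels.sum()

if __name__ == "__main__":
    d = 16                        # EMB in conf
    base = torch.randn(d)
    pos = torch.randn(4, d)       # e.g. the four noisy flow views from positive_sampling
    neg = torch.randn(150, d)     # e.g. neg_size of FLOW_SSL
    print(contrastive_loss(base, pos, neg).item())

Because the reduction divides by the number of positives, each positive view contributes its own negative log-probability over the full candidate set, independent of how many negatives are drawn.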
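End to end, remvc.py is the entry point: it builds SSLData, a POI_SSL and a FLOW_SSL encoder, jointly optimises them together with the mutual (cross-view) discriminator, and evaluates the concatenated embeddings with lu_classify and predict_popus. A typical invocation looks like the sketch below; it assumes the Google Drive data has been downloaded so that ./clear_data/training_dict.pickle exists alongside the pickles in this listing, and that a model_result/ directory has been created, since remvc.py opens its log file there.

# Illustrative usage sketch -- not part of the repository.
# Hyper-parameters (GPU_DEVICE, EXTRACTOR, EMB, MUTUAL, REG) are read from the
# ./conf file at import time.
from remvc import Model_SSL

model = Model_SSL()     # loads the clear_data/ pickles and builds both encoders
model.model_train()     # 200 epochs; evaluates every epoch from epoch 150 onwards
emb = model.get_emb()   # (num_regions, 2 * EMB): flow and POI embeddings concatenated

The printed MAE/RMSE/R2 and NMI/ARS/F-score come from remvc_tasks.py and are also written to the model_result log file.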