├── .gitattributes ├── HAFusion_Model.py ├── HAFusion_train.py ├── Images ├── DAFusion.png ├── Experiment.png └── model structure.png ├── README.md ├── data_Chi ├── check_counts.npy ├── crime_counts.npy ├── landUse_dist.npy ├── landUse_simi.npy ├── mob-adj.npy ├── mob_dist.npy ├── poi_dist.npy ├── poi_simi.npy └── serviceCall_counts.npy ├── data_NY ├── .DS_Store ├── check_counts.npy ├── crime_counts.npy ├── landUse_dist.npy ├── landUse_simi.npy ├── mob-adj.npy ├── mob_dist.npy ├── poi_dist.npy ├── poi_simi.npy └── serviceCall_counts.npy ├── data_SF ├── check_counts.npy ├── crime_counts.npy ├── landUse_dist.npy ├── landUse_simi.npy ├── mob-adj.npy ├── mob_dist.npy ├── poi_dist.npy ├── poi_simi.npy └── serviceCall_counts.npy ├── parse_args.py ├── requirements.txt ├── tasks_Chi ├── tasks_chk.py ├── tasks_crime.py └── tasks_serviceCall.py ├── tasks_NY ├── tasks_chk.py ├── tasks_crime.py └── tasks_serviceCall.py ├── tasks_SF ├── tasks_chk.py ├── tasks_crime.py └── tasks_serviceCall.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /HAFusion_Model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from parse_args import args 6 | 7 | class DeepFc(nn.Module): 8 | def __init__(self, input_dim, output_dim): 9 | super(DeepFc, self).__init__() 10 | self.model = nn.Sequential( 11 | nn.Linear(input_dim, input_dim * 2), 12 | nn.Linear(input_dim * 2, input_dim * 2), 13 | nn.LeakyReLU(negative_slope=0.3, inplace=True), 14 | nn.Linear(input_dim * 2, output_dim), 15 | nn.LeakyReLU(negative_slope=0.3, inplace=True), ) 16 | 17 | self.output = None 18 | 19 | def forward(self, x): 20 | output = self.model(x) 21 | self.output = 
class RegionFusionBlock(nn.Module):
    """Self-attention block with a position-wise feed-forward sub-layer.

    Each sub-layer's output passes through dropout, is added back to its
    input (residual connection) and is then layer-normalised.  The attention
    module is built with ``batch_first=True``.

    Args:
        input_dim: size of the last (feature) dimension of the input.
        nhead: number of attention heads.
        dropout: dropout probability used inside attention and after each
            sub-layer.
        dim_feedforward: hidden width of the feed-forward sub-layer.
    """

    def __init__(self, input_dim, nhead, dropout, dim_feedforward=2048):
        super().__init__()
        # Sub-modules are created in a fixed order so that seeded parameter
        # initialisation and the state_dict layout stay stable.
        self.self_attn = nn.MultiheadAttention(
            embed_dim=input_dim,
            num_heads=nhead,
            dropout=dropout,
            bias=True,
            batch_first=True,
        )
        self.dropout = nn.Dropout(dropout)

        self.linear1 = nn.Linear(input_dim, dim_feedforward)
        self.linear2 = nn.Linear(dim_feedforward, input_dim)

        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = F.relu

    def forward(self, src):
        """Return ``src`` after the attention and feed-forward sub-layers.

        The output has the same shape as ``src``; the attention weights
        returned by the attention module are discarded.
        """
        # Attention sub-layer: residual add, then LayerNorm.
        attended, _ = self.self_attn(src, src, src)
        hidden = self.norm1(src + self.dropout1(attended))

        # Feed-forward sub-layer: linear -> ReLU -> dropout -> linear.
        expanded = self.activation(self.linear1(hidden))
        projected = self.linear2(self.dropout(expanded))
        return self.norm2(hidden + self.dropout2(projected))
class intraAFL(nn.Module):
    """Stack of ``intraAFL_Block`` layers followed by a ``DeepFc`` head.

    The number of blocks, the number of attention heads and the dropout rate
    are read from the global ``args`` (``NO_IntraAFL``, ``NO_head``,
    ``dropout``).

    Args:
        input_dim: feature size handed to every block and to the final
            ``DeepFc(input_dim, input_dim)``.
        c: forwarded unchanged to each ``intraAFL_Block`` as ``c``.
    """

    def __init__(self, input_dim, c):
        super().__init__()
        self.input_dim = input_dim
        self.num_block = args.NO_IntraAFL

        layers = []
        for _ in range(self.num_block):
            layers.append(
                intraAFL_Block(
                    input_dim=input_dim,
                    nhead=args.NO_head,
                    c=c,
                    dropout=args.dropout,
                )
            )
        self.blocks = nn.ModuleList(layers)

        self.fc = DeepFc(input_dim, input_dim)

    def forward(self, x):
        """Apply every block in order, squeeze, then apply the ``DeepFc`` head."""
        hidden = x
        for layer in self.blocks:
            hidden = layer(hidden)
        # squeeze() without a dim removes *every* size-1 axis.
        # NOTE(review): presumably meant to drop a leading batch axis of 1 -
        # confirm against the shapes produced by the caller.
        return self.fc(hidden.squeeze())
class ViewFusion(nn.Module):
    """Compute one softmax-normalised weight per position of the last axis.

    Args:
        emb_dim: number of input channels expected by the first 1x1
            convolution (size of axis 1 of the input).
        out_dim: number of channels produced by that convolution.
    """

    def __init__(self, emb_dim, out_dim):
        super().__init__()
        self.W = nn.Conv1d(emb_dim, out_dim, kernel_size=1, bias=False)
        self.f1 = nn.Conv1d(out_dim, 1, kernel_size=1)
        self.f2 = nn.Conv1d(out_dim, 1, kernel_size=1)
        self.act = nn.LeakyReLU(negative_slope=0.3, inplace=True)

    def forward(self, src):
        """Return a 1-D tensor of weights that sums to 1.

        ``src`` is a 3-D tensor fed to ``nn.Conv1d``; the result has one
        entry per position along its last axis.
        """
        projected = self.W(src)
        row_term = self.f1(projected)
        col_term = self.f2(projected).transpose(1, 2)
        # Broadcasting (.., 1, L) + (.., L, 1) yields an L x L score matrix
        # for every element of axis 0.
        scores = self.act(row_term + col_term)
        per_sample = torch.mean(scores, dim=-1)
        pooled = torch.mean(per_sample, dim=0)
        return F.softmax(pooled, dim=-1)
input_dim, output_dim, d_prime, d_m, c): 213 | super(HAFusion, self).__init__() 214 | self.input_dim = input_dim 215 | self.densePOI2 = nn.Linear(poi_dim, input_dim) 216 | self.denseLandUse3 = nn.Linear(landUse_dim, input_dim) 217 | 218 | self.encoderPOI = intraAFL(input_dim, c) 219 | self.encoderLandUse = intraAFL(input_dim, c) 220 | self.encoderMob = intraAFL(input_dim, c) 221 | 222 | self.regionFusionLayer = RegionFusion(input_dim) 223 | 224 | self.interViewEncoder = interAFL(input_dim, d_m) 225 | 226 | self.fc = DeepFc(input_dim, output_dim) 227 | 228 | self.para1 = torch.nn.Parameter(torch.FloatTensor(1), requires_grad=True) 229 | self.para1.data.fill_(0.1) 230 | self.para2 = torch.nn.Parameter(torch.FloatTensor(1), requires_grad=True) 231 | self.para2.data.fill_(0.9) 232 | 233 | self.viewFusionLayer = ViewFusion(input_dim, d_prime) 234 | 235 | self.activation = F.relu 236 | self.dropout = nn.Dropout(0.1) 237 | self.decoder_s = nn.Linear(output_dim, output_dim) # 238 | self.decoder_t = nn.Linear(output_dim, output_dim) 239 | self.decoder_p = nn.Linear(output_dim, output_dim) # 240 | self.decoder_l = nn.Linear(output_dim, output_dim) 241 | self.feature = None 242 | 243 | def forward(self, x): 244 | poi_emb, landUse_emb, mob_emb = x 245 | 246 | poi_emb = self.dropout(self.activation(self.densePOI2(poi_emb))) 247 | landUse_emb = self.dropout(self.activation(self.denseLandUse3(landUse_emb))) 248 | 249 | poi_emb = self.encoderPOI(poi_emb) 250 | landUse_emb = self.encoderLandUse(landUse_emb) 251 | mob_emb = self.encoderMob(mob_emb) 252 | 253 | out = torch.stack([poi_emb, landUse_emb, mob_emb]) 254 | 255 | intra_view_embs = out 256 | out = out.transpose(0, 1) 257 | out = self.interViewEncoder(out) 258 | out = out.transpose(0, 1) 259 | p1 = self.para1 / (self.para1 + self.para2) 260 | p2 = self.para2 / (self.para1 + self.para2) 261 | out = out * p2 + intra_view_embs * p1 262 | # --------------------------------------------- 263 | 264 | out1 = out.transpose(0, 2) 265 | 
coef = self.viewFusionLayer(out1) 266 | temp_out = coef[0] * out[0] + coef[1] * out[1] + coef[2] * out[2] 267 | # -------------------------------------------------- 268 | 269 | temp_out = temp_out[np.newaxis] 270 | temp_out = self.regionFusionLayer(temp_out) 271 | out = self.fc(temp_out) 272 | 273 | self.feature = out 274 | 275 | out_s = self.decoder_s(out) # source embedding of regions 276 | out_t = self.decoder_t(out) # destination embedding of regions 277 | out_p = self.decoder_p(out) # poi embedding of regions 278 | out_l = self.decoder_l(out) # landuse embedding of regions 279 | return out_s, out_t, out_p, out_l 280 | 281 | 282 | def out_feature(self): 283 | return self.feature -------------------------------------------------------------------------------- /HAFusion_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | import torch.nn.functional as F 6 | import utils 7 | from parse_args import args 8 | import tasks_NY.tasks_crime, tasks_NY.tasks_chk, tasks_NY.tasks_serviceCall 9 | import tasks_Chi.tasks_crime, tasks_Chi.tasks_chk, tasks_Chi.tasks_serviceCall 10 | import tasks_SF.tasks_crime, tasks_SF.tasks_chk, tasks_SF.tasks_serviceCall 11 | from HAFusion_Model import HAFusion 12 | 13 | features, mob_adj, poi_sim, land_sim = utils.load_data() 14 | 15 | city = args.city 16 | embedding_size = args.embedding_size 17 | d_prime = args.d_prime 18 | d_m = args.d_m 19 | c = args.c 20 | POI_dim = args.POI_dim 21 | landUse_dim = args.landUse_dim 22 | region_num = args.region_num 23 | task = args.task 24 | 25 | def _mob_loss(s_embeddings, t_embeddings, mob): 26 | inner_prod = torch.mm(s_embeddings, t_embeddings.T) 27 | softmax1 = nn.Softmax(dim=-1) 28 | phat = softmax1(inner_prod) 29 | loss = torch.sum(-torch.mul(mob, torch.log(phat + 0.0001))) 30 | inner_prod = torch.mm(t_embeddings, s_embeddings.T) 31 | softmax2 = 
def train_model(input_features, mob_adj, poi_sim, land_sim, model, model_loss, city, task):
    """Train ``model`` and save the best-scoring embeddings to ``best_emb.npy``.

    Runs ``args.epochs`` full-batch Adam steps.  Every 30th epoch the
    embeddings cached by the training forward pass (``model.out_feature()``)
    are scored with the downstream ``do_tasks`` function selected by
    ``(task, city)``; the embeddings with the highest R2 are kept and written
    to disk once training ends.

    Args:
        input_features: passed unchanged to ``model(...)``.
        mob_adj, poi_sim, land_sim: targets passed unchanged to ``model_loss``.
        model: module returning ``(out_s, out_t, out_p, out_l)`` and exposing
            ``out_feature()``.
        model_loss: callable taking
            ``(out_s, out_t, mob_adj, out_p, poi_sim, out_l, land_sim)``.
        city: ``"NY"``, ``"Chi"`` or ``"SF"``.
        task: ``"checkIn"``, ``"crime"`` or ``"serviceCall"``.

    Raises:
        ValueError: if ``(task, city)`` is not a supported combination
            (previously this surfaced as an ``UnboundLocalError`` on ``r2``
            after the first optimisation step).
        RuntimeError: if no evaluation produced a comparable R2 score, so
            there is nothing meaningful to save.
    """
    task_modules = {
        ("checkIn", "NY"): tasks_NY.tasks_chk,
        ("checkIn", "Chi"): tasks_Chi.tasks_chk,
        ("checkIn", "SF"): tasks_SF.tasks_chk,
        ("crime", "NY"): tasks_NY.tasks_crime,
        ("crime", "Chi"): tasks_Chi.tasks_crime,
        ("crime", "SF"): tasks_SF.tasks_crime,
        ("serviceCall", "NY"): tasks_NY.tasks_serviceCall,
        ("serviceCall", "Chi"): tasks_Chi.tasks_serviceCall,
        ("serviceCall", "SF"): tasks_SF.tasks_serviceCall,
    }
    # Fail before spending any time on training.
    if (task, city) not in task_modules:
        raise ValueError(
            "Unsupported task/city combination: {!r}/{!r}".format(task, city))
    evaluate = task_modules[(task, city)].do_tasks

    optimizer = optim.Adam(
        model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)

    # Start from -inf (not 0) so the best evaluated embedding is always kept,
    # even when every R2 score is <= 0; the old initial values caused the
    # scalar 0 to be saved in that case.
    best_emb = None
    best_r2 = float("-inf")

    for epoch in range(args.epochs):
        model.train()
        out_s, out_t, out_p, out_l = model(input_features)
        loss = model_loss(out_s, out_t, mob_adj, out_p, poi_sim, out_l, land_sim)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 30 != 0:
            continue

        print("Epoch {}, Loss {}".format(epoch, loss.item()))
        embs = model.out_feature().detach().cpu().numpy()
        _, _, r2 = evaluate(embs)
        if r2 > best_r2:
            best_r2 = r2
            best_emb = embs

    if best_emb is None:
        raise RuntimeError(
            "No embeddings were selected: no evaluation returned a comparable "
            "R2 score (epochs={}).".format(args.epochs))

    np.save("best_emb.npy", best_emb)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 139 | model = HAFusion(POI_dim, landUse_dim, region_num, embedding_size, d_prime, d_m, c).to(device) 140 | model_loss = ModelLoss() 141 | 142 | print('Model Training-----------------') 143 | model.train() 144 | train_model(features, mob_adj, poi_sim, land_sim, model, model_loss, city, task) 145 | 146 | print("Downstream task test-----------") 147 | test_model(city, task) 148 | 149 | 150 | -------------------------------------------------------------------------------- /Images/DAFusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/DAFusion.png -------------------------------------------------------------------------------- /Images/Experiment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/Experiment.png -------------------------------------------------------------------------------- /Images/model structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/model structure.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HAFusion: Urban Region Representation Learning with Attentive Fusion (ICDE 2024) 2 | 3 | This is a PyTorch implementation of the [HAFusion paper](https://arxiv.org/abs/2312.04606) 4 | 5 | Authors: Fengze Sun, Jianzhong Qi, Yanchuan Chang, Xiaoliang Fan, Shanika Karunasekera, and Egemen Tanin 6 | 7 | ```bash 8 | @inproceedings{sun2024urban, 9 | title={Urban region representation learning with attentive fusion}, 10 | author={Sun, Fengze and 
Qi, Jianzhong and Chang, Yanchuan and Fan, Xiaoliang and Karunasekera, Shanika and Tanin, Egemen}, 11 | booktitle={2024 IEEE 40th International Conference on Data Engineering (ICDE)}, 12 | pages={4409--4421}, 13 | year={2024}, 14 | organization={IEEE} 15 | } 16 | ``` 17 | ## Model Structure 18 |

19 | 20 |

21 | 22 | ## Experiments 23 |

Overall Prediction Accuracy Results

24 |

25 | 26 |

27 | 28 |

Prediction Accuracy Results When Powering Existing Models with Our DAFusion Module (NYC)

29 |
30 | 31 |
32 | 33 | ## Requirements 34 | - Python 3.8.18 35 | - `pip install -r requirements.txt` 36 | 37 | ## Quick Start 38 | To train and test HAFusion on a specific city and a specific downstream task: 39 | 40 | - CITY_NAME: NY or Chi or SF 41 | - TASK_NAME: checkIn or crime or serviceCall 42 | 43 | ```bash 44 | python HAFusion_train.py --city CITY_NAME --task TASK_NAME 45 | ``` 46 | 47 | ## Contact 48 | Email fengzes@student.unimelb.edu.au if you have any queries. 49 | -------------------------------------------------------------------------------- /data_Chi/check_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/check_counts.npy -------------------------------------------------------------------------------- /data_Chi/crime_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/crime_counts.npy -------------------------------------------------------------------------------- /data_Chi/landUse_dist.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/landUse_dist.npy -------------------------------------------------------------------------------- /data_Chi/landUse_simi.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/landUse_simi.npy -------------------------------------------------------------------------------- /data_Chi/mob-adj.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/mob-adj.npy -------------------------------------------------------------------------------- /data_Chi/mob_dist.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/mob_dist.npy -------------------------------------------------------------------------------- /data_Chi/poi_dist.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/poi_dist.npy -------------------------------------------------------------------------------- /data_Chi/poi_simi.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/poi_simi.npy -------------------------------------------------------------------------------- /data_Chi/serviceCall_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/serviceCall_counts.npy -------------------------------------------------------------------------------- /data_NY/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/.DS_Store -------------------------------------------------------------------------------- /data_NY/check_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/check_counts.npy 
-------------------------------------------------------------------------------- /data_NY/crime_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/crime_counts.npy -------------------------------------------------------------------------------- /data_NY/landUse_dist.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/landUse_dist.npy -------------------------------------------------------------------------------- /data_NY/landUse_simi.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/landUse_simi.npy -------------------------------------------------------------------------------- /data_NY/mob-adj.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/mob-adj.npy -------------------------------------------------------------------------------- /data_NY/mob_dist.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/mob_dist.npy -------------------------------------------------------------------------------- /data_NY/poi_dist.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/poi_dist.npy -------------------------------------------------------------------------------- /data_NY/poi_simi.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/poi_simi.npy -------------------------------------------------------------------------------- /data_NY/serviceCall_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/serviceCall_counts.npy -------------------------------------------------------------------------------- /data_SF/check_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/check_counts.npy -------------------------------------------------------------------------------- /data_SF/crime_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/crime_counts.npy -------------------------------------------------------------------------------- /data_SF/landUse_dist.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/landUse_dist.npy -------------------------------------------------------------------------------- /data_SF/landUse_simi.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/landUse_simi.npy -------------------------------------------------------------------------------- /data_SF/mob-adj.npy: -------------------------------------------------------------------------------- 
"""Command-line configuration for HAFusion.

Importing this module parses ``sys.argv`` once and exposes the result as the
module-level ``args`` namespace (``from parse_args import args``).

City-dependent options (data path, input dimensions, layer counts, ...) are
registered up front with no default and are filled in from a per-city table
after parsing.  Registering them before the parse is what allows them to be
overridden on the command line: previously a first ``parse_args()`` call ran
before these options existed, so passing any of them was rejected as an
unrecognised argument.
"""
import argparse

# Per-city defaults for the city-dependent options; an explicit command-line
# value always takes precedence over these.
_CITY_DEFAULTS = {
    'NY': {
        'data_path': './data_NY',
        'POI_dim': 26,
        'landUse_dim': 11,
        'region_num': 180,
        'NO_IntraAFL': 3,
        'NO_InterAFL': 3,
        'NO_RegionFusion': 3,
        'NO_head': 4,
        'd_prime': 64,
        'd_m': 72,
        'c': 32,
    },
    'Chi': {
        'data_path': './data_Chi',
        'POI_dim': 26,
        'landUse_dim': 12,
        'region_num': 77,
        'NO_IntraAFL': 1,
        'NO_InterAFL': 2,
        'NO_RegionFusion': 3,
        'NO_head': 1,
        'd_prime': 32,
        'd_m': 36,
        'c': 32,
    },
    'SF': {
        'data_path': './data_SF',
        'POI_dim': 26,
        'landUse_dim': 23,
        'region_num': 175,
        'NO_IntraAFL': 3,
        'NO_InterAFL': 2,
        'NO_RegionFusion': 3,
        'NO_head': 5,
        'd_prime': 64,
        'd_m': 72,
        'c': 32,
    },
}

parser = argparse.ArgumentParser()

# -----------------------File------------------------
# ``choices`` turns an unknown city into an immediate argparse error instead
# of a namespace that silently lacks every city-dependent attribute.
parser.add_argument('--city', default="NY", choices=('NY', 'Chi', 'SF'),
                    help='City name, can be NY or Chi or SF')
parser.add_argument('--task', default="checkIn", help='Downstrea task name, can be crime or checkIn or serviceCall')
parser.add_argument('--mobility_dist', default='/mob_dist.npy')
parser.add_argument('--POI_dist', default='/poi_dist.npy')
parser.add_argument('--landUse_dist', default='/landUse_dist.npy')
parser.add_argument('--mobility_adj', default='/mob-adj.npy')
parser.add_argument('--POI_simi', default='/poi_simi.npy')
parser.add_argument('--landUse_simi', default='/landUse_simi.npy')

# -----------------------Model-----------------------
parser.add_argument('--embedding_size', type=int, default=144)
parser.add_argument('--learning_rate', type=float, default=0.0005)
parser.add_argument('--weight_decay', type=float, default=5e-4)
parser.add_argument('--epochs', type=int, default=2000)
parser.add_argument('--dropout', type=float, default=0.1)

# -----------------------City---------------------------
# ``None`` marks "not given on the command line"; the real default depends on
# the selected city and is filled in after parsing.
parser.add_argument('--data_path', default=None)
for _name in ('POI_dim', 'landUse_dim', 'region_num', 'NO_IntraAFL',
              'NO_InterAFL', 'NO_RegionFusion', 'NO_head', 'd_prime', 'd_m', 'c'):
    parser.add_argument('--' + _name, type=int, default=None)

args = parser.parse_args()

for _name, _value in _CITY_DEFAULTS[args.city].items():
    if getattr(args, _name) is None:
        setattr(args, _name, _value)
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 25-fold split.

    Returns ``(predictions, truths)``: the per-fold test predictions and the
    matching labels, each concatenated in fold order. The split is seeded
    (``random_state=2024``) so it is the same on every call.
    """
    kf = KFold(n_splits=25, shuffle=True, random_state=2024)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the check-in prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_Chi/check_counts.npy`` (relative to the
    current working directory). Only entries whose check-in count is
    strictly positive are kept, together with the matching rows of ``embs``.
    """
    if display:
        print("Check-In Prediction: ")
    check_in_label = np.load("./data_Chi/check_counts.npy")

    # Boolean mask over the first len(embs) labels replaces the former
    # element-by-element loop; row order is preserved.
    labels = check_in_label[:len(embs)]
    keep = labels > 0
    embs_list = np.asarray(embs)[keep]
    chk_label = labels[keep]

    check_mae, check_rmse, check_r2 = predict_regression(embs_list, chk_label, display=display)

    return check_mae, check_rmse, check_r2
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 25-fold split.

    Returns ``(predictions, truths)``: the per-fold test predictions and the
    matching labels, each concatenated in fold order. The split is seeded
    (``random_state=2024``) so it is the same on every call.
    """
    kf = KFold(n_splits=25, shuffle=True, random_state=2024)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the crime prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_Chi/crime_counts.npy`` (relative to the
    current working directory) and used unfiltered.
    """
    if display:
        print("Crime Prediction: ")
    crime_count_label = np.load("./data_Chi/crime_counts.npy")

    crime_mae, crime_rmse, crime_r2 = predict_regression(embs, crime_count_label, display=display)
    return crime_mae, crime_rmse, crime_r2
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 25-fold split.

    Returns ``(predictions, truths)``: the per-fold test predictions and the
    matching labels, each concatenated in fold order. The split is seeded
    (``random_state=2024``) so it is the same on every call.
    """
    kf = KFold(n_splits=25, shuffle=True, random_state=2024)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the service-call prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_Chi/serviceCall_counts.npy`` (relative to
    the current working directory) and used unfiltered.
    """
    if display:
        print("Service Calls Prediction: ")
    service_call_label = np.load("./data_Chi/serviceCall_counts.npy")

    service_call_mae, service_call_rmse, service_call_r2 = predict_regression(
        embs, service_call_label, display=display)
    return service_call_mae, service_call_rmse, service_call_r2
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 10-fold split.

    ``KFold`` is created with ``n_splits`` only, so no shuffle or seed is
    requested here. Returns ``(predictions, truths)``, each concatenated in
    fold order.
    """
    kf = KFold(n_splits=10)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the check-in prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_NY/check_counts.npy`` (relative to the
    current working directory) and used unfiltered.
    """
    if display:
        print("Check-In Prediction: ")
    check_in_label = np.load("./data_NY/check_counts.npy")

    check_mae, check_rmse, check_r2 = predict_regression(embs, check_in_label, display=display)

    return check_mae, check_rmse, check_r2
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 10-fold split.

    ``KFold`` is created with ``n_splits`` only, so no shuffle or seed is
    requested here. Returns ``(predictions, truths)``, each concatenated in
    fold order.
    """
    kf = KFold(n_splits=10)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the crime prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_NY/crime_counts.npy`` (relative to the
    current working directory) and used unfiltered.
    """
    if display:
        print("Crime Prediction: ")
    crime_count_label = np.load("./data_NY/crime_counts.npy")

    crime_mae, crime_rmse, crime_r2 = predict_regression(embs, crime_count_label, display=display)
    return crime_mae, crime_rmse, crime_r2
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 10-fold split.

    ``KFold`` is created with ``n_splits`` only, so no shuffle or seed is
    requested here. Returns ``(predictions, truths)``, each concatenated in
    fold order.
    """
    kf = KFold(n_splits=10)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the service-call prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_NY/serviceCall_counts.npy`` (relative to
    the current working directory) and used unfiltered.
    """
    if display:
        print("Service Calls Prediction: ")
    service_call_label = np.load("./data_NY/serviceCall_counts.npy")

    service_call_mae, service_call_rmse, service_call_r2 = predict_regression(
        embs, service_call_label, display=display)
    return service_call_mae, service_call_rmse, service_call_r2
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 10-fold split.

    Returns ``(predictions, truths)``: the per-fold test predictions and the
    matching labels, each concatenated in fold order. The split is seeded
    (``random_state=2024``) so it is the same on every call.
    """
    kf = KFold(n_splits=10, shuffle=True, random_state=2024)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the check-in prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_SF/check_counts.npy`` (relative to the
    current working directory). Only entries whose check-in count is
    strictly positive are kept, together with the matching rows of ``embs``.
    """
    if display:
        print("Check-In Prediction: ")
    check_in_label = np.load("./data_SF/check_counts.npy")

    # Boolean mask over the first len(embs) labels replaces the former
    # element-by-element loop; row order is preserved.
    labels = check_in_label[:len(embs)]
    keep = labels > 0
    embs_list = np.asarray(embs)[keep]
    chk_label = labels[keep]

    check_mae, check_rmse, check_r2 = predict_regression(embs_list, chk_label, display=display)

    return check_mae, check_rmse, check_r2
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 10-fold split.

    Returns ``(predictions, truths)``: the per-fold test predictions and the
    matching labels, each concatenated in fold order. The split is seeded
    (``random_state=2024``) so it is the same on every call.
    """
    kf = KFold(n_splits=10, shuffle=True, random_state=2024)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the crime prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_SF/crime_counts.npy`` (relative to the
    current working directory) and used unfiltered.
    """
    if display:
        print("Crime Prediction: ")
    crime_count_label = np.load("./data_SF/crime_counts.npy")

    crime_mae, crime_rmse, crime_r2 = predict_regression(embs, crime_count_label, display=display)
    return crime_mae, crime_rmse, crime_r2
def compute_metrics(y_pred, y_test):
    """Return ``(MAE, RMSE, R2)`` of ``y_pred`` scored against ``y_test``.

    Negative predictions are clamped to zero before scoring. The clamp is
    done on a new array so the caller's ``y_pred`` is not modified.
    """
    y_pred = np.maximum(y_pred, 0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return mae, rmse, r2


def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict ``X_test``.

    ``alpha`` is passed straight to ``linear_model.Ridge``. The training
    inputs are converted to float arrays before fitting.
    """
    reg = linear_model.Ridge(alpha=alpha)
    reg.fit(np.array(X_train, dtype=float), np.array(y_train, dtype=float))
    return reg.predict(X_test)


def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 10-fold split.

    Returns ``(predictions, truths)``: the per-fold test predictions and the
    matching labels, each concatenated in fold order. The split is seeded
    (``random_state=2024``) so it is the same on every call.
    """
    kf = KFold(n_splits=10, shuffle=True, random_state=2024)
    y_preds = []
    y_truths = []
    for train_index, test_index in kf.split(X):
        y_preds.append(regression(X[train_index], Y[train_index], X[test_index], 1))
        y_truths.append(Y[test_index])
    return np.concatenate(y_preds), np.concatenate(y_truths)


def predict_regression(embs, labels, display=False):
    """Cross-validate ``embs`` against ``labels`` and return ``(MAE, RMSE, R2)``.

    When ``display`` is true the three metrics are also printed.
    """
    y_pred, y_test = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(y_pred, y_test)
    if display:
        print("MAE: %.3f" % mae)
        print("RMSE: %.3f" % rmse)
        print("R2: %.3f" % r2)
    return mae, rmse, r2


def do_tasks(embs, display=True):
    """Run the service-call prediction task and return ``(MAE, RMSE, R2)``.

    Labels are read from ``./data_SF/serviceCall_counts.npy`` (relative to
    the current working directory) and used unfiltered.
    """
    if display:
        print("Service Calls Prediction: ")
    service_call_label = np.load("./data_SF/serviceCall_counts.npy")

    service_call_mae, service_call_rmse, service_call_r2 = predict_regression(
        embs, service_call_label, display=display)
    return service_call_mae, service_call_rmse, service_call_r2
def load_data():
    """Load the region inputs from ``args.data_path`` as torch tensors.

    Returns:
        A 4-tuple ``(features, mob_adj, poi_sim, land_sim)`` where
        ``features`` is the list ``[POI_feature, landUse_feature,
        mob_feature]``. Each feature array gets a new leading axis of
        length 1 before conversion (presumably a batch axis; confirm
        against the model). ``mob_adj`` is divided by its own mean. All
        tensors are moved to CUDA when available, otherwise CPU.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    root = args.data_path

    def _as_tensor(array):
        # Same conversion for every input: build a tensor, then move it.
        return torch.Tensor(array).to(device)

    def _feature(file_name):
        # Feature files are stored without the leading axis; add it here.
        return _as_tensor(np.load(root + file_name)[np.newaxis])

    # Files are read in the same order as before so that a missing file
    # surfaces at the same point.
    landUse_feature = _feature(args.landUse_dist)
    POI_feature = _feature(args.POI_dist)
    mob_feature = _feature(args.mobility_dist)

    raw_adj = np.load(root + args.mobility_adj)
    mob_adj = _as_tensor(raw_adj / np.mean(raw_adj))

    poi_sim = _as_tensor(np.load(root + args.POI_simi))
    land_sim = _as_tensor(np.load(root + args.landUse_simi))

    features = [POI_feature, landUse_feature, mob_feature]

    return features, mob_adj, poi_sim, land_sim