├── .gitattributes
├── HAFusion_Model.py
├── HAFusion_train.py
├── Images
    ├── DAFusion.png
    ├── Experiment.png
    └── model structure.png
├── README.md
├── data_Chi
    ├── check_counts.npy
    ├── crime_counts.npy
    ├── landUse_dist.npy
    ├── landUse_simi.npy
    ├── mob-adj.npy
    ├── mob_dist.npy
    ├── poi_dist.npy
    ├── poi_simi.npy
    └── serviceCall_counts.npy
├── data_NY
    ├── .DS_Store
    ├── check_counts.npy
    ├── crime_counts.npy
    ├── landUse_dist.npy
    ├── landUse_simi.npy
    ├── mob-adj.npy
    ├── mob_dist.npy
    ├── poi_dist.npy
    ├── poi_simi.npy
    └── serviceCall_counts.npy
├── data_SF
    ├── check_counts.npy
    ├── crime_counts.npy
    ├── landUse_dist.npy
    ├── landUse_simi.npy
    ├── mob-adj.npy
    ├── mob_dist.npy
    ├── poi_dist.npy
    ├── poi_simi.npy
    └── serviceCall_counts.npy
├── parse_args.py
├── requirements.txt
├── tasks_Chi
    ├── tasks_chk.py
    ├── tasks_crime.py
    └── tasks_serviceCall.py
├── tasks_NY
    ├── tasks_chk.py
    ├── tasks_crime.py
    └── tasks_serviceCall.py
├── tasks_SF
    ├── tasks_chk.py
    ├── tasks_crime.py
    └── tasks_serviceCall.py
└── utils.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/HAFusion_Model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from parse_args import args
  6 | 
  7 | class DeepFc(nn.Module):
  8 |     def __init__(self, input_dim, output_dim):
  9 |         super(DeepFc, self).__init__()
 10 |         self.model = nn.Sequential(
 11 |             nn.Linear(input_dim, input_dim * 2),
 12 |             nn.Linear(input_dim * 2, input_dim * 2),
 13 |             nn.LeakyReLU(negative_slope=0.3, inplace=True),
 14 |             nn.Linear(input_dim * 2, output_dim),
 15 |             nn.LeakyReLU(negative_slope=0.3, inplace=True), )
 16 | 
 17 |         self.output = None
 18 | 
 19 |     def forward(self, x):
 20 |         output = self.model(x)
 21 |         self.output = output
 22 |         return output
 23 | 
 24 |     def out_feature(self):
 25 |         return self.output
 26 | 
 27 | 
 28 | class RegionFusionBlock(nn.Module):
 29 | 
 30 |     def __init__(self, input_dim, nhead, dropout, dim_feedforward=2048):
 31 |         super(RegionFusionBlock, self).__init__()
 32 |         self.self_attn = nn.MultiheadAttention(input_dim, nhead, dropout=dropout, batch_first=True, bias=True)
 33 |         self.dropout = nn.Dropout(dropout)
 34 | 
 35 |         self.linear1 = nn.Linear(input_dim, dim_feedforward, )
 36 |         self.linear2 = nn.Linear(dim_feedforward, input_dim)
 37 | 
 38 |         self.norm1 = nn.LayerNorm(input_dim)
 39 |         self.norm2 = nn.LayerNorm(input_dim)
 40 |         self.dropout1 = nn.Dropout(dropout)
 41 |         self.dropout2 = nn.Dropout(dropout)
 42 | 
 43 |         self.activation = F.relu
 44 | 
 45 |     def forward(self, src):
 46 |         src2, _ = self.self_attn(src, src, src, )
 47 | 
 48 |         src = src + self.dropout1(src2)
 49 |         src = self.norm1(src)
 50 |         src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
 51 |         src = src + self.dropout2(src2)
 52 |         src = self.norm2(src)
 53 |         return src
 54 | 
 55 | 
 56 | class intraAFL_Block(nn.Module):
 57 | 
 58 |     def __init__(self, input_dim, nhead, c, dropout, dim_feedforward=2048):
 59 |         super(intraAFL_Block, self).__init__()
 60 |         self.self_attn = nn.MultiheadAttention(input_dim, nhead, dropout=dropout, batch_first=True, bias=True)
 61 |         self.dropout = nn.Dropout(dropout)
 62 | 
 63 |         self.linear1 = nn.Linear(input_dim, dim_feedforward, )
 64 |         self.linear2 = nn.Linear(dim_feedforward, input_dim)
 65 | 
 66 |         self.norm1 = nn.LayerNorm(input_dim)
 67 |         self.norm2 = nn.LayerNorm(input_dim)
 68 |         self.dropout1 = nn.Dropout(dropout)
 69 |         self.dropout2 = nn.Dropout(dropout)
 70 | 
 71 |         self.expand = nn.Conv2d(1, c, kernel_size=1)
 72 |         self.pooling = nn.AvgPool2d(kernel_size=3, padding=1, stride=1)
 73 |         self.proj = nn.Linear(c, input_dim)
 74 | 
 75 |         self.activation = F.relu
 76 | 
 77 |     def forward(self, src):
 78 |         src2, attnScore = self.self_attn(src, src, src, )
 79 |         attnScore = attnScore[:, np.newaxis]
 80 | 
 81 |         edge_emb = self.expand(attnScore)
 82 |         # edge_emb = self.pooling(edge_emb)
 83 |         w = edge_emb
 84 |         w = w.softmax(dim=-1)
 85 |         w = (w * edge_emb).sum(-1).transpose(-1, -2)
 86 |         w = self.proj(w)
 87 |         src2 = src2 + w
 88 | 
 89 |         src = src + self.dropout1(src2)
 90 |         src = self.norm1(src)
 91 |         src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
 92 |         src = src + self.dropout2(src2)
 93 |         src = self.norm2(src)
 94 |         return src
 95 | 
 96 | class intraAFL(nn.Module):
 97 |     def __init__(self, input_dim, c):
 98 |         super(intraAFL, self).__init__()
 99 |         self.input_dim = input_dim
100 |         self.num_block = args.NO_IntraAFL
101 |         NO_head = args.NO_head
102 |         dropout = args.dropout
103 | 
104 |         self.blocks = nn.ModuleList(
105 |             [intraAFL_Block(input_dim=input_dim, nhead=NO_head, c=c, dropout=dropout) for _ in range(self.num_block)])
106 | 
107 |         self.fc = DeepFc(input_dim, input_dim)
108 | 
109 |     def forward(self, x):
110 |         out = x
111 |         for block in self.blocks:
112 |             out = block(out)
113 |         out = out.squeeze()
114 |         out = self.fc(out)
115 |         return out
116 | 
117 | 
class RegionFusion(nn.Module):
    """Stack of ``RegionFusionBlock`` layers followed by a ``DeepFc`` head.

    The number of blocks, the number of attention heads and the dropout rate
    are read from the module-level ``args`` (``NO_RegionFusion``, ``NO_head``,
    ``dropout``).

    Args:
        input_dim: feature width used by every block and by the head.
    """

    def __init__(self, input_dim):
        super(RegionFusion, self).__init__()
        self.input_dim = input_dim
        self.num_block = args.NO_RegionFusion
        NO_head = args.NO_head
        dropout = args.dropout

        self.blocks = nn.ModuleList(
            [RegionFusionBlock(input_dim=input_dim, nhead=NO_head, dropout=dropout) for _ in range(self.num_block)])

        self.fc = DeepFc(input_dim, input_dim)

    def forward(self, x):
        """Run ``x`` through every block, drop a leading size-1 axis, apply the head."""
        out = x
        for block in self.blocks:
            out = block(out)
        # Remove only a leading singleton axis. A bare squeeze() would also
        # collapse any other axis that happens to have size 1, silently
        # changing the rank seen by the head.
        out = out.squeeze(0)
        out = self.fc(out)
        return out
138 | 
139 | 
class interAFL_Block(nn.Module):
    """Two bias-free linear maps with a doubly normalised score in between.

    ``mk`` maps ``d_model`` features to ``S`` scores, the scores are softmaxed
    along dim 1 and then rescaled to sum to one along dim 2, and ``mv`` maps
    the result back to ``d_model`` features.
    """

    def __init__(self, d_model, S):
        super().__init__()
        self.mk = nn.Linear(d_model, S, bias=False)
        self.mv = nn.Linear(S, d_model, bias=False)
        self.softmax = nn.Softmax(dim=1)
        self.init_weights()

    def init_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear sub-module."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                has_bias = module.bias is not None
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                has_bias = True
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, std=0.001)
                has_bias = module.bias is not None
            else:
                continue
            # Every handled module type gets a zero bias when it has one.
            if has_bias:
                nn.init.constant_(module.bias, 0)

    def forward(self, queries):
        """Return ``mv`` applied to the normalised ``mk`` scores of ``queries``."""
        scores = self.softmax(self.mk(queries))
        scores = scores / scores.sum(dim=2, keepdim=True)
        return self.mv(scores)
170 | 
171 | 
class interAFL(nn.Module):
    """Sequence of ``interAFL_Block`` layers followed by a ``DeepFc`` head.

    The number of blocks is read from the module-level ``args.NO_InterAFL``.
    """

    def __init__(self, input_dim, d_m):
        super().__init__()
        self.input_dim = input_dim
        self.num_block = args.NO_InterAFL

        layers = [interAFL_Block(input_dim, d_m) for _ in range(self.num_block)]
        self.blocks = nn.ModuleList(layers)

        self.fc = DeepFc(input_dim, input_dim)

    def forward(self, x):
        """Apply each block in turn, squeeze size-1 axes and apply the head."""
        hidden = x
        for layer in self.blocks:
            hidden = layer(hidden)
        return self.fc(hidden.squeeze())
190 | 
191 | 
class ViewFusion(nn.Module):
    """Produce softmax-normalised coefficients from a stacked input.

    The input goes through a bias-free 1x1 ``Conv1d`` (``emb_dim`` ->
    ``out_dim`` channels) and two single-channel 1x1 convolutions whose
    outputs are combined pairwise into a square logit matrix.
    """

    def __init__(self, emb_dim, out_dim):
        super().__init__()
        self.W = nn.Conv1d(emb_dim, out_dim, kernel_size=1, bias=False)
        self.f1 = nn.Conv1d(out_dim, 1, kernel_size=1)
        self.f2 = nn.Conv1d(out_dim, 1, kernel_size=1)
        self.act = nn.LeakyReLU(negative_slope=0.3, inplace=True)

    def forward(self, src):
        """Return a 1-D coefficient vector that sums to one."""
        projected = self.W(src)
        # Broadcasting a row of f1 scores against a column of f2 scores
        # yields one logit per ordered pair of positions.
        logits = self.f1(projected) + self.f2(projected).transpose(1, 2)
        # Average over the last axis, then over the leading axis.
        scores = self.act(logits).mean(dim=-1).mean(dim=0)
        return F.softmax(scores, dim=-1)
209 | 
210 | 
class HAFusion(nn.Module):
    """Full model: per-view encoders, inter-view mixing, view fusion,
    region fusion and four linear decoders.

    forward() takes a ``(poi, landUse, mobility)`` triple and returns four
    decoded embeddings; the shared embedding they are decoded from is cached
    and available through out_feature().
    """

    def __init__(self, poi_dim, landUse_dim, input_dim, output_dim, d_prime, d_m, c):
        super().__init__()
        self.input_dim = input_dim

        # Input projections for the POI and land-use views; the mobility
        # view is fed to its encoder without a projection.
        self.densePOI2 = nn.Linear(poi_dim, input_dim)
        self.denseLandUse3 = nn.Linear(landUse_dim, input_dim)

        # One intra-view encoder per view.
        self.encoderPOI = intraAFL(input_dim, c)
        self.encoderLandUse = intraAFL(input_dim, c)
        self.encoderMob = intraAFL(input_dim, c)

        self.regionFusionLayer = RegionFusion(input_dim)

        self.interViewEncoder = interAFL(input_dim, d_m)

        self.fc = DeepFc(input_dim, output_dim)

        # Learnable mixing weights, initialised to 0.1 and 0.9; forward()
        # divides each by their sum before use.
        self.para1 = torch.nn.Parameter(torch.FloatTensor(1), requires_grad=True)
        nn.init.constant_(self.para1, 0.1)
        self.para2 = torch.nn.Parameter(torch.FloatTensor(1), requires_grad=True)
        nn.init.constant_(self.para2, 0.9)

        self.viewFusionLayer = ViewFusion(input_dim, d_prime)

        self.activation = F.relu
        self.dropout = nn.Dropout(0.1)
        self.decoder_s = nn.Linear(output_dim, output_dim)
        self.decoder_t = nn.Linear(output_dim, output_dim)
        self.decoder_p = nn.Linear(output_dim, output_dim)
        self.decoder_l = nn.Linear(output_dim, output_dim)
        # Shared embedding from the latest forward(); None before the first call.
        self.feature = None

    def forward(self, x):
        """Return ``(out_s, out_t, out_p, out_l)`` decoded from the fused embedding."""
        poi_feat, land_feat, mob_feat = x

        poi_feat = self.dropout(self.activation(self.densePOI2(poi_feat)))
        land_feat = self.dropout(self.activation(self.denseLandUse3(land_feat)))

        # Encode each view, then stack with the view index as the first axis.
        view_embs = torch.stack([
            self.encoderPOI(poi_feat),
            self.encoderLandUse(land_feat),
            self.encoderMob(mob_feat),
        ])

        # The inter-view encoder works on the stack with its first two axes
        # swapped; swap them back afterwards.
        inter_embs = self.interViewEncoder(view_embs.transpose(0, 1)).transpose(0, 1)

        # Blend the inter-view and intra-view embeddings with weights that
        # sum to one.
        intra_weight = self.para1 / (self.para1 + self.para2)
        inter_weight = self.para2 / (self.para1 + self.para2)
        blended = inter_embs * inter_weight + view_embs * intra_weight

        # One coefficient per view, used for a weighted sum over the views.
        coef = self.viewFusionLayer(blended.transpose(0, 2))
        fused = coef[0] * blended[0] + coef[1] * blended[1] + coef[2] * blended[2]

        # The region fusion layer is fed the fused tensor with a new leading axis.
        fused = self.regionFusionLayer(fused.unsqueeze(0))
        embedding = self.fc(fused)
        self.feature = embedding

        out_s = self.decoder_s(embedding)  # source embedding of regions
        out_t = self.decoder_t(embedding)  # destination embedding of regions
        out_p = self.decoder_p(embedding)  # poi embedding of regions
        out_l = self.decoder_l(embedding)  # landuse embedding of regions
        return out_s, out_t, out_p, out_l

    def out_feature(self):
        """Return the shared embedding cached by the last forward() call."""
        return self.feature


--------------------------------------------------------------------------------
/HAFusion_train.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import torch.nn as nn
  4 | import torch.optim as optim
  5 | import torch.nn.functional as F
  6 | import utils
  7 | from parse_args import args
  8 | import tasks_NY.tasks_crime, tasks_NY.tasks_chk, tasks_NY.tasks_serviceCall
  9 | import tasks_Chi.tasks_crime, tasks_Chi.tasks_chk, tasks_Chi.tasks_serviceCall
 10 | import tasks_SF.tasks_crime, tasks_SF.tasks_chk, tasks_SF.tasks_serviceCall
 11 | from HAFusion_Model import HAFusion
 12 | 
# Load the training inputs once, at import time. The last three values are
# later passed to the loss as the mobility, POI and land-use targets.
features, mob_adj, poi_sim, land_sim = utils.load_data()

# Module-level copies of the parsed command-line settings.
city = args.city
embedding_size = args.embedding_size  # passed to HAFusion as output_dim
d_prime = args.d_prime
d_m = args.d_m
c = args.c  # channel count forwarded to HAFusion
POI_dim = args.POI_dim
landUse_dim = args.landUse_dim
region_num = args.region_num  # passed to HAFusion as input_dim
task = args.task
 24 | 
 25 | def _mob_loss(s_embeddings, t_embeddings, mob):
 26 |     inner_prod = torch.mm(s_embeddings, t_embeddings.T)
 27 |     softmax1 = nn.Softmax(dim=-1)
 28 |     phat = softmax1(inner_prod)
 29 |     loss = torch.sum(-torch.mul(mob, torch.log(phat + 0.0001)))
 30 |     inner_prod = torch.mm(t_embeddings, s_embeddings.T)
 31 |     softmax2 = nn.Softmax(dim=-1)
 32 |     phat = softmax2(inner_prod)
 33 |     loss += torch.sum(-torch.mul(torch.transpose(mob, 0, 1), torch.log(phat + 0.0001)))
 34 |     return loss
 35 | 
 36 | 
 37 | def _general_loss(embeddings, adj):
 38 |     inner_prod = F.cosine_similarity(embeddings.unsqueeze(1), embeddings.unsqueeze(0), dim=2)
 39 |     loss = F.mse_loss(inner_prod, adj)
 40 |     return loss
 41 | 
 42 | 
 43 | class ModelLoss(nn.Module):
 44 |     def __init__(self):
 45 |         super(ModelLoss, self).__init__()
 46 | 
 47 |     def forward(self, out_s, out_t, mob_adj, out_p, poi_sim, out_l, land_sim):
 48 |         mob_loss = _mob_loss(out_s, out_t, mob_adj)
 49 |         poi_loss = _general_loss(out_p, poi_sim)
 50 |         land_loss = _general_loss(out_l, land_sim)
 51 |         loss = poi_loss + land_loss + mob_loss
 52 |         return loss
 53 |     
 54 | def train_model(input_features, mob_adj, poi_sim, land_sim, model, model_loss, city, task):
 55 |     epochs = args.epochs
 56 |     learning_rate = args.learning_rate
 57 |     weight_decay = args.weight_decay
 58 | 
 59 |     optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
 60 |     best_emb = 0
 61 |     best_r2 = 0
 62 | 
 63 |     for epoch in range(epochs):
 64 |         model.train()
 65 |         out_s, out_t, out_p, out_l = model(input_features)
 66 |         loss = model_loss(out_s, out_t, mob_adj, out_p, poi_sim, out_l, land_sim)
 67 |         optimizer.zero_grad()
 68 |         loss.backward()
 69 |         optimizer.step()
 70 |         if epoch % 30 == 0:
 71 |             print("Epoch {}, Loss {}".format(epoch, loss.item()))
 72 |             embs = model.out_feature()
 73 |             embs = embs.detach().cpu().numpy()
 74 | 
 75 |             if task == "checkIn":
 76 |                 if city == "NY":
 77 |                     _, _, r2 = tasks_NY.tasks_chk.do_tasks(embs)
 78 |                 elif city == "Chi":
 79 |                     _, _, r2 = tasks_Chi.tasks_chk.do_tasks(embs)
 80 |                 elif city == "SF":
 81 |                     _, _, r2 = tasks_SF.tasks_chk.do_tasks(embs)
 82 |             elif task == "crime":
 83 |                 if city == "NY":
 84 |                     _, _, r2 = tasks_NY.tasks_crime.do_tasks(embs)
 85 |                 elif city == "Chi":
 86 |                     _, _, r2 = tasks_Chi.tasks_crime.do_tasks(embs)
 87 |                 elif city == "SF":
 88 |                     _, _, r2 = tasks_SF.tasks_crime.do_tasks(embs)
 89 |             elif task == "serviceCall":
 90 |                 if city == "NY":
 91 |                     _, _, r2 = tasks_NY.tasks_serviceCall.do_tasks(embs)
 92 |                 elif city == "Chi":
 93 |                     _, _, r2 = tasks_Chi.tasks_serviceCall.do_tasks(embs)
 94 |                 elif city == "SF":
 95 |                     _, _, r2 = tasks_SF.tasks_serviceCall.do_tasks(embs)
 96 | 
 97 |             if best_r2 < r2:
 98 |                 best_r2 = r2
 99 |                 best_emb = embs
100 | 
101 |     np.save("best_emb.npy", best_emb)
102 | 
def test_model(city, task):
    """Score the saved best embeddings on one downstream task.

    Loads ``./best_emb.npy``, prints a banner naming the task and city, and
    runs the matching ``do_tasks`` function.

    Args:
        city: one of ``"NY"``, ``"Chi"``, ``"SF"``.
        task: one of ``"checkIn"``, ``"crime"``, ``"serviceCall"``.

    Returns:
        The ``(mae, rmse, r2)`` triple returned by ``do_tasks``.

    Raises:
        ValueError: if ``city`` or ``task`` is not one of the supported names
            (previously such a call silently evaluated nothing).
    """
    cities = {
        "NY": (tasks_NY, "New York City"),
        "Chi": (tasks_Chi, "Chicago"),
        "SF": (tasks_SF, "San Francisco"),
    }
    tasks = {
        "checkIn": ("tasks_chk", "Check-In"),
        "crime": ("tasks_crime", "Crime"),
        "serviceCall": ("tasks_serviceCall", "Service Calls"),
    }
    if city not in cities:
        raise ValueError("unknown city {!r}; expected one of {}".format(city, sorted(cities)))
    if task not in tasks:
        raise ValueError("unknown task {!r}; expected one of {}".format(task, sorted(tasks)))

    package, city_label = cities[city]
    submodule, task_label = tasks[task]

    best_emb = np.load("./best_emb.npy")
    print("Best region embeddings")
    print('>>>>>>>>>>>>>>>>>   {} in {}'.format(task_label, city_label))
    mae, rmse, r2 = getattr(package, submodule).do_tasks(best_emb)
    return mae, rmse, r2
136 | 
# Script entry point: build the model, train it, then score the saved embeddings.
if __name__ == '__main__':
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # region_num is passed as HAFusion's input_dim and embedding_size as its output_dim.
    # NOTE(review): only the model is moved to `device` here; the tensors from
    # utils.load_data() are presumably placed on a matching device - confirm.
    model = HAFusion(POI_dim, landUse_dim, region_num, embedding_size, d_prime, d_m, c).to(device)
    model_loss = ModelLoss()

    print('Model Training-----------------')
    # train_model() also switches to training mode at the start of every epoch.
    model.train()
    train_model(features, mob_adj, poi_sim, land_sim, model, model_loss, city, task)

    print("Downstream task test-----------")
    # Evaluates the embeddings that train_model() wrote to best_emb.npy.
    test_model(city, task)
148 | 
149 | 
150 | 


--------------------------------------------------------------------------------
/Images/DAFusion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/DAFusion.png


--------------------------------------------------------------------------------
/Images/Experiment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/Experiment.png


--------------------------------------------------------------------------------
/Images/model structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/model structure.png


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # HAFusion: Urban Region Representation Learning with Attentive Fusion (ICDE 2024)
 2 | 
 3 | This is a PyTorch implementation of the [HAFusion paper](https://arxiv.org/abs/2312.04606).
 4 | 
 5 | Authors: Fengze Sun, Jianzhong Qi, Yanchuan Chang, Xiaoliang Fan, Shanika Karunasekera, and Egemen Tanin
 6 | 
 7 | ```bibtex
 8 | @inproceedings{sun2024urban,
 9 |   title={Urban region representation learning with attentive fusion},
10 |   author={Sun, Fengze and Qi, Jianzhong and Chang, Yanchuan and Fan, Xiaoliang and Karunasekera, Shanika and Tanin, Egemen},
11 |   booktitle={2024 IEEE 40th International Conference on Data Engineering (ICDE)},
12 |   pages={4409--4421},
13 |   year={2024},
14 |   organization={IEEE}
15 | }
16 | ```
17 | ## Model Structure
18 | <p align="center">
19 |     <img src="Images/model structure.png" width="700">
20 | </p>
21 | 
22 | ## Experiments
23 | <p align="center"><strong>Overall Prediction Accuracy Results</strong></p>
24 | <p align="center">
25 |     <img src="Images/Experiment.png" width="700"> 
26 | </p>
27 | 
28 | <p align="center"><strong>Prediction Accuracy Results When Powering Existing Models with Our DAFusion Module (NYC)</strong></p>
29 | <div align="center">
30 |     <img src="Images/DAFusion.png" width="700"> 
31 | </div>
32 | 
33 | ## Requirements
34 | - Python 3.8.18
35 | - `pip install -r requirements.txt`
36 | 
37 | ## Quick Start
38 | To train and test HAFusion on a specific city and a specific downstream task:
39 | 
40 | - CITY_NAME: <strong>NY</strong> or <strong>Chi</strong> or <strong>SF</strong>
41 | - TASK_NAME: <strong>checkIn</strong> or <strong>crime</strong> or <strong>serviceCall</strong>
42 | 
43 | ```bash
44 | python HAFusion_train.py --city CITY_NAME --task TASK_NAME
45 | ```
46 | 
47 | ## Contact
48 | Email fengzes@student.unimelb.edu.au if you have any queries.
49 | 


--------------------------------------------------------------------------------
/data_Chi/check_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/check_counts.npy


--------------------------------------------------------------------------------
/data_Chi/crime_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/crime_counts.npy


--------------------------------------------------------------------------------
/data_Chi/landUse_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/landUse_dist.npy


--------------------------------------------------------------------------------
/data_Chi/landUse_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/landUse_simi.npy


--------------------------------------------------------------------------------
/data_Chi/mob-adj.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/mob-adj.npy


--------------------------------------------------------------------------------
/data_Chi/mob_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/mob_dist.npy


--------------------------------------------------------------------------------
/data_Chi/poi_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/poi_dist.npy


--------------------------------------------------------------------------------
/data_Chi/poi_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/poi_simi.npy


--------------------------------------------------------------------------------
/data_Chi/serviceCall_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/serviceCall_counts.npy


--------------------------------------------------------------------------------
/data_NY/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/.DS_Store


--------------------------------------------------------------------------------
/data_NY/check_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/check_counts.npy


--------------------------------------------------------------------------------
/data_NY/crime_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/crime_counts.npy


--------------------------------------------------------------------------------
/data_NY/landUse_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/landUse_dist.npy


--------------------------------------------------------------------------------
/data_NY/landUse_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/landUse_simi.npy


--------------------------------------------------------------------------------
/data_NY/mob-adj.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/mob-adj.npy


--------------------------------------------------------------------------------
/data_NY/mob_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/mob_dist.npy


--------------------------------------------------------------------------------
/data_NY/poi_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/poi_dist.npy


--------------------------------------------------------------------------------
/data_NY/poi_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/poi_simi.npy


--------------------------------------------------------------------------------
/data_NY/serviceCall_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/serviceCall_counts.npy


--------------------------------------------------------------------------------
/data_SF/check_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/check_counts.npy


--------------------------------------------------------------------------------
/data_SF/crime_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/crime_counts.npy


--------------------------------------------------------------------------------
/data_SF/landUse_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/landUse_dist.npy


--------------------------------------------------------------------------------
/data_SF/landUse_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/landUse_simi.npy


--------------------------------------------------------------------------------
/data_SF/mob-adj.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/mob-adj.npy


--------------------------------------------------------------------------------
/data_SF/mob_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/mob_dist.npy


--------------------------------------------------------------------------------
/data_SF/poi_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/poi_dist.npy


--------------------------------------------------------------------------------
/data_SF/poi_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/poi_simi.npy


--------------------------------------------------------------------------------
/data_SF/serviceCall_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/serviceCall_counts.npy


--------------------------------------------------------------------------------
/parse_args.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | parser = argparse.ArgumentParser()
 4 | 
 5 | # -----------------------File------------------------
 6 | parser.add_argument('--city',                 default="NY",       help='City name, can be NY or Chi or SF')
 7 | parser.add_argument('--task',                 default="checkIn",  help='Downstrea task name, can be crime or checkIn or serviceCall')
 8 | parser.add_argument('--mobility_dist',        default='/mob_dist.npy')
 9 | parser.add_argument('--POI_dist',             default='/poi_dist.npy')
10 | parser.add_argument('--landUse_dist',         default='/landUse_dist.npy')
11 | parser.add_argument('--mobility_adj',         default='/mob-adj.npy')
12 | parser.add_argument('--POI_simi',             default='/poi_simi.npy')
13 | parser.add_argument('--landUse_simi',         default='/landUse_simi.npy')
14 | 
15 | # -----------------------Model-----------------------
16 | parser.add_argument('--embedding_size', type=int,    default=144)
17 | parser.add_argument('--learning_rate',  type=float,  default=0.0005)
18 | parser.add_argument('--weight_decay',   type=float,  default=5e-4)
19 | parser.add_argument('--epochs',         type=int,    default=2000)
20 | parser.add_argument('--dropout',        type=float,  default=0.1)
21 | 
22 | args = parser.parse_args()
23 | 
24 | # -----------------------City--------------------------- #
25 | 
26 | if args.city == 'NY':
27 |     parser.add_argument('--data_path',                    default='./data_NY')
28 |     parser.add_argument('--POI_dim',         type=int,    default=26)
29 |     parser.add_argument('--landUse_dim',     type=int,    default=11)
30 |     parser.add_argument('--region_num',      type=int,    default=180)
31 |     parser.add_argument('--NO_IntraAFL',     type=int,    default=3)
32 |     parser.add_argument('--NO_InterAFL',     type=int,    default=3)
33 |     parser.add_argument('--NO_RegionFusion', type=int,    default=3)
34 |     parser.add_argument('--NO_head',         type=int,    default=4)
35 |     parser.add_argument('--d_prime',         type=int,    default=64)
36 |     parser.add_argument('--d_m',             type=int,    default=72)
37 |     parser.add_argument('--c',               type=int,    default=32)
38 | elif args.city == "Chi":
39 |     parser.add_argument('--data_path',                    default='./data_Chi')
40 |     parser.add_argument('--POI_dim',         type=int,    default=26)
41 |     parser.add_argument('--landUse_dim',     type=int,    default=12)
42 |     parser.add_argument('--region_num',      type=int,    default=77)
43 |     parser.add_argument('--NO_IntraAFL',     type=int,    default=1)
44 |     parser.add_argument('--NO_InterAFL',     type=int,    default=2)
45 |     parser.add_argument('--NO_RegionFusion', type=int,    default=3)
46 |     parser.add_argument('--NO_head',         type=int,    default=1)
47 |     parser.add_argument('--d_prime',         type=int,    default=32)
48 |     parser.add_argument('--d_m',             type=int,    default=36)
49 |     parser.add_argument('--c',               type=int,    default=32)
50 | elif args.city == "SF":
51 |     parser.add_argument('--data_path',                    default='./data_SF')
52 |     parser.add_argument('--POI_dim',         type=int,    default=26)
53 |     parser.add_argument('--landUse_dim',     type=int,    default=23)
54 |     parser.add_argument('--region_num',      type=int,    default=175)
55 |     parser.add_argument('--NO_IntraAFL',     type=int,    default=3)
56 |     parser.add_argument('--NO_InterAFL',     type=int,    default=2)
57 |     parser.add_argument('--NO_RegionFusion', type=int,    default=3)
58 |     parser.add_argument('--NO_head',         type=int,    default=5)
59 |     parser.add_argument('--d_prime',         type=int,    default=64)
60 |     parser.add_argument('--d_m',             type=int,    default=72)
61 |     parser.add_argument('--c',               type=int,    default=32)
62 | 
63 | args = parser.parse_args()


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.24.3
2 | scikit_learn==1.2.2
3 | torch==1.10.2
4 | 


--------------------------------------------------------------------------------
/tasks_Chi/tasks_chk.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import linear_model
 3 | from sklearn.model_selection import KFold
 4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 5 | 
 6 | 
 7 | def compute_metrics(y_pred, y_test):
 8 |     y_pred[y_pred < 0] = 0
 9 |     mae = mean_absolute_error(y_test, y_pred)
10 |     mse = mean_squared_error(y_test, y_pred)
11 |     r2 = r2_score(y_test, y_pred)
12 |     return mae, np.sqrt(mse), r2
13 | 
14 | 
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training data and predict the test data.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength given to ``Ridge``.

    Returns:
        The model's predictions for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha).fit(features, targets)
    return model.predict(X_test)
23 | 
24 | 
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 25-fold shuffled split.

    The split is shuffled with a fixed seed (2024) and each fold is fitted
    with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)``, each concatenated in fold order.
    """
    splitter = KFold(n_splits=25, shuffle=True, random_state=2024)
    per_fold = [
        (regression(X[train_idx], Y[train_idx], X[test_idx], 1), Y[test_idx])
        for train_idx, test_idx in splitter.split(X)
    ]
    fold_preds, fold_truths = zip(*per_fold)
    return np.concatenate(fold_preds), np.concatenate(fold_truths)
37 | 
38 | 
def predict_regression(embs, labels, display=False):
    """Cross-validate a ridge regressor on the embeddings and score it.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    predictions, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(predictions, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
47 | 
48 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the Chicago check-in prediction task.

    Only regions whose check-in count is strictly positive take part in the
    evaluation.

    Args:
        embs: Per-region embeddings, indexable by region position.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Check-In Prediction: ")
    check_in_label = np.load("./data_Chi/check_counts.npy")

    # Positions of the regions that have at least one check-in.
    kept = [idx for idx in range(len(embs)) if check_in_label[idx] > 0]
    embs_list = np.array([embs[idx] for idx in kept])
    chk_label = np.array([check_in_label[idx] for idx in kept])

    mae, rmse, r2 = predict_regression(embs_list, chk_label, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/tasks_Chi/tasks_crime.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import linear_model
 3 | from sklearn.model_selection import KFold
 4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 5 | 
 6 | 
 7 | def compute_metrics(y_pred, y_test):
 8 |     y_pred[y_pred < 0] = 0
 9 |     mae = mean_absolute_error(y_test, y_pred)
10 |     mse = mean_squared_error(y_test, y_pred)
11 |     r2 = r2_score(y_test, y_pred)
12 |     return mae, np.sqrt(mse), r2
13 | 
14 | 
def regression(X_train, y_train, X_test, alpha):
    """Train ``Ridge(alpha)`` on the training split and predict ``X_test``.

    Args:
        X_train: Training features; cast to a float array.
        y_train: Training targets; cast to a float array.
        X_test: Features to predict for (used as given).
        alpha: Ridge regularisation strength.

    Returns:
        Predictions for ``X_test``.
    """
    train_x = np.array(X_train, dtype=float)
    train_y = np.array(y_train, dtype=float)
    ridge = linear_model.Ridge(alpha=alpha).fit(train_x, train_y)
    return ridge.predict(X_test)
23 | 
24 | 
def kf_predict(X, Y):
    """Run 25-fold cross-validation and gather predictions with their truths.

    Folds are shuffled with ``random_state=2024``; each fold uses ridge
    regression with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)`` concatenated in fold order.
    """
    folds = KFold(n_splits=25, shuffle=True, random_state=2024)
    results = [
        (regression(X[tr], Y[tr], X[te], 1), Y[te])
        for tr, te in folds.split(X)
    ]
    preds, truths = zip(*results)
    return np.concatenate(preds), np.concatenate(truths)
37 | 
38 | 
def predict_regression(embs, labels, display=False):
    """Score cross-validated ridge predictions for the given embeddings.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print MAE, RMSE and R2 with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    out_of_fold, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(out_of_fold, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
47 | 
48 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the Chicago crime prediction task.

    Args:
        embs: Per-region embeddings used as regression features.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Crime Prediction: ")
    labels = np.load("./data_Chi/crime_counts.npy")

    mae, rmse, r2 = predict_regression(embs, labels, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/tasks_Chi/tasks_serviceCall.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import linear_model
 3 | from sklearn.model_selection import KFold
 4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 5 | 
 6 | 
 7 | def compute_metrics(y_pred, y_test):
 8 |     y_pred[y_pred < 0] = 0
 9 |     mae = mean_absolute_error(y_test, y_pred)
10 |     mse = mean_squared_error(y_test, y_pred)
11 |     r2 = r2_score(y_test, y_pred)
12 |     return mae, np.sqrt(mse), r2
13 | 
14 | 
def regression(X_train, y_train, X_test, alpha):
    """Fit ridge regression and return its predictions for ``X_test``.

    Args:
        X_train: Training features; converted to a float array.
        y_train: Training targets; converted to a float array.
        X_test: Features to predict for (not converted).
        alpha: Ridge regularisation strength.

    Returns:
        Predictions for ``X_test``.
    """
    fit_x = np.array(X_train, dtype=float)
    fit_y = np.array(y_train, dtype=float)
    estimator = linear_model.Ridge(alpha=alpha).fit(fit_x, fit_y)
    return estimator.predict(X_test)
23 | 
24 | 
def kf_predict(X, Y):
    """Produce out-of-fold predictions from 25 shuffled folds (seed 2024).

    Each fold trains ridge regression with ``alpha=1`` on the other folds.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)`` concatenated in fold order.
    """
    cv = KFold(n_splits=25, shuffle=True, random_state=2024)
    pairs = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in cv.split(X)
    ]
    all_preds, all_truths = zip(*pairs)
    return np.concatenate(all_preds), np.concatenate(all_truths)
37 | 
38 | 
def predict_regression(embs, labels, display=False):
    """Cross-validate on ``embs``/``labels`` and return the error metrics.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print the metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    preds, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(preds, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
47 | 
48 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the Chicago service-call prediction task.

    Args:
        embs: Per-region embeddings used as regression features.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Service Calls Prediction: ")
    labels = np.load("./data_Chi/serviceCall_counts.npy")

    mae, rmse, r2 = predict_regression(embs, labels, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/tasks_NY/tasks_chk.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import linear_model
 3 | from sklearn.model_selection import KFold
 4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 5 | 
 6 | def compute_metrics(y_pred, y_test):
 7 |     y_pred[y_pred < 0] = 0
 8 |     mae = mean_absolute_error(y_test, y_pred)
 9 |     mse = mean_squared_error(y_test, y_pred)
10 |     r2 = r2_score(y_test, y_pred)
11 |     return mae, np.sqrt(mse), r2
12 | 
13 | 
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training data and predict the test data.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength given to ``Ridge``.

    Returns:
        The model's predictions for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha).fit(features, targets)
    return model.predict(X_test)
22 | 
23 | 
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 10-fold split.

    ``KFold`` is created with ``n_splits=10`` only (no shuffle argument);
    each fold is fitted with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)``, each concatenated in fold order.
    """
    splitter = KFold(n_splits=10)
    per_fold = [
        (regression(X[train_idx], Y[train_idx], X[test_idx], 1), Y[test_idx])
        for train_idx, test_idx in splitter.split(X)
    ]
    fold_preds, fold_truths = zip(*per_fold)
    return np.concatenate(fold_preds), np.concatenate(fold_truths)
36 | 
37 | 
def predict_regression(embs, labels, display=False):
    """Cross-validate a ridge regressor on the embeddings and score it.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    predictions, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(predictions, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
46 | 
47 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the New York check-in prediction task.

    Args:
        embs: Per-region embeddings used as regression features.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Check-In Prediction: ")
    labels = np.load("./data_NY/check_counts.npy")

    mae, rmse, r2 = predict_regression(embs, labels, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/tasks_NY/tasks_crime.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | from sklearn import linear_model
 4 | from sklearn.model_selection import KFold
 5 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 6 | 
 7 | 
 8 | def compute_metrics(y_pred, y_test):
 9 |     y_pred[y_pred < 0] = 0
10 |     mae = mean_absolute_error(y_test, y_pred)
11 |     mse = mean_squared_error(y_test, y_pred)
12 |     r2 = r2_score(y_test, y_pred)
13 |     return mae, np.sqrt(mse), r2
14 | 
15 | 
def regression(X_train, y_train, X_test, alpha):
    """Train ``Ridge(alpha)`` on the training split and predict ``X_test``.

    Args:
        X_train: Training features; cast to a float array.
        y_train: Training targets; cast to a float array.
        X_test: Features to predict for (used as given).
        alpha: Ridge regularisation strength.

    Returns:
        Predictions for ``X_test``.
    """
    train_x = np.array(X_train, dtype=float)
    train_y = np.array(y_train, dtype=float)
    ridge = linear_model.Ridge(alpha=alpha).fit(train_x, train_y)
    return ridge.predict(X_test)
24 | 
25 | 
def kf_predict(X, Y):
    """Run 10-fold cross-validation and gather predictions with their truths.

    ``KFold`` is created with ``n_splits=10`` only (no shuffle argument);
    each fold uses ridge regression with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)`` concatenated in fold order.
    """
    folds = KFold(n_splits=10)
    results = [
        (regression(X[tr], Y[tr], X[te], 1), Y[te])
        for tr, te in folds.split(X)
    ]
    preds, truths = zip(*results)
    return np.concatenate(preds), np.concatenate(truths)
38 | 
39 | 
def predict_regression(embs, labels, display=False):
    """Score cross-validated ridge predictions for the given embeddings.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print MAE, RMSE and R2 with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    out_of_fold, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(out_of_fold, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
48 | 
49 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the New York crime prediction task.

    Args:
        embs: Per-region embeddings used as regression features.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Crime Prediction: ")
    labels = np.load("./data_NY/crime_counts.npy")

    mae, rmse, r2 = predict_regression(embs, labels, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/tasks_NY/tasks_serviceCall.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import linear_model
 3 | from sklearn.model_selection import KFold
 4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
 5 | 
 6 | 
 7 | def compute_metrics(y_pred, y_test):
 8 |     y_pred[y_pred < 0] = 0
 9 |     mae = mean_absolute_error(y_test, y_pred)
10 |     mse = mean_squared_error(y_test, y_pred)
11 |     r2 = r2_score(y_test, y_pred)
12 |     return mae, np.sqrt(mse), r2
13 | 
14 | 
def regression(X_train, y_train, X_test, alpha):
    """Fit ridge regression and return its predictions for ``X_test``.

    Args:
        X_train: Training features; converted to a float array.
        y_train: Training targets; converted to a float array.
        X_test: Features to predict for (not converted).
        alpha: Ridge regularisation strength.

    Returns:
        Predictions for ``X_test``.
    """
    fit_x = np.array(X_train, dtype=float)
    fit_y = np.array(y_train, dtype=float)
    estimator = linear_model.Ridge(alpha=alpha).fit(fit_x, fit_y)
    return estimator.predict(X_test)
23 | 
24 | 
def kf_predict(X, Y):
    """Produce out-of-fold predictions from a 10-fold split.

    ``KFold`` is created with ``n_splits=10`` only (no shuffle argument);
    each fold trains ridge regression with ``alpha=1`` on the other folds.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)`` concatenated in fold order.
    """
    cv = KFold(n_splits=10)
    pairs = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in cv.split(X)
    ]
    all_preds, all_truths = zip(*pairs)
    return np.concatenate(all_preds), np.concatenate(all_truths)
37 | 
38 | 
def predict_regression(embs, labels, display=False):
    """Cross-validate on ``embs``/``labels`` and return the error metrics.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print the metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    preds, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(preds, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
47 | 
48 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the New York service-call prediction task.

    Args:
        embs: Per-region embeddings used as regression features.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Service Calls Prediction: ")
    labels = np.load("./data_NY/serviceCall_counts.npy")

    mae, rmse, r2 = predict_regression(embs, labels, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/tasks_SF/tasks_chk.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import linear_model
 3 | from sklearn.model_selection import KFold
 4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 5 | 
 6 | 
 7 | def compute_metrics(y_pred, y_test):
 8 |     y_pred[y_pred < 0] = 0
 9 |     mae = mean_absolute_error(y_test, y_pred)
10 |     mse = mean_squared_error(y_test, y_pred)
11 |     r2 = r2_score(y_test, y_pred)
12 |     return mae, np.sqrt(mse), r2
13 | 
14 | 
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training data and predict the test data.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength given to ``Ridge``.

    Returns:
        The model's predictions for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha).fit(features, targets)
    return model.predict(X_test)
23 | 
24 | 
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 10-fold shuffled split.

    The split is shuffled with a fixed seed (2024) and each fold is fitted
    with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)``, each concatenated in fold order.
    """
    splitter = KFold(n_splits=10, shuffle=True, random_state=2024)
    per_fold = [
        (regression(X[train_idx], Y[train_idx], X[test_idx], 1), Y[test_idx])
        for train_idx, test_idx in splitter.split(X)
    ]
    fold_preds, fold_truths = zip(*per_fold)
    return np.concatenate(fold_preds), np.concatenate(fold_truths)
37 | 
38 | 
def predict_regression(embs, labels, display=False):
    """Cross-validate a ridge regressor on the embeddings and score it.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    predictions, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(predictions, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
47 | 
48 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the San Francisco check-in prediction task.

    Only regions whose check-in count is strictly positive take part in the
    evaluation.

    Args:
        embs: Per-region embeddings, indexable by region position.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Check-In Prediction: ")
    check_in_label = np.load("./data_SF/check_counts.npy")

    # Positions of the regions that have at least one check-in.
    kept = [idx for idx in range(len(embs)) if check_in_label[idx] > 0]
    embs_list = np.array([embs[idx] for idx in kept])
    chk_label = np.array([check_in_label[idx] for idx in kept])

    mae, rmse, r2 = predict_regression(embs_list, chk_label, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/tasks_SF/tasks_crime.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import linear_model
 3 | from sklearn.model_selection import KFold
 4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 5 | 
 6 | 
 7 | def compute_metrics(y_pred, y_test):
 8 |     y_pred[y_pred < 0] = 0
 9 |     mae = mean_absolute_error(y_test, y_pred)
10 |     mse = mean_squared_error(y_test, y_pred)
11 |     r2 = r2_score(y_test, y_pred)
12 |     return mae, np.sqrt(mse), r2
13 | 
14 | 
def regression(X_train, y_train, X_test, alpha):
    """Train ``Ridge(alpha)`` on the training split and predict ``X_test``.

    Args:
        X_train: Training features; cast to a float array.
        y_train: Training targets; cast to a float array.
        X_test: Features to predict for (used as given).
        alpha: Ridge regularisation strength.

    Returns:
        Predictions for ``X_test``.
    """
    train_x = np.array(X_train, dtype=float)
    train_y = np.array(y_train, dtype=float)
    ridge = linear_model.Ridge(alpha=alpha).fit(train_x, train_y)
    return ridge.predict(X_test)
23 | 
24 | 
def kf_predict(X, Y):
    """Run 10-fold cross-validation and gather predictions with their truths.

    Folds are shuffled with ``random_state=2024``; each fold uses ridge
    regression with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)`` concatenated in fold order.
    """
    folds = KFold(n_splits=10, shuffle=True, random_state=2024)
    results = [
        (regression(X[tr], Y[tr], X[te], 1), Y[te])
        for tr, te in folds.split(X)
    ]
    preds, truths = zip(*results)
    return np.concatenate(preds), np.concatenate(truths)
37 | 
38 | 
def predict_regression(embs, labels, display=False):
    """Score cross-validated ridge predictions for the given embeddings.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print MAE, RMSE and R2 with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    out_of_fold, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(out_of_fold, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
47 | 
48 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the San Francisco crime prediction task.

    Args:
        embs: Per-region embeddings used as regression features.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Crime Prediction: ")
    labels = np.load("./data_SF/crime_counts.npy")

    mae, rmse, r2 = predict_regression(embs, labels, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/tasks_SF/tasks_serviceCall.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import linear_model
 3 | from sklearn.model_selection import KFold
 4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
 5 | 
 6 | 
 7 | def compute_metrics(y_pred, y_test):
 8 |     y_pred[y_pred < 0] = 0
 9 |     mae = mean_absolute_error(y_test, y_pred)
10 |     mse = mean_squared_error(y_test, y_pred)
11 |     r2 = r2_score(y_test, y_pred)
12 |     return mae, np.sqrt(mse), r2
13 | 
14 | 
def regression(X_train, y_train, X_test, alpha):
    """Fit ridge regression and return its predictions for ``X_test``.

    Args:
        X_train: Training features; converted to a float array.
        y_train: Training targets; converted to a float array.
        X_test: Features to predict for (not converted).
        alpha: Ridge regularisation strength.

    Returns:
        Predictions for ``X_test``.
    """
    fit_x = np.array(X_train, dtype=float)
    fit_y = np.array(y_train, dtype=float)
    estimator = linear_model.Ridge(alpha=alpha).fit(fit_x, fit_y)
    return estimator.predict(X_test)
23 | 
24 | 
def kf_predict(X, Y):
    """Produce out-of-fold predictions from 10 shuffled folds (seed 2024).

    Each fold trains ridge regression with ``alpha=1`` on the other folds.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array aligned with ``X``.

    Returns:
        Tuple ``(predictions, truths)`` concatenated in fold order.
    """
    cv = KFold(n_splits=10, shuffle=True, random_state=2024)
    pairs = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in cv.split(X)
    ]
    all_preds, all_truths = zip(*pairs)
    return np.concatenate(all_preds), np.concatenate(all_truths)
37 | 
38 | 
def predict_regression(embs, labels, display=False):
    """Cross-validate on ``embs``/``labels`` and return the error metrics.

    Args:
        embs: Features handed to ``kf_predict``.
        labels: Targets handed to ``kf_predict``, aligned with ``embs``.
        display: When true, print the metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    preds, truths = kf_predict(embs, labels)
    mae, rmse, r2 = compute_metrics(preds, truths)
    if not display:
        return mae, rmse, r2
    print("MAE:  %.3f" % mae)
    print("RMSE: %.3f" % rmse)
    print("R2:   %.3f" % r2)
    return mae, rmse, r2
47 | 
48 | 
def do_tasks(embs, display=True):
    """Evaluate embeddings on the San Francisco service-call prediction task.

    Args:
        embs: Per-region embeddings used as regression features.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    if display:
        print("Service Calls Prediction: ")
    labels = np.load("./data_SF/serviceCall_counts.npy")

    mae, rmse, r2 = predict_regression(embs, labels, display=display)
    return mae, rmse, r2


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | from parse_args import args
 2 | import numpy as np
 3 | import torch
 4 | 
 5 | def load_data():
 6 |     data_path = args.data_path
 7 |     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 8 |     landUse_feature = np.load(data_path + args.landUse_dist)
 9 |     landUse_feature = landUse_feature[np.newaxis]
10 |     landUse_feature = torch.Tensor(landUse_feature).to(device)
11 | 
12 |     POI_feature = np.load(data_path + args.POI_dist)
13 |     POI_feature = POI_feature[np.newaxis]
14 |     POI_feature = torch.Tensor(POI_feature).to(device)
15 | 
16 |     mob_feature = np.load(data_path + args.mobility_dist)
17 |     mob_feature = mob_feature[np.newaxis]
18 |     mob_feature = torch.Tensor(mob_feature).to(device)
19 | 
20 |     mob_adj = np.load(data_path + args.mobility_adj)
21 |     mob_adj = mob_adj/np.mean(mob_adj)
22 |     mob_adj = torch.Tensor(mob_adj).to(device)
23 | 
24 |     poi_sim = np.load(data_path + args.POI_simi)
25 |     poi_sim = torch.Tensor(poi_sim).to(device)
26 | 
27 |     land_sim = np.load(data_path + args.landUse_simi)
28 |     land_sim = torch.Tensor(land_sim).to(device)
29 | 
30 |     features = [POI_feature, landUse_feature, mob_feature]
31 | 
32 |     return features, mob_adj, poi_sim, land_sim
33 | 


--------------------------------------------------------------------------------