├── .gitattributes
├── HAFusion_Model.py
├── HAFusion_train.py
├── Images
├── DAFusion.png
├── Experiment.png
└── model structure.png
├── README.md
├── data_Chi
├── check_counts.npy
├── crime_counts.npy
├── landUse_dist.npy
├── landUse_simi.npy
├── mob-adj.npy
├── mob_dist.npy
├── poi_dist.npy
├── poi_simi.npy
└── serviceCall_counts.npy
├── data_NY
├── .DS_Store
├── check_counts.npy
├── crime_counts.npy
├── landUse_dist.npy
├── landUse_simi.npy
├── mob-adj.npy
├── mob_dist.npy
├── poi_dist.npy
├── poi_simi.npy
└── serviceCall_counts.npy
├── data_SF
├── check_counts.npy
├── crime_counts.npy
├── landUse_dist.npy
├── landUse_simi.npy
├── mob-adj.npy
├── mob_dist.npy
├── poi_dist.npy
├── poi_simi.npy
└── serviceCall_counts.npy
├── parse_args.py
├── requirements.txt
├── tasks_Chi
├── tasks_chk.py
├── tasks_crime.py
└── tasks_serviceCall.py
├── tasks_NY
├── tasks_chk.py
├── tasks_crime.py
└── tasks_serviceCall.py
├── tasks_SF
├── tasks_chk.py
├── tasks_crime.py
└── tasks_serviceCall.py
└── utils.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/HAFusion_Model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from parse_args import args
6 |
class DeepFc(nn.Module):
    """Small fully connected head that remembers its most recent output.

    The stack is Linear -> Linear -> LeakyReLU -> Linear -> LeakyReLU, with a
    hidden width of twice ``input_dim``. Both activations use a negative
    slope of 0.3 and operate in place.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_dim = input_dim * 2
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(negative_slope=0.3, inplace=True),
            nn.Linear(hidden_dim, output_dim),
            nn.LeakyReLU(negative_slope=0.3, inplace=True),
        ]
        self.model = nn.Sequential(*layers)

        # Filled in by forward(); stays None until the first call.
        self.output = None

    def forward(self, x):
        """Run ``x`` through the stack, cache the result and return it."""
        self.output = self.model(x)
        return self.output

    def out_feature(self):
        """Return the tensor produced by the latest forward() call (or None)."""
        return self.output
26 |
27 |
class RegionFusionBlock(nn.Module):
    """Post-norm self-attention block followed by a two-layer feed-forward net.

    Args:
        input_dim: Embedding width used by attention, layer norms and the
            feed-forward input/output.
        nhead: Number of attention heads.
        dropout: Dropout probability shared by attention and all dropout layers.
        dim_feedforward: Hidden width of the feed-forward sub-layer.
    """

    def __init__(self, input_dim, nhead, dropout, dim_feedforward=2048):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(
            input_dim, nhead, dropout=dropout, batch_first=True, bias=True
        )
        self.dropout = nn.Dropout(dropout)

        self.linear1 = nn.Linear(input_dim, dim_feedforward)
        self.linear2 = nn.Linear(dim_feedforward, input_dim)

        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = F.relu

    def forward(self, src):
        """Apply attention and feed-forward, each with residual + LayerNorm."""
        # Self-attention sub-layer; the attention weights are not needed here.
        attn_out, _ = self.self_attn(src, src, src)
        src = self.norm1(src + self.dropout1(attn_out))

        # Position-wise feed-forward sub-layer.
        hidden = self.activation(self.linear1(src))
        ff_out = self.linear2(self.dropout(hidden))
        return self.norm2(src + self.dropout2(ff_out))
54 |
55 |
class intraAFL_Block(nn.Module):
    """Self-attention block whose attention map is fed back as an extra feature.

    In addition to the usual attention output, the attention weights are
    expanded to ``c`` channels with a 1x1 convolution, softmax-pooled along
    their last axis, projected to ``input_dim`` and added to the attention
    output before the residual connection.

    Args:
        input_dim: Embedding width.
        nhead: Number of attention heads.
        c: Number of channels the attention map is expanded to.
        dropout: Dropout probability shared by attention and dropout layers.
        dim_feedforward: Hidden width of the feed-forward sub-layer.
    """

    def __init__(self, input_dim, nhead, c, dropout, dim_feedforward=2048):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(
            input_dim, nhead, dropout=dropout, batch_first=True, bias=True
        )
        self.dropout = nn.Dropout(dropout)

        self.linear1 = nn.Linear(input_dim, dim_feedforward)
        self.linear2 = nn.Linear(dim_feedforward, input_dim)

        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.expand = nn.Conv2d(1, c, kernel_size=1)
        # NOTE: constructed but not applied in forward().
        self.pooling = nn.AvgPool2d(kernel_size=3, padding=1, stride=1)
        self.proj = nn.Linear(c, input_dim)

        self.activation = F.relu

    def forward(self, src):
        """Return ``src`` after the attention + attention-map feature block."""
        attn_out, attn_weights = self.self_attn(src, src, src)

        # Insert a channel axis so the weights can go through the 1x1 conv.
        edge_emb = self.expand(attn_weights.unsqueeze(1))

        # Softmax-weighted sum over the last axis, then move channels last
        # so the linear projection acts on the c expanded channels.
        gate = torch.softmax(edge_emb, dim=-1)
        edge_feat = (gate * edge_emb).sum(-1).transpose(-1, -2)
        attn_out = attn_out + self.proj(edge_feat)

        src = self.norm1(src + self.dropout1(attn_out))
        hidden = self.activation(self.linear1(src))
        ff_out = self.linear2(self.dropout(hidden))
        return self.norm2(src + self.dropout2(ff_out))
95 |
class intraAFL(nn.Module):
    """Stack of ``intraAFL_Block`` layers followed by a ``DeepFc`` head.

    The number of blocks, the head count and the dropout rate are read from
    the global ``args`` namespace (``NO_IntraAFL``, ``NO_head``, ``dropout``).

    Args:
        input_dim: Embedding width of every block and of the head.
        c: Channel count forwarded to each ``intraAFL_Block``.
    """

    def __init__(self, input_dim, c):
        super().__init__()
        self.input_dim = input_dim
        self.num_block = args.NO_IntraAFL

        block_kwargs = dict(
            input_dim=input_dim, nhead=args.NO_head, c=c, dropout=args.dropout
        )
        self.blocks = nn.ModuleList(
            [intraAFL_Block(**block_kwargs) for _ in range(self.num_block)]
        )

        self.fc = DeepFc(input_dim, input_dim)

    def forward(self, x):
        """Apply every block in order, drop singleton dims, then the head."""
        hidden = x
        for layer in self.blocks:
            hidden = layer(hidden)
        # squeeze() without a dim removes every size-1 axis, not only a
        # leading batch axis.
        return self.fc(hidden.squeeze())
116 |
117 |
class RegionFusion(nn.Module):
    """Stack of ``RegionFusionBlock`` layers followed by a ``DeepFc`` head.

    The number of blocks, the head count and the dropout rate are read from
    the global ``args`` namespace (``NO_RegionFusion``, ``NO_head``,
    ``dropout``).

    Args:
        input_dim: Embedding width of every block and of the head.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.input_dim = input_dim
        self.num_block = args.NO_RegionFusion

        block_kwargs = dict(
            input_dim=input_dim, nhead=args.NO_head, dropout=args.dropout
        )
        self.blocks = nn.ModuleList(
            [RegionFusionBlock(**block_kwargs) for _ in range(self.num_block)]
        )

        self.fc = DeepFc(input_dim, input_dim)

    def forward(self, x):
        """Apply every block in order, drop singleton dims, then the head."""
        hidden = x
        for layer in self.blocks:
            hidden = layer(hidden)
        # squeeze() without a dim removes every size-1 axis.
        return self.fc(hidden.squeeze())
138 |
139 |
class interAFL_Block(nn.Module):
    """Attention through two bias-free linear maps with double normalisation.

    ``mk`` maps the input from ``d_model`` to ``S`` scores; the scores are
    softmax-normalised over dim 1, re-normalised to sum to one over dim 2,
    and mapped back to ``d_model`` by ``mv``.

    Args:
        d_model: Width of the input and output features.
        S: Width of the intermediate score space.
    """

    def __init__(self, d_model, S):
        super().__init__()
        self.mk = nn.Linear(d_model, S, bias=False)
        self.mv = nn.Linear(S, d_model, bias=False)
        self.softmax = nn.Softmax(dim=1)
        self.init_weights()

    def init_weights(self):
        """Re-initialise conv, batch-norm and linear sub-modules in place."""

        def zero_bias(module):
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                zero_bias(module)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, std=0.001)
                zero_bias(module)

    def forward(self, queries):
        """Return ``queries`` after the score / normalise / project steps."""
        scores = self.softmax(self.mk(queries))
        # Second normalisation: make the scores sum to one over dim 2.
        scores = scores / scores.sum(dim=2, keepdim=True)
        return self.mv(scores)
170 |
171 |
class interAFL(nn.Module):
    """Stack of ``interAFL_Block`` layers followed by a ``DeepFc`` head.

    The number of blocks is read from ``args.NO_InterAFL``.

    Args:
        input_dim: Feature width of every block and of the head.
        d_m: Intermediate score width passed to each ``interAFL_Block``.
    """

    def __init__(self, input_dim, d_m):
        super().__init__()
        self.input_dim = input_dim
        self.num_block = args.NO_InterAFL

        layers = [interAFL_Block(input_dim, d_m) for _ in range(self.num_block)]
        self.blocks = nn.ModuleList(layers)

        self.fc = DeepFc(input_dim, input_dim)

    def forward(self, x):
        """Apply every block in order, drop singleton dims, then the head."""
        hidden = x
        for layer in self.blocks:
            hidden = layer(hidden)
        # squeeze() without a dim removes every size-1 axis.
        return self.fc(hidden.squeeze())
190 |
191 |
class ViewFusion(nn.Module):
    """Compute softmax-normalised fusion coefficients along the last input axis.

    Args:
        emb_dim: Number of input channels expected by the first 1x1 conv.
        out_dim: Number of channels after the first 1x1 conv.
    """

    def __init__(self, emb_dim, out_dim):
        super().__init__()
        self.W = nn.Conv1d(emb_dim, out_dim, kernel_size=1, bias=False)
        self.f1 = nn.Conv1d(out_dim, 1, kernel_size=1)
        self.f2 = nn.Conv1d(out_dim, 1, kernel_size=1)
        self.act = nn.LeakyReLU(negative_slope=0.3, inplace=True)

    def forward(self, src):
        """Return a 1-D tensor of coefficients that sums to one."""
        projected = self.W(src)
        # Broadcasting a (., 1, L) tensor against its (., L, 1) counterpart
        # yields an L x L table of pairwise logits per batch entry.
        pairwise = self.f1(projected) + self.f2(projected).transpose(1, 2)
        # Average over the last axis, then over the batch axis.
        scores = self.act(pairwise).mean(dim=-1).mean(dim=0)
        return F.softmax(scores, dim=-1)
209 |
210 |
class HAFusion(nn.Module):
    """Full model: per-view encoders, inter-view attention, view and region fusion.

    The forward pass takes three view inputs (POI, land use, mobility),
    encodes each with its own ``intraAFL``, mixes the views with ``interAFL``,
    fuses them with coefficients from ``ViewFusion``, refines the result with
    ``RegionFusion`` and a ``DeepFc`` head, and finally emits four linearly
    decoded outputs.

    Args:
        poi_dim: Input width of the POI view.
        landUse_dim: Input width of the land-use view.
        input_dim: Common width the views are encoded at.
        output_dim: Width of the final embedding and of each decoder output.
        d_prime: Output channels of the ``ViewFusion`` projection.
        d_m: Intermediate width of the ``interAFL`` blocks.
        c: Channel count of the ``intraAFL`` attention-map expansion.
    """

    def __init__(self, poi_dim, landUse_dim, input_dim, output_dim, d_prime, d_m, c):
        super().__init__()
        self.input_dim = input_dim

        # Sub-modules are created in a fixed order so that parameter
        # registration and random initialisation stay reproducible.
        self.densePOI2 = nn.Linear(poi_dim, input_dim)
        self.denseLandUse3 = nn.Linear(landUse_dim, input_dim)

        self.encoderPOI = intraAFL(input_dim, c)
        self.encoderLandUse = intraAFL(input_dim, c)
        self.encoderMob = intraAFL(input_dim, c)

        self.regionFusionLayer = RegionFusion(input_dim)

        self.interViewEncoder = interAFL(input_dim, d_m)

        self.fc = DeepFc(input_dim, output_dim)

        # Learnable mixing weights for the intra-view (para1) and
        # inter-view (para2) embeddings; normalised in forward().
        self.para1 = torch.nn.Parameter(
            torch.tensor([0.1], dtype=torch.float32), requires_grad=True
        )
        self.para2 = torch.nn.Parameter(
            torch.tensor([0.9], dtype=torch.float32), requires_grad=True
        )

        self.viewFusionLayer = ViewFusion(input_dim, d_prime)

        self.activation = F.relu
        self.dropout = nn.Dropout(0.1)
        self.decoder_s = nn.Linear(output_dim, output_dim)
        self.decoder_t = nn.Linear(output_dim, output_dim)
        self.decoder_p = nn.Linear(output_dim, output_dim)
        self.decoder_l = nn.Linear(output_dim, output_dim)

        # Embedding cached by forward(); None until the first call.
        self.feature = None

    def forward(self, x):
        """Encode and fuse the three views.

        Args:
            x: Iterable unpacked as ``(poi_emb, landUse_emb, mob_emb)``.

        Returns:
            Tuple ``(out_s, out_t, out_p, out_l)``: the source, destination,
            POI and land-use decodings of the fused region embedding.
        """
        poi_emb, landUse_emb, mob_emb = x

        # Only the POI and land-use views are projected to input_dim; the
        # mobility view is passed to its encoder unchanged.
        poi_emb = self.dropout(self.activation(self.densePOI2(poi_emb)))
        landUse_emb = self.dropout(self.activation(self.denseLandUse3(landUse_emb)))

        intra_view_embs = torch.stack([
            self.encoderPOI(poi_emb),
            self.encoderLandUse(landUse_emb),
            self.encoderMob(mob_emb),
        ])

        # Inter-view attention works with the first two axes swapped.
        inter_view_embs = self.interViewEncoder(
            intra_view_embs.transpose(0, 1)
        ).transpose(0, 1)

        # Convex-style combination using the normalised learnable weights.
        weight_sum = self.para1 + self.para2
        p1 = self.para1 / weight_sum
        p2 = self.para2 / weight_sum
        out = inter_view_embs * p2 + intra_view_embs * p1

        # Weighted sum of the three views.
        coef = self.viewFusionLayer(out.transpose(0, 2))
        fused = coef[0] * out[0] + coef[1] * out[1] + coef[2] * out[2]

        # Region fusion expects a leading axis, so add one.
        fused = self.regionFusionLayer(fused.unsqueeze(0))
        out = self.fc(fused)

        self.feature = out

        return (
            self.decoder_s(out),  # source embedding of regions
            self.decoder_t(out),  # destination embedding of regions
            self.decoder_p(out),  # poi embedding of regions
            self.decoder_l(out),  # landuse embedding of regions
        )

    def out_feature(self):
        """Return the embedding cached by the latest forward() call (or None)."""
        return self.feature
--------------------------------------------------------------------------------
/HAFusion_train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | import torch.nn.functional as F
6 | import utils
7 | from parse_args import args
8 | import tasks_NY.tasks_crime, tasks_NY.tasks_chk, tasks_NY.tasks_serviceCall
9 | import tasks_Chi.tasks_crime, tasks_Chi.tasks_chk, tasks_Chi.tasks_serviceCall
10 | import tasks_SF.tasks_crime, tasks_SF.tasks_chk, tasks_SF.tasks_serviceCall
11 | from HAFusion_Model import HAFusion
12 |
# Input features and supervision matrices, loaded once at import time.
features, mob_adj, poi_sim, land_sim = utils.load_data()

# Run configuration, copied from the parsed command-line arguments.
city, task = args.city, args.task
embedding_size = args.embedding_size
d_prime, d_m, c = args.d_prime, args.d_m, args.c
POI_dim, landUse_dim = args.POI_dim, args.landUse_dim
region_num = args.region_num
24 |
25 | def _mob_loss(s_embeddings, t_embeddings, mob):
26 | inner_prod = torch.mm(s_embeddings, t_embeddings.T)
27 | softmax1 = nn.Softmax(dim=-1)
28 | phat = softmax1(inner_prod)
29 | loss = torch.sum(-torch.mul(mob, torch.log(phat + 0.0001)))
30 | inner_prod = torch.mm(t_embeddings, s_embeddings.T)
31 | softmax2 = nn.Softmax(dim=-1)
32 | phat = softmax2(inner_prod)
33 | loss += torch.sum(-torch.mul(torch.transpose(mob, 0, 1), torch.log(phat + 0.0001)))
34 | return loss
35 |
36 |
37 | def _general_loss(embeddings, adj):
38 | inner_prod = F.cosine_similarity(embeddings.unsqueeze(1), embeddings.unsqueeze(0), dim=2)
39 | loss = F.mse_loss(inner_prod, adj)
40 | return loss
41 |
42 |
class ModelLoss(nn.Module):
    """Sum of the mobility loss and the POI / land-use similarity losses."""

    def __init__(self):
        super().__init__()

    def forward(self, out_s, out_t, mob_adj, out_p, poi_sim, out_l, land_sim):
        """Return ``poi_loss + land_loss + mob_loss`` for the given outputs."""
        mob_loss = _mob_loss(out_s, out_t, mob_adj)
        poi_loss = _general_loss(out_p, poi_sim)
        land_loss = _general_loss(out_l, land_sim)
        return poi_loss + land_loss + mob_loss
53 |
def train_model(input_features, mob_adj, poi_sim, land_sim, model, model_loss, city, task):
    """Train ``model`` and save the embedding with the best downstream R2.

    Every 30 epochs the embedding cached by the model is scored on the
    downstream task selected by ``task`` and ``city``; the embedding with the
    highest R2 seen so far is written to ``best_emb.npy`` once training ends.

    Args:
        input_features: Input passed unchanged to ``model``.
        mob_adj: Mobility target passed to ``model_loss``.
        poi_sim: POI similarity target passed to ``model_loss``.
        land_sim: Land-use similarity target passed to ``model_loss``.
        model: Module returning ``(out_s, out_t, out_p, out_l)`` and exposing
            ``out_feature()``.
        model_loss: Callable combining the model outputs and targets.
        city: One of ``"NY"``, ``"Chi"``, ``"SF"``.
        task: One of ``"checkIn"``, ``"crime"``, ``"serviceCall"``.

    Raises:
        ValueError: If the ``task`` / ``city`` pair has no evaluator.
        RuntimeError: If no evaluation ran (e.g. ``args.epochs`` is 0).
    """
    evaluators = {
        ("checkIn", "NY"): tasks_NY.tasks_chk.do_tasks,
        ("checkIn", "Chi"): tasks_Chi.tasks_chk.do_tasks,
        ("checkIn", "SF"): tasks_SF.tasks_chk.do_tasks,
        ("crime", "NY"): tasks_NY.tasks_crime.do_tasks,
        ("crime", "Chi"): tasks_Chi.tasks_crime.do_tasks,
        ("crime", "SF"): tasks_SF.tasks_crime.do_tasks,
        ("serviceCall", "NY"): tasks_NY.tasks_serviceCall.do_tasks,
        ("serviceCall", "Chi"): tasks_Chi.tasks_serviceCall.do_tasks,
        ("serviceCall", "SF"): tasks_SF.tasks_serviceCall.do_tasks,
    }
    # Fail before any training work instead of hitting an unbound ``r2``
    # after the first epoch.
    if (task, city) not in evaluators:
        raise ValueError(
            "Unsupported task/city combination: task={!r}, city={!r}".format(task, city)
        )
    evaluate = evaluators[(task, city)]

    epochs = args.epochs
    learning_rate = args.learning_rate
    weight_decay = args.weight_decay

    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Start below any achievable score so the first evaluated embedding is
    # always kept, even when every R2 is zero or negative.
    best_emb = None
    best_r2 = float("-inf")

    for epoch in range(epochs):
        model.train()
        out_s, out_t, out_p, out_l = model(input_features)
        loss = model_loss(out_s, out_t, mob_adj, out_p, poi_sim, out_l, land_sim)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 30 != 0:
            continue

        print("Epoch {}, Loss {}".format(epoch, loss.item()))
        embs = model.out_feature().detach().cpu().numpy()
        _, _, r2 = evaluate(embs)

        if best_emb is None or best_r2 < r2:
            best_r2 = r2
            best_emb = embs

    if best_emb is None:
        raise RuntimeError("No embedding was evaluated; nothing to save.")

    np.save("best_emb.npy", best_emb)
102 |
def test_model(city, task):
    """Evaluate the saved best embedding on one downstream task.

    Loads ``./best_emb.npy``, prints a banner for the selected task and city,
    and runs the matching ``do_tasks`` evaluator.

    Args:
        city: One of ``"NY"``, ``"Chi"``, ``"SF"``.
        task: One of ``"checkIn"``, ``"crime"``, ``"serviceCall"``.

    Returns:
        Tuple ``(mae, rmse, r2)`` as returned by the evaluator.

    Raises:
        ValueError: If the ``task`` / ``city`` pair has no evaluator.
    """
    evaluators = {
        ("checkIn", "NY"): (
            '>>>>>>>>>>>>>>>>> Check-In in New York City', tasks_NY.tasks_chk.do_tasks),
        ("checkIn", "Chi"): (
            '>>>>>>>>>>>>>>>>> Check-In in Chicago', tasks_Chi.tasks_chk.do_tasks),
        ("checkIn", "SF"): (
            '>>>>>>>>>>>>>>>>> Check-In in San Francisco', tasks_SF.tasks_chk.do_tasks),
        ("crime", "NY"): (
            '>>>>>>>>>>>>>>>>> Crime in New York City', tasks_NY.tasks_crime.do_tasks),
        ("crime", "Chi"): (
            '>>>>>>>>>>>>>>>>> Crime in Chicago', tasks_Chi.tasks_crime.do_tasks),
        ("crime", "SF"): (
            '>>>>>>>>>>>>>>>>> Crime in San Francisco', tasks_SF.tasks_crime.do_tasks),
        ("serviceCall", "NY"): (
            '>>>>>>>>>>>>>>>>> Service Calls in New York City',
            tasks_NY.tasks_serviceCall.do_tasks),
        ("serviceCall", "Chi"): (
            '>>>>>>>>>>>>>>>>> Service Calls in Chicago',
            tasks_Chi.tasks_serviceCall.do_tasks),
        ("serviceCall", "SF"): (
            '>>>>>>>>>>>>>>>>> Service Calls in San Francisco',
            tasks_SF.tasks_serviceCall.do_tasks),
    }
    # A mistyped task or city used to fall through silently; report it.
    if (task, city) not in evaluators:
        raise ValueError(
            "Unsupported task/city combination: task={!r}, city={!r}".format(task, city)
        )
    banner, evaluate = evaluators[(task, city)]

    best_emb = np.load("./best_emb.npy")
    print("Best region embeddings")
    print(banner)
    mae, rmse, r2 = evaluate(best_emb)
    return mae, rmse, r2
136 |
if __name__ == '__main__':
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)

    model = HAFusion(
        POI_dim, landUse_dim, region_num, embedding_size, d_prime, d_m, c
    )
    model = model.to(device)
    model_loss = ModelLoss()

    print('Model Training-----------------')
    model.train()
    train_model(features, mob_adj, poi_sim, land_sim, model, model_loss, city, task)

    print("Downstream task test-----------")
    test_model(city, task)
148 |
149 |
150 |
--------------------------------------------------------------------------------
/Images/DAFusion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/DAFusion.png
--------------------------------------------------------------------------------
/Images/Experiment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/Experiment.png
--------------------------------------------------------------------------------
/Images/model structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/Images/model structure.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HAFusion: Urban Region Representation Learning with Attentive Fusion (ICDE 2024)
2 |
3 | This is a PyTorch implementation of the [HAFusion paper](https://arxiv.org/abs/2312.04606).
4 |
5 | Authors: Fengze Sun, Jianzhong Qi, Yanchuan Chang, Xiaoliang Fan, Shanika Karunasekera, and Egemen Tanin
6 |
7 | ```bibtex
8 | @inproceedings{sun2024urban,
9 | title={Urban region representation learning with attentive fusion},
10 | author={Sun, Fengze and Qi, Jianzhong and Chang, Yanchuan and Fan, Xiaoliang and Karunasekera, Shanika and Tanin, Egemen},
11 | booktitle={2024 IEEE 40th International Conference on Data Engineering (ICDE)},
12 | pages={4409--4421},
13 | year={2024},
14 | organization={IEEE}
15 | }
16 | ```
17 | ## Model Structure
18 |
19 |
20 |
21 |
22 | ## Experiments
23 | Overall Prediction Accuracy Results
24 |
25 |
26 |
27 |
28 | Prediction Accuracy Results When Powering Existing Models with Our DAFusion Module (NYC)
29 |
30 |

31 |
32 |
33 | ## Requirements
34 | - Python 3.8.18
35 | - `pip install -r requirements.txt`
36 |
37 | ## Quick Start
38 | To train and test HAFusion on a specific city and a specific downstream task:
39 |
40 | - CITY_NAME: NY or Chi or SF
41 | - TASK_NAME: checkIn or crime or serviceCall
42 |
43 | ```bash
44 | python HAFusion_train.py --city CITY_NAME --task TASK_NAME
45 | ```
46 |
47 | ## Contact
48 | Email fengzes@student.unimelb.edu.au if you have any queries.
49 |
--------------------------------------------------------------------------------
/data_Chi/check_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/check_counts.npy
--------------------------------------------------------------------------------
/data_Chi/crime_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/crime_counts.npy
--------------------------------------------------------------------------------
/data_Chi/landUse_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/landUse_dist.npy
--------------------------------------------------------------------------------
/data_Chi/landUse_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/landUse_simi.npy
--------------------------------------------------------------------------------
/data_Chi/mob-adj.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/mob-adj.npy
--------------------------------------------------------------------------------
/data_Chi/mob_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/mob_dist.npy
--------------------------------------------------------------------------------
/data_Chi/poi_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/poi_dist.npy
--------------------------------------------------------------------------------
/data_Chi/poi_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/poi_simi.npy
--------------------------------------------------------------------------------
/data_Chi/serviceCall_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_Chi/serviceCall_counts.npy
--------------------------------------------------------------------------------
/data_NY/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/.DS_Store
--------------------------------------------------------------------------------
/data_NY/check_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/check_counts.npy
--------------------------------------------------------------------------------
/data_NY/crime_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/crime_counts.npy
--------------------------------------------------------------------------------
/data_NY/landUse_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/landUse_dist.npy
--------------------------------------------------------------------------------
/data_NY/landUse_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/landUse_simi.npy
--------------------------------------------------------------------------------
/data_NY/mob-adj.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/mob-adj.npy
--------------------------------------------------------------------------------
/data_NY/mob_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/mob_dist.npy
--------------------------------------------------------------------------------
/data_NY/poi_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/poi_dist.npy
--------------------------------------------------------------------------------
/data_NY/poi_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/poi_simi.npy
--------------------------------------------------------------------------------
/data_NY/serviceCall_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_NY/serviceCall_counts.npy
--------------------------------------------------------------------------------
/data_SF/check_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/check_counts.npy
--------------------------------------------------------------------------------
/data_SF/crime_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/crime_counts.npy
--------------------------------------------------------------------------------
/data_SF/landUse_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/landUse_dist.npy
--------------------------------------------------------------------------------
/data_SF/landUse_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/landUse_simi.npy
--------------------------------------------------------------------------------
/data_SF/mob-adj.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/mob-adj.npy
--------------------------------------------------------------------------------
/data_SF/mob_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/mob_dist.npy
--------------------------------------------------------------------------------
/data_SF/poi_dist.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/poi_dist.npy
--------------------------------------------------------------------------------
/data_SF/poi_simi.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/poi_simi.npy
--------------------------------------------------------------------------------
/data_SF/serviceCall_counts.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MiRuacle24/HAFusion/73fbe911901ada71bb40129e19d86dedb9b9602c/data_SF/serviceCall_counts.npy
--------------------------------------------------------------------------------
/parse_args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
# Per-city defaults for the data location and model hyper-parameters.
# They are applied after parsing, for every option the user did not set.
_CITY_DEFAULTS = {
    "NY": {
        "data_path": './data_NY',
        "POI_dim": 26,
        "landUse_dim": 11,
        "region_num": 180,
        "NO_IntraAFL": 3,
        "NO_InterAFL": 3,
        "NO_RegionFusion": 3,
        "NO_head": 4,
        "d_prime": 64,
        "d_m": 72,
        "c": 32,
    },
    "Chi": {
        "data_path": './data_Chi',
        "POI_dim": 26,
        "landUse_dim": 12,
        "region_num": 77,
        "NO_IntraAFL": 1,
        "NO_InterAFL": 2,
        "NO_RegionFusion": 3,
        "NO_head": 1,
        "d_prime": 32,
        "d_m": 36,
        "c": 32,
    },
    "SF": {
        "data_path": './data_SF',
        "POI_dim": 26,
        "landUse_dim": 23,
        "region_num": 175,
        "NO_IntraAFL": 3,
        "NO_InterAFL": 2,
        "NO_RegionFusion": 3,
        "NO_head": 5,
        "d_prime": 64,
        "d_m": 72,
        "c": 32,
    },
}

parser = argparse.ArgumentParser()

# -----------------------File------------------------
parser.add_argument('--city', default="NY", choices=list(_CITY_DEFAULTS),
                    help='City name, can be NY or Chi or SF')
parser.add_argument('--task', default="checkIn",
                    choices=['crime', 'checkIn', 'serviceCall'],
                    help='Downstream task name, can be crime or checkIn or serviceCall')
parser.add_argument('--mobility_dist', default='/mob_dist.npy')
parser.add_argument('--POI_dist', default='/poi_dist.npy')
parser.add_argument('--landUse_dist', default='/landUse_dist.npy')
parser.add_argument('--mobility_adj', default='/mob-adj.npy')
parser.add_argument('--POI_simi', default='/poi_simi.npy')
parser.add_argument('--landUse_simi', default='/landUse_simi.npy')

# -----------------------Model-----------------------
parser.add_argument('--embedding_size', type=int, default=144)
parser.add_argument('--learning_rate', type=float, default=0.0005)
parser.add_argument('--weight_decay', type=float, default=5e-4)
parser.add_argument('--epochs', type=int, default=2000)
parser.add_argument('--dropout', type=float, default=0.1)

# -----------------------City--------------------------- #
# City-dependent options are registered up front with a None default so one
# parse accepts them on the command line. Parsing before they were registered
# rejected e.g. ``--d_m 64`` and resolved ``--c`` as an abbreviation of
# ``--city``.
parser.add_argument('--data_path', default=None)
for _name in ('POI_dim', 'landUse_dim', 'region_num', 'NO_IntraAFL',
              'NO_InterAFL', 'NO_RegionFusion', 'NO_head', 'd_prime', 'd_m', 'c'):
    parser.add_argument('--' + _name, type=int, default=None)

args = parser.parse_args()

# Fill in the per-city default for every option left unset.
for _name, _value in _CITY_DEFAULTS[args.city].items():
    if getattr(args, _name) is None:
        setattr(args, _name, _value)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.24.3
2 | scikit_learn==1.2.2
3 | torch==1.10.2
4 |
--------------------------------------------------------------------------------
/tasks_Chi/tasks_chk.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import linear_model
3 | from sklearn.model_selection import KFold
4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5 |
6 |
def compute_metrics(y_pred, y_test):
    """Return ``(mae, rmse, r2)`` for predictions clipped to be non-negative.

    Negative predictions are replaced by 0 before scoring. The clipping is
    done on a copy, so the caller's ``y_pred`` is left untouched.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values.

    Returns:
        Tuple of mean absolute error, root mean squared error and R2 score.
    """
    # np.clip returns a new array instead of modifying y_pred in place.
    clipped = np.clip(y_pred, 0, None)
    mae = mean_absolute_error(y_test, clipped)
    mse = mean_squared_error(y_test, clipped)
    r2 = r2_score(y_test, clipped)
    return mae, np.sqrt(mse), r2
13 |
14 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training data and predict ``X_test``.

    Args:
        X_train: Training features, converted to a float array.
        y_train: Training targets, converted to a float array.
        X_test: Features to predict, passed to the model unchanged.
        alpha: Regularisation strength given to ``Ridge``.

    Returns:
        Predictions for ``X_test``.
    """
    train_features = np.array(X_train, dtype=float)
    train_targets = np.array(y_train, dtype=float)

    ridge = linear_model.Ridge(alpha=alpha)
    ridge.fit(train_features, train_targets)
    return ridge.predict(X_test)
23 |
24 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 25-fold split.

    The split is seeded with ``random_state=2024`` and every fold is fitted
    with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=25, shuffle=True, random_state=2024)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
37 |
38 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
47 |
48 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on check-in prediction using ./data_Chi labels.

    Rows whose check-in label is not strictly positive are excluded before
    the cross-validated regression is run.

    Args:
        embs: Array-like of embeddings, one row per label index.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` for the retained rows.
    """
    if display:
        print("Check-In Prediction: ")
    check_in_label = np.load("./data_Chi/check_counts.npy")

    embs = np.asarray(embs)
    # Only the first len(embs) labels are paired with an embedding row.
    keep = np.flatnonzero(check_in_label[:len(embs)] > 0)

    return predict_regression(embs[keep], check_in_label[keep], display=display)
--------------------------------------------------------------------------------
/tasks_Chi/tasks_crime.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import linear_model
3 | from sklearn.model_selection import KFold
4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5 |
6 |
def compute_metrics(y_pred, y_test):
    """Score predictions against ground truth with MAE, RMSE and R2.

    Negative predictions are clamped to zero before scoring. The clamp is
    applied to a copy, so the caller's ``y_pred`` is not modified and plain
    sequences are accepted as well as arrays.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values aligned with ``y_pred``.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    # np.maximum allocates a new array instead of writing into the argument.
    clipped = np.maximum(np.asarray(y_pred), 0)
    mae = mean_absolute_error(y_test, clipped)
    rmse = np.sqrt(mean_squared_error(y_test, clipped))
    r2 = r2_score(y_test, clipped)
    return mae, rmse, r2
13 |
14 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict the test split.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength handed to ``linear_model.Ridge``.

    Returns:
        Predictions of the fitted model for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha)
    model.fit(features, targets)
    return model.predict(X_test)
23 |
24 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 25-fold split.

    The split is seeded with ``random_state=2024`` and every fold is fitted
    with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=25, shuffle=True, random_state=2024)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
37 |
38 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
47 |
48 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on crime-count prediction using ./data_Chi labels.

    Args:
        embs: Embedding array, one row per label.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``predict_regression``.
    """
    if display:
        print("Crime Prediction: ")
    targets = np.load("./data_Chi/crime_counts.npy")
    return predict_regression(embs, targets, display=display)
--------------------------------------------------------------------------------
/tasks_Chi/tasks_serviceCall.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import linear_model
3 | from sklearn.model_selection import KFold
4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5 |
6 |
def compute_metrics(y_pred, y_test):
    """Score predictions against ground truth with MAE, RMSE and R2.

    Negative predictions are clamped to zero before scoring. The clamp is
    applied to a copy, so the caller's ``y_pred`` is not modified and plain
    sequences are accepted as well as arrays.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values aligned with ``y_pred``.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    # np.maximum allocates a new array instead of writing into the argument.
    clipped = np.maximum(np.asarray(y_pred), 0)
    mae = mean_absolute_error(y_test, clipped)
    rmse = np.sqrt(mean_squared_error(y_test, clipped))
    r2 = r2_score(y_test, clipped)
    return mae, rmse, r2
13 |
14 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict the test split.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength handed to ``linear_model.Ridge``.

    Returns:
        Predictions of the fitted model for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha)
    model.fit(features, targets)
    return model.predict(X_test)
23 |
24 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 25-fold split.

    The split is seeded with ``random_state=2024`` and every fold is fitted
    with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=25, shuffle=True, random_state=2024)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
37 |
38 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
47 |
48 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on service-call prediction using ./data_Chi labels.

    Args:
        embs: Embedding array, one row per label.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``predict_regression``.
    """
    if display:
        print("Service Calls Prediction: ")
    targets = np.load("./data_Chi/serviceCall_counts.npy")
    return predict_regression(embs, targets, display=display)
--------------------------------------------------------------------------------
/tasks_NY/tasks_chk.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import linear_model
3 | from sklearn.model_selection import KFold
4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5 |
def compute_metrics(y_pred, y_test):
    """Score predictions against ground truth with MAE, RMSE and R2.

    Negative predictions are clamped to zero before scoring. The clamp is
    applied to a copy, so the caller's ``y_pred`` is not modified and plain
    sequences are accepted as well as arrays.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values aligned with ``y_pred``.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    # np.maximum allocates a new array instead of writing into the argument.
    clipped = np.maximum(np.asarray(y_pred), 0)
    mae = mean_absolute_error(y_test, clipped)
    rmse = np.sqrt(mean_squared_error(y_test, clipped))
    r2 = r2_score(y_test, clipped)
    return mae, rmse, r2
12 |
13 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict the test split.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength handed to ``linear_model.Ridge``.

    Returns:
        Predictions of the fitted model for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha)
    model.fit(features, targets)
    return model.predict(X_test)
22 |
23 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 10-fold split.

    ``KFold`` is built with only ``n_splits=10`` (no shuffle or seed
    arguments), and every fold is fitted with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=10)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
36 |
37 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
46 |
47 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on check-in prediction using ./data_NY labels.

    All rows are used; no filtering on the label value is applied here.

    Args:
        embs: Embedding array, one row per label.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``predict_regression``.
    """
    if display:
        print("Check-In Prediction: ")
    targets = np.load("./data_NY/check_counts.npy")
    return predict_regression(embs, targets, display=display)
--------------------------------------------------------------------------------
/tasks_NY/tasks_crime.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | from sklearn import linear_model
4 | from sklearn.model_selection import KFold
5 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
6 |
7 |
def compute_metrics(y_pred, y_test):
    """Score predictions against ground truth with MAE, RMSE and R2.

    Negative predictions are clamped to zero before scoring. The clamp is
    applied to a copy, so the caller's ``y_pred`` is not modified and plain
    sequences are accepted as well as arrays.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values aligned with ``y_pred``.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    # np.maximum allocates a new array instead of writing into the argument.
    clipped = np.maximum(np.asarray(y_pred), 0)
    mae = mean_absolute_error(y_test, clipped)
    rmse = np.sqrt(mean_squared_error(y_test, clipped))
    r2 = r2_score(y_test, clipped)
    return mae, rmse, r2
14 |
15 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict the test split.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength handed to ``linear_model.Ridge``.

    Returns:
        Predictions of the fitted model for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha)
    model.fit(features, targets)
    return model.predict(X_test)
24 |
25 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 10-fold split.

    ``KFold`` is built with only ``n_splits=10`` (no shuffle or seed
    arguments), and every fold is fitted with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=10)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
38 |
39 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
48 |
49 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on crime-count prediction using ./data_NY labels.

    Args:
        embs: Embedding array, one row per label.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``predict_regression``.
    """
    if display:
        print("Crime Prediction: ")
    targets = np.load("./data_NY/crime_counts.npy")
    return predict_regression(embs, targets, display=display)
--------------------------------------------------------------------------------
/tasks_NY/tasks_serviceCall.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import linear_model
3 | from sklearn.model_selection import KFold
4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
5 |
6 |
def compute_metrics(y_pred, y_test):
    """Score predictions against ground truth with MAE, RMSE and R2.

    Negative predictions are clamped to zero before scoring. The clamp is
    applied to a copy, so the caller's ``y_pred`` is not modified and plain
    sequences are accepted as well as arrays.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values aligned with ``y_pred``.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    # np.maximum allocates a new array instead of writing into the argument.
    clipped = np.maximum(np.asarray(y_pred), 0)
    mae = mean_absolute_error(y_test, clipped)
    rmse = np.sqrt(mean_squared_error(y_test, clipped))
    r2 = r2_score(y_test, clipped)
    return mae, rmse, r2
13 |
14 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict the test split.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength handed to ``linear_model.Ridge``.

    Returns:
        Predictions of the fitted model for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha)
    model.fit(features, targets)
    return model.predict(X_test)
23 |
24 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a 10-fold split.

    ``KFold`` is built with only ``n_splits=10`` (no shuffle or seed
    arguments), and every fold is fitted with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=10)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
37 |
38 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
47 |
48 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on service-call prediction using ./data_NY labels.

    Args:
        embs: Embedding array, one row per label.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``predict_regression``.
    """
    if display:
        print("Service Calls Prediction: ")
    targets = np.load("./data_NY/serviceCall_counts.npy")
    return predict_regression(embs, targets, display=display)
--------------------------------------------------------------------------------
/tasks_SF/tasks_chk.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import linear_model
3 | from sklearn.model_selection import KFold
4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5 |
6 |
def compute_metrics(y_pred, y_test):
    """Score predictions against ground truth with MAE, RMSE and R2.

    Negative predictions are clamped to zero before scoring. The clamp is
    applied to a copy, so the caller's ``y_pred`` is not modified and plain
    sequences are accepted as well as arrays.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values aligned with ``y_pred``.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    # np.maximum allocates a new array instead of writing into the argument.
    clipped = np.maximum(np.asarray(y_pred), 0)
    mae = mean_absolute_error(y_test, clipped)
    rmse = np.sqrt(mean_squared_error(y_test, clipped))
    r2 = r2_score(y_test, clipped)
    return mae, rmse, r2
13 |
14 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict the test split.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength handed to ``linear_model.Ridge``.

    Returns:
        Predictions of the fitted model for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha)
    model.fit(features, targets)
    return model.predict(X_test)
23 |
24 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 10-fold split.

    The split is seeded with ``random_state=2024`` and every fold is fitted
    with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=10, shuffle=True, random_state=2024)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
37 |
38 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
47 |
48 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on check-in prediction using ./data_SF labels.

    Rows whose check-in label is not strictly positive are excluded before
    the cross-validated regression is run.

    Args:
        embs: Array-like of embeddings, one row per label index.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` for the retained rows.
    """
    if display:
        print("Check-In Prediction: ")
    check_in_label = np.load("./data_SF/check_counts.npy")

    embs = np.asarray(embs)
    # Only the first len(embs) labels are paired with an embedding row.
    keep = np.flatnonzero(check_in_label[:len(embs)] > 0)

    return predict_regression(embs[keep], check_in_label[keep], display=display)
--------------------------------------------------------------------------------
/tasks_SF/tasks_crime.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import linear_model
3 | from sklearn.model_selection import KFold
4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5 |
6 |
def compute_metrics(y_pred, y_test):
    """Score predictions against ground truth with MAE, RMSE and R2.

    Negative predictions are clamped to zero before scoring. The clamp is
    applied to a copy, so the caller's ``y_pred`` is not modified and plain
    sequences are accepted as well as arrays.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values aligned with ``y_pred``.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    # np.maximum allocates a new array instead of writing into the argument.
    clipped = np.maximum(np.asarray(y_pred), 0)
    mae = mean_absolute_error(y_test, clipped)
    rmse = np.sqrt(mean_squared_error(y_test, clipped))
    r2 = r2_score(y_test, clipped)
    return mae, rmse, r2
13 |
14 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict the test split.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength handed to ``linear_model.Ridge``.

    Returns:
        Predictions of the fitted model for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha)
    model.fit(features, targets)
    return model.predict(X_test)
23 |
24 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 10-fold split.

    The split is seeded with ``random_state=2024`` and every fold is fitted
    with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=10, shuffle=True, random_state=2024)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
37 |
38 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
47 |
48 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on crime-count prediction using ./data_SF labels.

    Args:
        embs: Embedding array, one row per label.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``predict_regression``.
    """
    if display:
        print("Crime Prediction: ")
    targets = np.load("./data_SF/crime_counts.npy")
    return predict_regression(embs, targets, display=display)
--------------------------------------------------------------------------------
/tasks_SF/tasks_serviceCall.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import linear_model
3 | from sklearn.model_selection import KFold
4 | from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
5 |
6 |
def compute_metrics(y_pred, y_test):
    """Score predictions against ground truth with MAE, RMSE and R2.

    Negative predictions are clamped to zero before scoring. The clamp is
    applied to a copy, so the caller's ``y_pred`` is not modified and plain
    sequences are accepted as well as arrays.

    Args:
        y_pred: Predicted values.
        y_test: Ground-truth values aligned with ``y_pred``.

    Returns:
        Tuple ``(mae, rmse, r2)``.
    """
    # np.maximum allocates a new array instead of writing into the argument.
    clipped = np.maximum(np.asarray(y_pred), 0)
    mae = mean_absolute_error(y_test, clipped)
    rmse = np.sqrt(mean_squared_error(y_test, clipped))
    r2 = r2_score(y_test, clipped)
    return mae, rmse, r2
13 |
14 |
def regression(X_train, y_train, X_test, alpha):
    """Fit a ridge regressor on the training split and predict the test split.

    Args:
        X_train: Training features; converted to a float array before fitting.
        y_train: Training targets; converted to a float array before fitting.
        X_test: Features to predict for (passed to the model unchanged).
        alpha: Regularisation strength handed to ``linear_model.Ridge``.

    Returns:
        Predictions of the fitted model for ``X_test``.
    """
    features = np.array(X_train, dtype=float)
    targets = np.array(y_train, dtype=float)
    model = linear_model.Ridge(alpha=alpha)
    model.fit(features, targets)
    return model.predict(X_test)
23 |
24 |
def kf_predict(X, Y):
    """Collect out-of-fold ridge predictions over a shuffled 10-fold split.

    The split is seeded with ``random_state=2024`` and every fold is fitted
    with ``alpha=1``.

    Args:
        X: Feature array indexable by integer index arrays.
        Y: Target array indexable by integer index arrays.

    Returns:
        Tuple ``(predictions, truths)``, each the per-fold results
        concatenated in fold order.
    """
    splitter = KFold(n_splits=10, shuffle=True, random_state=2024)
    per_fold = [
        (regression(X[fit_idx], Y[fit_idx], X[held_idx], 1), Y[held_idx])
        for fit_idx, held_idx in splitter.split(X)
    ]
    predictions, truths = zip(*per_fold)
    return np.concatenate(predictions), np.concatenate(truths)
37 |
38 |
def predict_regression(embs, labels, display=False):
    """Cross-validate a regression from embeddings to labels and score it.

    Args:
        embs: Feature array passed to ``kf_predict``.
        labels: Target array passed to ``kf_predict``.
        display: When true, print the three metrics with three decimals.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``compute_metrics``.
    """
    mae, rmse, r2 = compute_metrics(*kf_predict(embs, labels))
    if display:
        report = (("MAE: %.3f", mae), ("RMSE: %.3f", rmse), ("R2: %.3f", r2))
        for template, value in report:
            print(template % value)
    return mae, rmse, r2
47 |
48 |
def do_tasks(embs, display=True):
    """Evaluate embeddings on service-call prediction using ./data_SF labels.

    Args:
        embs: Embedding array, one row per label.
        display: When true, print a task header and the metrics.

    Returns:
        Tuple ``(mae, rmse, r2)`` from ``predict_regression``.
    """
    if display:
        print("Service Calls Prediction: ")
    targets = np.load("./data_SF/serviceCall_counts.npy")
    return predict_regression(embs, targets, display=display)
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | from parse_args import args
2 | import numpy as np
3 | import torch
4 |
def load_data():
    """Load the feature and similarity arrays named in ``args`` as tensors.

    Each file path is ``args.data_path`` concatenated with the matching
    ``args`` file-name attribute. Arrays are wrapped with ``torch.Tensor``
    and moved to CUDA when available, otherwise to the CPU.

    Returns:
        Tuple ``(features, mob_adj, poi_sim, land_sim)`` where ``features``
        is ``[POI_feature, landUse_feature, mob_feature]`` (each given a
        leading axis via ``np.newaxis``) and ``mob_adj`` is the mobility
        adjacency array divided by its own mean.
    """
    data_path = args.data_path
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def read(file_name):
        # Paths are built by plain concatenation, exactly as configured.
        return np.load(data_path + file_name)

    def on_device(array):
        return torch.Tensor(array).to(device)

    landUse_feature = on_device(read(args.landUse_dist)[np.newaxis])
    POI_feature = on_device(read(args.POI_dist)[np.newaxis])
    mob_feature = on_device(read(args.mobility_dist)[np.newaxis])

    raw_adj = read(args.mobility_adj)
    mob_adj = on_device(raw_adj / np.mean(raw_adj))

    poi_sim = on_device(read(args.POI_simi))
    land_sim = on_device(read(args.landUse_simi))

    return [POI_feature, landUse_feature, mob_feature], mob_adj, poi_sim, land_sim
33 |
--------------------------------------------------------------------------------