├── History
│   ├── IJCAI_15
│   │   └── 1
│   └── Tmall
│       └── 1.txt
├── Main.py
├── Model.py
├── Model
│   ├── IJCAI_15
│   │   └── 1
│   └── Tmall
│       └── 1
├── Params.py
├── README.md
└── datasets
    ├── IJCAI_15.zip
    ├── Tmall.zip
    └── process_ts.py

/History/IJCAI_15/1:
--------------------------------------------------------------------------------
1
--------------------------------------------------------------------------------
/History/Tmall/1.txt:
--------------------------------------------------------------------------------
1
--------------------------------------------------------------------------------
/Main.py:
--------------------------------------------------------------------------------
import numpy as np
import pickle
from tqdm import tqdm
import random
import torch
from Params import args
import datetime
from Model import myModel1

import torch.utils.data as dataloader

torch.backends.cudnn.benchmark = True
torch.autograd.set_detect_anomaly(True)
from datasets import process_ts


def pre_train(trainData, cf_train, testData, dim=16, epochs=300, batch_size=256, device='cuda:0'):
    tr_mats, tr_matsT, int_types, meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out, tr_labels = trainData
    labelP = np.squeeze(np.array(np.sum(tr_labels, axis=0)))

    item_in = []
    item_out = []
    user_in = []

    if args.prompt:
        if args.pattern:
            for beh in range(int_types):
                item_in.append(itemMats_in[beh].to(device))
                item_out.append(itemMats_out[beh].to(device))
                user_in.append(userMats_in[beh].to(device))
        else:
            item_in.append(torch.tensor([0, 1]).to(device))
            item_out.append(torch.tensor([0, 1]).to(device))
            user_in.append(torch.tensor([0, 1]).to(device))
    else:
        for beh in range(int_types):
            item_in.append(itemMats_in[beh].to(device))
            item_out.append(itemMats_out[beh].to(device))
            user_in.append(userMats_in[beh].to(device))

    target_mat_for_test = tr_mats[-1].cpu().to_dense()

    tr_mats = [item.to(device) for item in tr_mats]
    tr_matsT = [item.to(device) for item in tr_matsT]
    target_beh = tr_mats[-1]
    user_num, item_num = target_beh.shape
    print('user_num:', user_num)
    print('item_num:', item_num)

    print('waiting for csr->dense')
    if args.prompt or args.denoise_tune:
        ori_graphs = []
    else:
        # np_mats is the list of scipy CSR matrices unpacked at module level in the __main__ block below
        ori_graphs = [torch.tensor(item_.todense()).long().to(args.device) for item_ in np_mats]

    test1 = (labelP, target_mat_for_test, testData)

    if args.just_test:
        if args.prompt and not args.denoise_tune:
            loadPath = r'./Model/' + args.dataset + r'/' + args.prompt_flag + '_deep_' + r'.pth'
        elif args.prompt and args.denoise_tune:
            loadPath = r'./Model/' + args.dataset + r'/' + args.tune_flag + r'.pth'

        params = torch.load(loadPath, map_location=torch.device(args.device))
        net = params['model']
        hit = params['hr']
        ndcg = params['ndcg']
        print(f'loaded model, hit:{hit}, ndcg:{ndcg}')
    else:
        net = model_prepare(int_types, item_in, item_num, item_out, tr_mats, tr_matsT, user_in, user_num, labelP,
                            target_mat_for_test, testData)

    epochHR, epochNDCG = [0] * 2

    print('test before train')
    net.eval()
    cnt, result_HR, result_NDCG, _ = cfTestEpoch(epochHR, epochNDCG, labelP, net, target_mat_for_test, testData)

    print(f"Step {cnt}: hit:{result_HR}, ndcg:{result_NDCG}")

    epoch_num = 0
    best_hr = 0
    best_ndcg = 0
    for epoch in range(epochs):  # parameter renamed to `epochs` so the loop variable no longer shadows it
        epoch_num += 1
        cf_cost = 0
        rec_cost = 0
        bpr_cost = 0
        reg_cost = 0

        '''training'''
        print('start training')

        net.train()

        if args.prompt:
            epoch_loss = cfTrainEpoch(net, cf_train, int_types, ori_graphs=ori_graphs)
            cf_cost += epoch_loss

            print('\tcf_cost:\t%.3f' % cf_cost)
        else:
            epoch_loss, epoch_rec_loss, epoch_bpr_loss, epoch_reg_loss = cfTrainEpoch(net, cf_train, int_types,
                                                                                      ori_graphs=ori_graphs,
                                                                                      target_mat_for_test=target_mat_for_test)
            cf_cost += epoch_loss
            rec_cost += epoch_rec_loss
            bpr_cost += epoch_bpr_loss
            reg_cost += epoch_reg_loss

            print('\tcf_cost:\t%.3f' % cf_cost)
            print('\trec_cost:\t%.3f' % rec_cost)
            print('\tbpr_cost:\t%.3f' % bpr_cost)
            print('\treg_cost:\t%.3f' % reg_cost)

        net.scheduler.step()
        net.eval()

        epochHR, epochNDCG = [0] * 2
        cnt, result_HR, result_NDCG, saved_embs = cfTestEpoch(epochHR, epochNDCG, labelP, net, target_mat_for_test,
                                                              testData)

        print(f"Step {cnt}: hit:{result_HR}, ndcg:{result_NDCG}")

        if result_HR > best_hr:
            best_hr = result_HR
            print(f'best_HR={best_hr}, epoch={epoch}')
            user_embs, item_embs, user_embs_list, item_embs_list = saved_embs
            if args.wsdm:
                save_model(net, user_embs, user_embs_list, item_embs, item_embs_list, result_HR, result_NDCG, test1,
                           flag=args.tune_flag)
            else:
                if not args.prompt:
                    save_model(net, user_embs, user_embs_list, item_embs, item_embs_list, result_HR, result_NDCG,
                               test1, flag=args.pre_flag)

                if args.prompt and not args.denoise_tune:
                    if not args.deep:
                        save_model(net, user_embs, user_embs_list, item_embs, item_embs_list, result_HR, result_NDCG,
                                   test1, flag=args.prompt_flag)
                    elif args.deep:
                        save_model(net, user_embs, user_embs_list, item_embs, item_embs_list, result_HR, result_NDCG,
                                   test1, flag=args.prompt_flag + '_deep_')

                elif args.prompt and args.denoise_tune:
                    save_model(net, user_embs, user_embs_list, item_embs, item_embs_list, result_HR, result_NDCG,
                               test1, flag=args.tune_flag)

        if result_NDCG > best_ndcg:
            best_ndcg = result_NDCG
            print(f'best_NDCG={best_ndcg}, epoch={epoch}')


def model_prepare(int_types, item_in, item_num, item_out, tr_mats, tr_matsT, user_in, user_num, labelP,
                  target_mat_for_test, testData):
    if args.prompt and not args.denoise_tune:
        print('loading the pre-trained (denoise-tuned) model directly for prompt tuning')
        loadPath = r'./Model/' + args.dataset + r'/' + args.tune_flag + r'.pth'
        params = torch.load(loadPath, map_location=torch.device(args.device))
        pre_trained_net = params['model']
        pre_trained_net.behavior_mats = tr_mats
        pre_trained_net.behavior_matsT = tr_matsT

        if args.pattern:
            pre_trained_net.i_in = item_in
            pre_trained_net.i_out = item_out
            pre_trained_net.u_in = user_in

        pre_dict = pre_trained_net.state_dict()

        if args.pattern:
            net = myModel1(userNum=user_num, itemNum=item_num, behavior=int_types, behavior_mats=tr_mats,
                           behavior_matsT=tr_matsT, i_in=item_in, i_out=item_out, u_in=user_in).to(args.device)
        else:
            pre_item_in = torch.tensor([0, 1]).to(args.device)
            pre_item_out = torch.tensor([0, 1]).to(args.device)
            pre_user_in = torch.tensor([0, 1]).to(args.device)
            net = myModel1(userNum=user_num, itemNum=item_num, behavior=int_types, behavior_mats=tr_mats,
                           behavior_matsT=tr_matsT, i_in=pre_item_in, i_out=pre_item_out, u_in=pre_user_in).to(
                args.device)

        net_dict = net.state_dict()

        # Copy the pre-trained weights; the first entry (the behavior embedding table) is initialized
        # with the mean of the pre-trained auxiliary-behavior embeddings.
        for i, p in enumerate(net_dict):
            if i == 0:
                net_dict[str(p)].copy_(torch.mean(pre_dict['beh_embedding.weight'][:-1], 0))
            else:
                net_dict[str(p)].copy_(pre_dict[str(p)])

        if args.head:
            update_para_name = ['gcn.i_concatenation_w', 'gcn.u_concatenation_w', 'prompt.weight',
                                'gcn.prompt_embedding']
        else:
            update_para_name = ['prompt.weight']
            head_name = []

        if args.noise_lambda > 0:
            for i, p in enumerate(net.named_parameters()):
                if 'bias' not in str(p[0]):
                    p[1].data += (torch.rand(p[1].data.size()).to(args.device) - 0.5) * args.noise_lambda * torch.std(
                        p[1].data)

        for i, p in enumerate(net.named_parameters()):
            if str(p[0]) not in update_para_name:
                p[1].requires_grad = False

        net.optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, net.parameters()),
                                          lr=args.lr, weight_decay=args.opt_weight_decay)
        net.scheduler = torch.optim.lr_scheduler.CyclicLR(net.optimizer, args.opt_base_lr, args.opt_max_lr,
                                                          step_size_up=5, step_size_down=10, mode='triangular',
                                                          gamma=0.99, scale_fn=None, scale_mode='cycle',
                                                          cycle_momentum=False, base_momentum=0.8, max_momentum=0.9,
                                                          last_epoch=-1)

        del pre_trained_net

    elif args.prompt and args.denoise_tune:
        print('loading the pre-trained model and discarding the pattern part')
        loadPath = r'./Model/' + args.dataset + r'/' + args.pre_flag + r'.pth'
        params = torch.load(loadPath, map_location=torch.device(args.device))
        pre_trained_net = params['model']

        if args.wsdm:
            pass
        else:
            pre_trained_net.behavior_mats = tr_mats
            pre_trained_net.behavior_matsT = tr_matsT

        if args.pattern:
            pre_trained_net.i_in = item_in
            pre_trained_net.i_out = item_out
            pre_trained_net.u_in = user_in

        pre_dict = pre_trained_net.state_dict()

        if args.pattern:
            net = myModel1(userNum=user_num, itemNum=item_num, behavior=int_types, behavior_mats=tr_mats,
                           behavior_matsT=tr_matsT, i_in=item_in, i_out=item_out, u_in=user_in).to(args.device)
        else:
            pre_item_in = pre_trained_net.i_in
            pre_item_out = pre_trained_net.i_out
            pre_user_in = pre_trained_net.u_in
            net = myModel1(userNum=user_num, itemNum=item_num, behavior=int_types, behavior_mats=tr_mats,
                           behavior_matsT=tr_matsT, i_in=pre_item_in, i_out=pre_item_out, u_in=pre_user_in).to(
                args.device)

        net_dict = net.state_dict()

        if args.wsdm:
            print('using wsdm-21 denoiseRec, lightGCN+TCE')
            pass
        else:
            frozen_para = ['user_embedding.weight', 'item_embedding.weight', 'beh_embedding.weight',
                           'gcn.behavior_embeddings', 'gcn.user_embedding', 'gcn.item_embedding']

            for i, p in enumerate(net_dict):
                if str(p) in frozen_para:
                    net_dict[str(p)].copy_(pre_dict[str(p)])

            for i, p in enumerate(net.named_parameters()):
                if str(p[0]) in frozen_para:
                    p[1].requires_grad = False

            if args.noise_lambda > 0:
                print('noise-tune: noise scale', args.noise_lambda)
                for name, p in net.named_parameters():
                    net.state_dict()[name][:] += (torch.rand(p.size()).to(
                        args.device) - 0.5) * args.noise_lambda * torch.std(p)

        net.optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, net.parameters()),
                                          lr=args.lr, weight_decay=args.opt_weight_decay)
        net.scheduler = torch.optim.lr_scheduler.CyclicLR(net.optimizer, args.opt_base_lr, args.opt_max_lr,
                                                          step_size_up=5, step_size_down=10, mode='triangular',
                                                          gamma=0.99, scale_fn=None, scale_mode='cycle',
                                                          cycle_momentum=False, base_momentum=0.8, max_momentum=0.9,
                                                          last_epoch=-1)

        del pre_trained_net

    else:
        net = myModel1(userNum=user_num, itemNum=item_num, behavior=int_types, behavior_mats=tr_mats,
                       behavior_matsT=tr_matsT, i_in=item_in, i_out=item_out, u_in=user_in).to(args.device)

    return net
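# --- illustrative sketch -----------------------------------------------------
# The `args.noise_lambda > 0` branches above follow the NoisyTune recipe: each
# non-bias parameter is perturbed with uniform noise scaled by that parameter's
# own standard deviation before tuning starts. A minimal stand-alone version
# (the Linear module below is only a placeholder for the loaded network):
#
#     def noisy_tune(model, noise_lambda=0.1):
#         with torch.no_grad():
#             for name, param in model.named_parameters():
#                 if 'bias' not in name:
#                     param.add_((torch.rand_like(param) - 0.5) * noise_lambda * torch.std(param))
#
#     noisy_tune(torch.nn.Linear(8, 4), noise_lambda=0.1)
# ------------------------------------------------------------------------------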
def cfTestEpoch(epochHR, epochNDCG, labelP, net, target_mat_for_test, testData):
    with torch.no_grad():
        user_embs, item_embs, user_embs_list, item_emb_list, graphs = net()

    cnt = 0
    tot = 0
    for user, item_i in testData:
        user_compute, item_compute, user_item1, user_item100 = sampleTestBatch(user, item_i, target_mat_for_test,
                                                                               labelP)

        userEmbed = user_embs[user_compute]
        itemEmbed = item_embs[item_compute]

        pred_i = torch.sum(torch.mul(userEmbed, itemEmbed), dim=1)

        hit, ndcg = calcRes(torch.reshape(pred_i, [user.shape[0], 100]), user_item1, user_item100)
        epochHR = epochHR + hit
        epochNDCG = epochNDCG + ndcg
        cnt += 1
        tot += user.shape[0]
    result_HR = epochHR / tot
    result_NDCG = epochNDCG / tot

    saved_embs = (user_embs, item_embs, user_embs_list, item_emb_list)

    return cnt, result_HR, result_NDCG, saved_embs


# One of three cfTrainEpoch variants is defined, depending on the stage flags.
if args.prompt and not args.denoise_tune:
    print('prompt tuning')

    def cfTrainEpoch(net, train_loader, behavior_num, ori_graphs):
        time = datetime.datetime.now()
        print("start_ng_samp: ", time)
        train_loader.dataset.ng_sample()
        time = datetime.datetime.now()
        print("end_ng_samp: ", time)

        epoch_loss = 0

        # only the target behavior is trained in this stage, hence single-slot lists
        behavior_loss_list = [None]

        user_id_list = [None]
        item_id_pos_list = [None]
        item_id_neg_list = [None]

        cnt = 0
        for user, item_i, item_j in tqdm(train_loader):
            user = user.long().cuda()
            user_embed, item_embed, user_embeds, item_embeds, graphs = net()

            for index in range(len(behavior_loss_list)):
                not_zero_index = np.where(item_i[index].cpu().numpy() != -1)[0]

                user_id_list[index] = user[not_zero_index].long().cuda()
                item_id_pos_list[index] = item_i[index][not_zero_index].long().cuda()
                item_id_neg_list[index] = item_j[index][not_zero_index].long().cuda()

                userEmbed = user_embed[user_id_list[index]]
                posEmbed = item_embed[item_id_pos_list[index]]
                negEmbed = item_embed[item_id_neg_list[index]]

                pred_i, pred_j = innerProduct(userEmbed, posEmbed, negEmbed)

                behavior_loss_list[index] = - (pred_i.view(-1) - pred_j.view(-1)).sigmoid().log()

            for i in range(len(behavior_loss_list)):
                behavior_loss_list[i] = (behavior_loss_list[i]).sum()

            bprloss = sum(behavior_loss_list) / len(behavior_loss_list)

            regLoss = (torch.norm(userEmbed) ** 2 + torch.norm(posEmbed) ** 2 + torch.norm(negEmbed) ** 2) * args.reg

            loss = (bprloss + regLoss) / args.batch

            epoch_loss = epoch_loss + loss.item()

            net.optimizer.zero_grad(set_to_none=True)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=20, norm_type=2)
            net.optimizer.step()
            cnt += 1
        return epoch_loss

elif args.prompt and args.denoise_tune:
    print('denoise tuning uses only the auxiliary behaviors; no graph reconstruction is needed')

    def cfTrainEpoch(net, train_loader, behavior_num, ori_graphs):
        time = datetime.datetime.now()
        print("start_ng_samp: ", time)
        train_loader.dataset.ng_sample()

        time = datetime.datetime.now()
        print("end_ng_samp: ", time)

        epoch_loss = 0

        behavior_loss_list = [None] * behavior_num

        user_id_list = [None] * behavior_num
        item_id_pos_list = [None] * behavior_num
        item_id_neg_list = [None] * behavior_num

        cnt = 0
        for user, item_i, item_j in tqdm(train_loader):

            item_list = [None] * behavior_num
            user_set = torch.tensor(list(set(user.tolist())))
            for i in range(behavior_num):
                item_set = list(set(item_i[i].tolist()))
                try:
                    item_set.remove(-1)
                except ValueError:
                    pass

                item_list[i] = torch.tensor(item_set)

            user = user.long().cuda()
            user_embed, item_embed, user_embeds, item_embeds, graphs = net(user_set, item_list, is_denoise=True)

            for index in range(behavior_num):
                not_zero_index = np.where(item_i[index].cpu().numpy() != -1)[0]

                user_id_list[index] = user[not_zero_index].long().cuda()
                item_id_pos_list[index] = item_i[index][not_zero_index].long().cuda()
                item_id_neg_list[index] = item_j[index][not_zero_index].long().cuda()

                userEmbed = user_embed[user_id_list[index]]
                posEmbed = item_embed[item_id_pos_list[index]]
                negEmbed = item_embed[item_id_neg_list[index]]

                pred_i, pred_j = innerProduct(userEmbed, posEmbed, negEmbed)

                behavior_loss_list[index] = - (pred_i.view(-1) - pred_j.view(-1)).sigmoid().log()

            for i in range(behavior_num):
                if args.wsdm:
                    def drop_rate_schedule(iteration):
                        # linearly ramp the drop rate from 0 to 0.2 over the first 10000 iterations
                        drop_rate = np.linspace(0, 0.2, 10000)
                        if iteration < 10000:
                            return drop_rate[iteration]
                        else:
                            return 0.2

                    def TCE(loss_list_, rate_):
                        # truncated loss: keep only the (1 - rate_) fraction of smallest losses
                        flatten_loss = loss_list_.flatten()
                        ind_sorted_ = np.argsort(flatten_loss.cpu().data)
                        loss_sorted_ = flatten_loss[ind_sorted_]
                        remember_rate_ = 1 - rate_
                        num_remember_ = int(remember_rate_ * len(loss_sorted_))
                        ind_update_ = ind_sorted_[:num_remember_]
                        loss_update_ = flatten_loss[ind_update_]
                        return loss_update_.sum()

                    behavior_loss_list[i] = TCE(behavior_loss_list[i], drop_rate_schedule(cnt))
                else:
                    behavior_loss_list[i] = (behavior_loss_list[i]).sum()

            bprloss = sum(behavior_loss_list) / len(behavior_loss_list)
            regLoss = (torch.norm(userEmbed) ** 2 + torch.norm(posEmbed) ** 2 + torch.norm(negEmbed) ** 2)

            loss = (bprloss + args.reg * regLoss) / args.batch

            epoch_loss = epoch_loss + loss.item()

            net.optimizer.zero_grad(set_to_none=True)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=20, norm_type=2)
            net.optimizer.step()

            cnt += 1

        return epoch_loss
else:
    print('pre-training uses only the auxiliary behaviors')

    def cfTrainEpoch(net, train_loader, behavior_num, ori_graphs, target_mat_for_test):
        time = datetime.datetime.now()
        print("start_ng_samp: ", time)
        train_loader.dataset.ng_sample()
        time = datetime.datetime.now()
        print("end_ng_samp: ", time)

        epoch_loss = 0
        epoch_rec_loss = 0
        epoch_bpr_loss = 0
        epoch_reg_loss = 0

        behavior_loss_list = [None] * behavior_num
        reconstruction_loss_list = [None] * behavior_num

        loss_fuc = torch.nn.CrossEntropyLoss()

        user_id_list = [None] * behavior_num
        item_id_pos_list = [None] * behavior_num
        item_id_neg_list = [None] * behavior_num

        cnt = 0
        for user, item_i, item_j in tqdm(train_loader):
            user = user.long().to(args.device)
            item_i = [item.long().to(args.device) for item in item_i]
            item_j = [item.long().to(args.device) for item in item_j]
            item_list = [None] * behavior_num
            user_set = torch.tensor(list(set(user.tolist())))

            ori_graph_labels = [None] * behavior_num

            # label each sampled (user, item) pair with its entry in the original graph
            for i in range(behavior_num):
                ori_graph = ori_graphs[i]
                pos = ori_graph[(user, item_i[i])]
                neg = ori_graph[(user, item_j[i])]
                ori_graph_labels[i] = torch.cat((pos, neg), 0).unsqueeze(-1)

            user_embed, item_embed, user_embeds, item_embeds, graphs = net(user, item_i, item_j, is_denoise=True)
            for i in range(behavior_num):
                label = ori_graph_labels[i]
                reconstruction_graph = graphs[i]

                neg_ = 1 - reconstruction_graph
                decoder_res = torch.stack((neg_, reconstruction_graph), 1)
                reconstruction_loss_list[i] = loss_fuc(decoder_res, label)

            for index in range(behavior_num):
                not_zero_index = np.where(item_i[index].cpu().numpy() != -1)[0]

                user_id_list[index] = user[not_zero_index].long().cuda()
                item_id_pos_list[index] = item_i[index][not_zero_index].long().cuda()
                item_id_neg_list[index] = item_j[index][not_zero_index].long().cuda()

                userEmbed = user_embed[user_id_list[index]]
                posEmbed = item_embed[item_id_pos_list[index]]
                negEmbed = item_embed[item_id_neg_list[index]]

                pred_i, pred_j = innerProduct(userEmbed, posEmbed, negEmbed)

                behavior_loss_list[index] = - (pred_i.view(-1) - pred_j.view(-1)).sigmoid().log()

            rec_loss = sum(reconstruction_loss_list) / len(reconstruction_loss_list)

            for i in range(behavior_num):
                behavior_loss_list[i] = (behavior_loss_list[i]).sum()

            bprloss = sum(behavior_loss_list) / len(behavior_loss_list)
            regLoss = (torch.norm(userEmbed) ** 2 + torch.norm(posEmbed) ** 2 + torch.norm(negEmbed) ** 2)

            beh_reg = torch.norm(net.beh_embedding.weight) ** 2 + torch.norm(net.prompt.weight) ** 2

            loss = (bprloss + args.reg * regLoss) / args.batch + rec_loss / 2 + (beh_reg * args.reg / behavior_num)

            epoch_loss = epoch_loss + loss.item()

            epoch_rec_loss = epoch_rec_loss + (rec_loss / 2).item()
            epoch_bpr_loss = epoch_bpr_loss + (bprloss / args.batch).item()
            epoch_reg_loss = epoch_reg_loss + (args.reg * regLoss / args.batch).item()

            net.optimizer.zero_grad(set_to_none=True)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=20, norm_type=2)
            net.optimizer.step()

            cnt += 1

        return epoch_loss, epoch_rec_loss, epoch_bpr_loss, epoch_reg_loss
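# --- illustrative sketch -----------------------------------------------------
# The TCE helper above (WSDM'21 denoising baseline) keeps only the smallest
# (1 - drop_rate) fraction of per-example losses, on the assumption that
# unusually large losses mark noisy interactions. A toy, self-contained
# equivalent using torch.topk (values are arbitrary):
#
#     losses = torch.tensor([0.1, 2.5, 0.3, 0.2, 4.0])
#     keep = int((1 - 0.4) * losses.numel())            # drop_rate = 0.4 -> keep 3
#     kept, _ = torch.topk(losses, keep, largest=False)  # the 3 smallest losses
#     print(kept.sum())                                  # tensor(0.6000); 2.5 and 4.0 dropped
# ------------------------------------------------------------------------------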
def calcRes(pred_i, user_item1, user_item100):  # [6144, 100], [6144], [6144, (ndarray: (100,))]
    hit = 0
    ndcg = 0

    for j in range(pred_i.shape[0]):

        _, shoot_index = torch.topk(pred_i[j], args.shoot)
        shoot_index = shoot_index.cpu()
        shoot = user_item100[j][shoot_index]
        shoot = shoot.tolist()

        if type(shoot) != int and (user_item1[j] in shoot):
            hit += 1
            ndcg += np.reciprocal(np.log2(shoot.index(user_item1[j]) + 2))
        elif type(shoot) == int and (user_item1[j] == shoot):
            hit += 1
            ndcg += np.reciprocal(np.log2(0 + 2))

    return hit, ndcg  # int, float


def innerProduct(u, i, j):
    pred_i = torch.sum(torch.mul(u, i), dim=1)
    pred_j = torch.sum(torch.mul(u, j), dim=1)
    return pred_i, pred_j
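# --- illustrative sketch -----------------------------------------------------
# All three cfTrainEpoch variants build the same pairwise BPR objective from
# innerProduct: loss = -log(sigmoid(pred_pos - pred_neg)). Toy check:
#
#     u     = torch.tensor([[0.5, 1.0]])
#     i_pos = torch.tensor([[1.0, 1.0]])
#     i_neg = torch.tensor([[-1.0, 0.0]])
#     pred_i, pred_j = innerProduct(u, i_pos, i_neg)   # 1.5, -0.5
#     print(-(pred_i - pred_j).sigmoid().log())        # ~0.1269: small, since the positive already ranks higher
# ------------------------------------------------------------------------------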
def sampleTestBatch(batch_user_id, batch_item_id, trainMat_target, labelP):
    trainMat_target = trainMat_target.detach().cpu()
    batch = len(batch_user_id)  # e.g., 8K
    tmplen = (batch * 100)      # e.g., 800K

    sub_trainMat = trainMat_target[batch_user_id].numpy()  # rows of the interaction matrix for batch_user_id
    user_item1 = batch_item_id
    user_compute = [None] * tmplen
    item_compute = [None] * tmplen
    user_item100 = [None] * (batch)

    cur = 0
    for i in range(batch):
        pos_item = user_item1[i]
        negset = np.reshape(np.argwhere(sub_trainMat[i] == 0), [-1])
        pvec = labelP[negset]
        pvec = pvec / np.sum(pvec)  # NOTE: pvec is computed but unused; negatives are drawn uniformly below

        random_neg_sam = np.random.permutation(negset)[:99]
        user_item100_one_user = np.concatenate((random_neg_sam, np.array([pos_item])))
        user_item100[i] = user_item100_one_user

        for j in range(100):
            user_compute[cur] = batch_user_id[i]
            item_compute[cur] = user_item100_one_user[j]
            cur += 1

    return user_compute, item_compute, user_item1, user_item100
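# --- illustrative sketch -----------------------------------------------------
# Evaluation protocol implemented by sampleTestBatch + calcRes: each test user
# is ranked against 99 sampled negatives plus the one held-out positive, and
# HR@K / NDCG@K are computed over those 100 candidates. A compact equivalent:
#
#     def hr_ndcg_at_k(scores_100, pos_index, k=10):
#         topk = np.argsort(-scores_100)[:k]
#         if pos_index in topk:
#             rank = int(np.where(topk == pos_index)[0][0])
#             return 1, 1.0 / np.log2(rank + 2)
#         return 0, 0.0
#
#     scores = np.random.randn(100)
#     scores[99] = 10.0                   # make the positive the highest-scored item
#     print(hr_ndcg_at_k(scores, 99))     # (1, 1.0)
# ------------------------------------------------------------------------------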
def setRandomSeed():
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    random.seed(args.seed)


def save_model(model, user_embed, user_embeds, item_embed, item_embeds, best_hr, ndcg, test_data_tuple, flag):
    print(model.item_embedding.weight[0][:2])
    savePath = r'./Model/' + args.dataset + r'/' + flag + r'.pth'
    print(f'u_emb.shape:{user_embed.shape}, i_emb.shape:{item_embed.shape}')
    if args.prompt and args.denoise_tune:
        params = {
            'model': model,
            'user_embed': user_embed,
            'item_embed': item_embed,
            'hr': best_hr,
            'ndcg': ndcg,
        }
    else:
        params = {
            'model': model,
            'user_embed': user_embed,
            'user_embeds': user_embeds,
            'item_embed': item_embed,
            'item_embeds': item_embeds,
            'hr': best_hr,
            'ndcg': ndcg,
        }
    torch.save(params, savePath)
    print('model saved')


def load_model(loadPath, labelP, target_mat_for_test, testData):
    params = torch.load(loadPath)
    model = params['model']
    user_emb = params['user_embed']
    user_embs = params['user_embeds']  # fixed: previously read params['item_embed']
    item_emb = params['item_embed']    # fixed: previously read params['user_embeds']
    item_embs = params['item_embeds']
    load_hr = params['hr']
    load_ndcg = params['ndcg']
    print(model.item_embedding.weight[0][:2])
    epochHR, epochNDCG = [0] * 2
    model.eval()
    cnt, hr, ndcg, saved_embs = cfTestEpoch(epochHR, epochNDCG, labelP, model, target_mat_for_test, testData)

    return model, user_emb, item_emb, user_embs, item_embs, load_hr, load_ndcg, hr, ndcg


if __name__ == '__main__':
    setRandomSeed()
    print(args)
    print('start data pre-process')
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    datas = process_ts.load_data()
    print('over!')
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    print('---------train-------------')
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    np_mats, tr_mats, tr_matsT, tr_label, cf_train_data, test_data, int_types, meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out = datas
    torch.cuda.empty_cache()

    pre_train_data = (
        tr_mats, tr_matsT, int_types, meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out, tr_label)

    pre_train(pre_train_data, cf_train_data, test_data, dim=16, epochs=500, batch_size=128, device=args.device)
--------------------------------------------------------------------------------
/Model.py:
--------------------------------------------------------------------------------
import torch
import random
import torch.nn as nn
from torch.nn import Module, Parameter, init
import torch.nn.functional as F
import numpy as np
import math
import time
from torch.autograd import Variable
from Params import args


class myModel1(nn.Module):
    def __init__(self, userNum, itemNum, behavior, behavior_mats, behavior_matsT, i_in, i_out, u_in):
        super(myModel1, self).__init__()

        self.userNum = userNum
        self.itemNum = itemNum

        self.prompt = torch.nn.Embedding(1, args.dim)
        nn.init.xavier_uniform_(self.prompt.weight)
        self.behavior = behavior
        self.behavior_mats = behavior_mats
        self.behavior_matsT = behavior_matsT
        self.i_in = i_in
        self.i_out = i_out
        self.u_in = u_in

        self.sigmoid = nn.Sigmoid()

        self.user_embedding, self.item_embedding, self.beh_embedding = self.init_embedding()

        self.gcn = GCNv5(self.userNum, self.itemNum, self.behavior, self.behavior_mats, self.behavior_matsT,
                         self.i_in, self.i_out, self.u_in,
                         self.user_embedding.weight, self.item_embedding.weight, self.beh_embedding.weight,
                         self.prompt.weight)

        self.denoising = graphDenoising(args.dim, self.behavior)

        self.optimizer = torch.optim.AdamW(self.parameters(), lr=args.lr, weight_decay=args.opt_weight_decay)
        self.scheduler = torch.optim.lr_scheduler.CyclicLR(self.optimizer, args.opt_base_lr, args.opt_max_lr,
                                                           step_size_up=5, step_size_down=10, mode='triangular',
                                                           gamma=0.99, scale_fn=None, scale_mode='cycle',
                                                           cycle_momentum=False, base_momentum=0.8, max_momentum=0.9,
                                                           last_epoch=-1)

    def init_embedding(self):
        user_embedding = torch.nn.Embedding(self.userNum, args.dim)
        item_embedding = torch.nn.Embedding(self.itemNum, args.dim)
        nn.init.xavier_uniform_(user_embedding.weight)
        nn.init.xavier_uniform_(item_embedding.weight)

        # one embedding per auxiliary behavior; the target behavior is handled by the prompt embedding
        beh_embedding = torch.nn.Embedding(self.behavior - 1, args.dim)
        nn.init.xavier_uniform_(beh_embedding.weight)

        return user_embedding, item_embedding, beh_embedding

    # `forward` is bound once at import time, depending on the stage flags in Params.py
    if not args.prompt:
        def forward(self, user=0, pos=0, neg=0, is_denoise=False):
            user_embed, item_embed, user_embeds, item_embeds = self.gcn()

            if is_denoise:
                sub_user_emb_list = [None] * self.behavior
                sub_pos_emb_list = [None] * self.behavior
                sub_neg_emb_list = [None] * self.behavior
                for beh in range(self.behavior):
                    sub_user_emb_list[beh] = user_embeds[beh][user]
                    sub_pos_emb_list[beh] = item_embeds[beh][pos[beh]]
                    sub_neg_emb_list[beh] = item_embeds[beh][neg[beh]]
                graphs, graphsT = self.denoising(sub_user_emb_list, sub_pos_emb_list, sub_neg_emb_list,
                                                 self.beh_embedding.weight, self.prompt.weight)
            else:
                graphs = 0

            return user_embed, item_embed, user_embeds, item_embeds, graphs
    else:
        def forward(self, user_set=0, item_list=0, is_denoise=False):
            user_embed, item_embed, user_embeds, item_embeds = self.gcn()
            return user_embed, item_embed, user_embeds, item_embeds, 0
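# --- editorial note -----------------------------------------------------------
# `forward` above is defined *conditionally at class-body level*: the branch on
# args.prompt is evaluated once when this module is imported, so myModel1 ends
# up with exactly one of the two signatures. A tiny illustration of the pattern:
#
#     FLAG = True                      # stands in for args.prompt, fixed at import time
#
#     class Demo:
#         if FLAG:
#             def forward(self):
#                 return 'variant A'
#         else:
#             def forward(self, extra):
#                 return 'variant B'
#
#     print(Demo().forward())          # 'variant A'
# ------------------------------------------------------------------------------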
class GCNv5(nn.Module):
    def __init__(self, userNum, itemNum, behavior, behavior_mats, behavior_matsT, i_in, i_out, u_in, u_emb, i_emb,
                 beh_emb, prompt_emb):
        super(GCNv5, self).__init__()
        self.userNum = userNum
        self.itemNum = itemNum
        self.hidden_dim = args.dim
        self.behavior = range(behavior)
        self.behavior_mats = behavior_mats
        self.behavior_matsT = behavior_matsT

        self.i_in = i_in
        self.i_out = i_out
        self.u_in = u_in

        self.user_embedding = u_emb
        self.item_embedding = i_emb
        self.behavior_embeddings = beh_emb
        self.prompt_embedding = prompt_emb

        self.alpha, self.i_concatenation_w, self.u_concatenation_w, self.i_input_w, self.u_input_w = self.init_weight()

        self.sigmoid = torch.nn.Sigmoid()
        self.act = torch.nn.PReLU()
        self.dropout = torch.nn.Dropout(args.drop_rate)

        gnn_layer = args.gnn_layer
        self.gnn_layer = eval(gnn_layer)
        self.layers = nn.ModuleList()
        for i in range(0, len(self.gnn_layer)):
            self.layers.append(GCNLayerv5(args.dim, args.dim, self.userNum, self.itemNum, len(self.behavior),
                                          self.behavior_mats, self.behavior_matsT, self.i_in, self.i_out, self.u_in))

    def init_embedding(self):
        user_embedding = torch.nn.Embedding(self.userNum, args.dim)
        item_embedding = torch.nn.Embedding(self.itemNum, args.dim)
        nn.init.xavier_uniform_(user_embedding.weight)
        nn.init.xavier_uniform_(item_embedding.weight)

        return user_embedding, item_embedding

    def init_weight(self):
        alpha = nn.Parameter(torch.ones(2))
        i_concatenation_w = nn.Parameter(torch.Tensor(len(eval(args.gnn_layer)) * args.dim, args.dim))
        u_concatenation_w = nn.Parameter(torch.Tensor(len(eval(args.gnn_layer)) * args.dim, args.dim))
        i_input_w = nn.Parameter(torch.Tensor(args.dim, args.dim))
        u_input_w = nn.Parameter(torch.Tensor(args.dim, args.dim))

        init.xavier_uniform_(i_concatenation_w)
        init.xavier_uniform_(u_concatenation_w)
        init.xavier_uniform_(i_input_w)
        init.xavier_uniform_(u_input_w)

        return alpha, i_concatenation_w, u_concatenation_w, i_input_w, u_input_w

    def forward(self, user_embedding_input=None, item_embedding_input=None):
        all_user_embeddings = []
        all_item_embeddings = []
        all_user_embeddingss = []
        all_item_embeddingss = []

        user_embedding = self.user_embedding
        item_embedding = self.item_embedding
        beh_embedding_list = self.behavior_embeddings

        for i, layer in enumerate(self.layers):
            user_embedding, item_embedding, user_embeddings, item_embeddings = layer(user_embedding, item_embedding,
                                                                                     beh_embedding_list,
                                                                                     self.prompt_embedding)

            norm_user_embeddings = F.normalize(user_embedding, p=2, dim=1)
            norm_item_embeddings = F.normalize(item_embedding, p=2, dim=1)

            all_user_embeddings.append(user_embedding)
            all_item_embeddings.append(item_embedding)

            all_user_embeddingss.append(user_embeddings)
            all_item_embeddingss.append(item_embeddings)

        user_embedding = torch.cat(all_user_embeddings, dim=1)
        item_embedding = torch.cat(all_item_embeddings, dim=1)

        user_embeddings = torch.cat(all_user_embeddingss, dim=2)
        item_embeddings = torch.cat(all_item_embeddingss, dim=2)

        user_embedding = torch.matmul(user_embedding, self.u_concatenation_w)
        item_embedding = torch.matmul(item_embedding, self.i_concatenation_w)
        user_embeddings = torch.matmul(user_embeddings, self.u_concatenation_w)
        item_embeddings = torch.matmul(item_embeddings, self.i_concatenation_w)

        return user_embedding, item_embedding, user_embeddings, item_embeddings  # [31882, 16], [31882, 16], [4, 31882, 16], [4, 31882, 16]


class GCNLayerv5(nn.Module):
    def __init__(self, in_dim, out_dim, userNum, itemNum, behaviorNum, behavior_mats, behavior_mats_t, i_in, i_out,
                 u_in):
        super(GCNLayerv5, self).__init__()

        self.behaviorNum = behaviorNum
        self.behavior_mats = behavior_mats
        self.behavior_mats_t = behavior_mats_t

        self.userNum = userNum
        self.itemNum = itemNum

        self.i_in = i_in
        self.i_out = i_out
        self.u_in = u_in

        self.act = torch.nn.Sigmoid()
        self.i_w = nn.Parameter(torch.Tensor(in_dim, out_dim))
        self.u_w = nn.Parameter(torch.Tensor(in_dim, out_dim))
        self.ii_w = nn.Parameter(torch.Tensor(2 * in_dim, out_dim))
        self.uu_w = nn.Parameter(torch.Tensor(2 * in_dim, out_dim))

        self.W1 = nn.Parameter(torch.Tensor(args.dim, args.dim))
        self.W2 = nn.Parameter(torch.Tensor(args.dim, args.dim))

        self.conv_layer = nn.Conv2d(1, 1, (1, 2), bias=True)
        self.conv_layer_user = nn.Conv2d(1, 1, (1, 2), bias=True)
        self.feat_drop = nn.Dropout(args.drop_rate)
        self.softmax = nn.Softmax(dim=1)
        self.relu = nn.LeakyReLU(0.2)

        init.xavier_uniform_(self.i_w)
        init.xavier_uniform_(self.u_w)
        init.xavier_uniform_(self.ii_w)
        init.xavier_uniform_(self.uu_w)
        init.xavier_uniform_(self.conv_layer.weight)
        init.xavier_uniform_(self.conv_layer_user.weight)
        init.xavier_uniform_(self.W1)
        init.xavier_uniform_(self.W2)

    def forward(self, user_embedding, item_embedding, beh_embeddings, prompt_emb):
        user_embedding_list = [None] * self.behaviorNum
        item_embedding_list = [None] * self.behaviorNum

        for i in range(self.behaviorNum):
            # the prompt is only injected into the target (last) behavior
            if i == self.behaviorNum - 1:
                prompt = prompt_emb
            else:
                prompt = 0.0

            if args.prompt:
                if not args.denoise_tune:
                    # (an earlier variant projected the embeddings onto the prompt direction and
                    # performed best on IJCAI; the additive prompt below is the version kept here)
                    user_embedding_list[i] = torch.spmm(self.behavior_mats[i], item_embedding) + prompt
                    item_embedding_list[i] = torch.spmm(self.behavior_mats_t[i], user_embedding) + prompt
                else:
                    user_embedding_list[i] = torch.spmm(self.behavior_mats[i], item_embedding)
                    item_embedding_list[i] = torch.spmm(self.behavior_mats_t[i], user_embedding)
            else:
                u_emb_uu = self.userGNN(user_embedding, self.u_in[i])
                i_emb_ii = self.itemGNN(item_embedding, self.i_in[i], self.i_out[i])

                u_emb_ui = torch.spmm(self.behavior_mats[i], item_embedding)
                i_emb_ui = torch.spmm(self.behavior_mats_t[i], user_embedding)

                user_embedding_list[i] = torch.cat((u_emb_ui, u_emb_uu), -1) @ self.uu_w
                item_embedding_list[i] = torch.cat((i_emb_ii, i_emb_ui), -1) @ self.ii_w

        user_embeddings = torch.stack(user_embedding_list, dim=0)
        item_embeddings = torch.stack(item_embedding_list, dim=0)

        user_embedding = self.act(torch.matmul(torch.mean(user_embeddings, dim=0), self.u_w))
        item_embedding = self.act(torch.matmul(torch.mean(item_embeddings, dim=0), self.i_w))

        user_embeddings = self.act(torch.matmul(user_embeddings, self.u_w))
        item_embeddings = self.act(torch.matmul(item_embeddings, self.i_w))

        return user_embedding, item_embedding, user_embeddings, item_embeddings

    def itemGNN(self, item_emb, i_in, i_out):
        in_neighbor = torch.spmm(i_in, item_emb)
        out_neighbor = torch.spmm(i_out, item_emb)

        x_in = self.relu((item_emb * in_neighbor) @ self.W1)
        x_out = self.relu((item_emb * out_neighbor) @ self.W2)

        in_score = torch.squeeze(torch.sum((x_in / math.sqrt(args.dim)), dim=1), 0)
        out_score = torch.squeeze(torch.sum((x_out / math.sqrt(args.dim)), dim=1), 0)
        score = self.softmax(torch.stack((in_score, out_score), dim=1))
        score_in = torch.unsqueeze(score[:, 0], dim=-1)
        score_out = torch.unsqueeze(score[:, 1], dim=-1)
        neighbor = in_neighbor * score_in + out_neighbor * score_out
        agg = torch.stack((item_emb, neighbor), dim=2)
        agg = torch.unsqueeze(agg, 1)
        out_conv = self.conv_layer(agg)
        emb = self.feat_drop(torch.squeeze(out_conv))

        return emb

    def userGNN(self, user_emb, u_in):
        neighbor_feature = torch.spmm(u_in, user_emb)

        agg = torch.stack((neighbor_feature, user_emb), dim=2)  # n x dim x 2
        agg = torch.unsqueeze(agg, 1)
        out_conv = self.conv_layer_user(agg)
        emb = self.feat_drop(torch.squeeze(out_conv))

        return emb
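# --- illustrative sketch -----------------------------------------------------
# In the prompt-tuning branch of GCNLayerv5.forward, propagation for the target
# behavior is a plain sparse aggregation with the (broadcast) prompt vector
# added on top. A toy version with a 2-user x 3-item adjacency:
#
#     idx = torch.tensor([[0, 0, 1], [0, 2, 1]])
#     val = torch.tensor([0.5, 0.5, 1.0])
#     A = torch.sparse_coo_tensor(idx, val, (2, 3))
#     item_emb = torch.randn(3, 4)
#     prompt = torch.randn(1, 4)                     # one shared prompt vector
#     user_emb = torch.spmm(A, item_emb) + prompt    # aggregation + broadcast prompt
#     print(user_emb.shape)                          # torch.Size([2, 4])
# ------------------------------------------------------------------------------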
class graphDenoising(nn.Module):
    def __init__(self, dim, behavior):
        super(graphDenoising, self).__init__()
        self.dim = dim
        self.behavior = behavior
        self.act = nn.Sigmoid()

    def forward(self, user_emb, pos_emb, neg_emb, beh_emb, prompt_emb):
        generated_graphs = [None] * self.behavior
        generated_graphsT = []
        for beh in range(self.behavior):
            # auxiliary behaviors use their own embedding; the target behavior uses the prompt
            if beh != self.behavior - 1:
                w = beh_emb[beh]
            else:
                w = prompt_emb.squeeze()
            W = w.unsqueeze(1) @ w.unsqueeze(-1).t()  # rank-one weight w w^T
            ge_g_pos = self.act(torch.sum(user_emb[beh] @ W * pos_emb[beh], 1, keepdim=True))
            ge_g_neg = self.act(torch.sum(user_emb[beh] @ W * neg_emb[beh], 1, keepdim=True))
            ge_g = torch.cat((ge_g_pos, ge_g_neg), 0)
            generated_graphs[beh] = ge_g
        return generated_graphs, generated_graphsT
--------------------------------------------------------------------------------
/Model/IJCAI_15/1:
--------------------------------------------------------------------------------
1
--------------------------------------------------------------------------------
/Model/Tmall/1:
--------------------------------------------------------------------------------
1
--------------------------------------------------------------------------------
/Params.py:
--------------------------------------------------------------------------------
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Model Params')

    parser.add_argument('--device', default='cuda:0', type=str, help="['0','1','cpu']")
    parser.add_argument('--seed', type=int, default=2023)

    parser.add_argument('--lr', default=3e-4, type=float, help='learning rate')
    parser.add_argument('--opt_base_lr', default=1e-3, type=float, help='base learning rate for the scheduler')
    parser.add_argument('--opt_weight_decay', default=1e-4, type=float, help='weight decay regularizer')

    parser.add_argument('--batch', default=8192, type=int, help='batch size')
    # parser.add_argument('--gnn_layer', default='[16,16,16]', type=str, help='GNN layer dims (IJCAI_15)')
    parser.add_argument('--gnn_layer', default='[32,32,32]', type=str, help='GNN layer dims (Tmall)')

    parser.add_argument('--dataset', default='Tmall', type=str, help='name of dataset: IJCAI_15, Tmall')
    parser.add_argument('--reg', default=1e-2, type=float, help='weight decay regularizer')
    parser.add_argument('--opt_max_lr', default=5e-3, type=float,
                        help='max learning rate; IJCAI: 2e-3, Tmall: 5e-3, Tmall prompt-add: 2e-3')

    parser.add_argument('--epoch', default=120, type=int, help='number of epochs')
    parser.add_argument('--decay', default=0.96, type=float, help='weight decay rate')
    parser.add_argument('--save_path', default='tem', help='file name to save model and training record')
    parser.add_argument('--dim', default=32, type=int, help='embedding size: IJCAI-16, Tmall-32')
    parser.add_argument('--memosize', default=2, type=int, help='memory size')
    parser.add_argument('--sampNum', default=40, type=int, help='batch size for sampling')

    parser.add_argument('--load_model', default=None, help='model name to load')
    parser.add_argument('--shoot', default=10, type=int, help='K of top-k')
    parser.add_argument('--target', default='buy', type=str, help='target behavior to predict on')
    parser.add_argument('--deep_layer', default=0, type=int, help='number of deep layers to make the final prediction')
    parser.add_argument('--mult', default=1, type=float, help='multiplier for the result')
    parser.add_argument('--keepRate', default=0.7, type=float, help='rate for dropout')
    parser.add_argument('--iiweight', default=0.3, type=float, help='weight for ii')
    parser.add_argument('--slot', default=5, type=int, help='length of time slots')
    parser.add_argument('--graphSampleN', default=25000, type=int,
                        help='use 25000 for training and 200000 for testing, empirically')
    parser.add_argument('--divSize', default=50, type=int, help='div size for smallTestEpoch')
    parser.add_argument('--isload', default=False, type=bool, help='whether to load a saved model')
    parser.add_argument('--isJustTest', default=False, type=bool, help='whether to skip training and just test')
    parser.add_argument('--patience', type=int, default=100)
    parser.add_argument('--inner_product_mult', default=1, type=float, help='multiplier for the result')
    parser.add_argument('--drop_rate', default=0.5, type=float, help='drop_rate')
    parser.add_argument('--drop_rate1', default=0.5, type=float, help='drop_rate')

    # NOTE: argparse's type=bool converts any non-empty string (including 'False') to True,
    # so toggle the stage flags below by editing their defaults rather than on the command line.
    parser.add_argument('--pattern', default=False, type=bool)
    parser.add_argument('--prompt', default=True, type=bool, help='whether prompt/denoise')
    parser.add_argument('--denoise_tune', default=False, type=bool,
                        help='whether denoise; prompt=False, denoise_tune=False for the first stage')

    parser.add_argument('--wsdm', default=False, type=bool, help='whether to use the WSDM-21 truncated-loss baseline')

    parser.add_argument('--pre_flag', default='prompt_demo_3_bias_target_2', type=str,
                        help='first stage')
    parser.add_argument('--prompt_flag', default='prompt_demo_3_1_1', type=str, help='third stage')
    parser.add_argument('--tune_flag', default='prompt_demo_3_0', type=str, help='second stage')
    parser.add_argument('--gumbel', default=0.48, type=float, help='threshold for binarizing the denoised graphs')

    parser.add_argument('--deep', default=True, type=bool, help='deep (per-layer) vs. shallow prompt')
    parser.add_argument('--vector', default=True, type=bool, help='whether load model')
    parser.add_argument('--head', default=False, type=bool, help='pre: l1+l2+l3 -> tune: l4')
    parser.add_argument('--noise_lambda', default=0.1, type=float, help='scale of the NoisyTune perturbation')

    parser.add_argument('--just_test', default=False, type=bool, help='only evaluate a saved model')

    # preset combinations used in the ablations:
    # # shallow - head
    # parser.add_argument('--deep', default=False, type=bool)
    # parser.add_argument('--head', default=False, type=bool)
    # parser.add_argument('--noise_lambda', default=-1, type=float)

    # # deep - head
    # parser.add_argument('--deep', default=True, type=bool)
    # parser.add_argument('--head', default=False, type=bool)
    # parser.add_argument('--noise_lambda', default=-1, type=float)

    # # shallow + head
    # parser.add_argument('--deep', default=False, type=bool)
    # parser.add_argument('--head', default=True, type=bool)
    # parser.add_argument('--noise_lambda', default=0.5, type=float)

    # # deep + head: the defaults above

    return parser.parse_args()


args = parse_args()

# dataset statistics kept for reference:
# args.user = 147894 ; args.item = 99037
# ML10M: args.user = 67788 ; args.item = 8704
# Yelp:  args.user = 19800 ; args.item = 22734

args.decay_step = 10000 // args.batch
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DPT: Denoising and Prompt-Tuning for Multi-Behavior Recommendation
***
PyTorch implementation of the paper "DPT: Denoising and Prompt-Tuning for Multi-Behavior Recommendation", WWW '23
***


## Parameters:
***
* Tmall: dim=32 ; IJCAI: dim=16

* DPT-1: prompt=False ; denoise_tune=False

* DPT-2: prompt=True ; denoise_tune=True

* DPT-3: prompt=True ; denoise_tune=False
***
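## Running the three stages:
***
The stage flags are parsed with `argparse`'s `type=bool`, which turns **any** non-empty string (including `"False"`) into `True`, so toggle `prompt` / `denoise_tune` by editing their defaults in `Params.py` rather than on the command line. A typical sequence, assuming the defaults have been edited for each stage:

```
# DPT-1: pre-training      (prompt=False, denoise_tune=False)
python Main.py --dataset Tmall

# DPT-2: denoise tuning    (prompt=True,  denoise_tune=True)
python Main.py --dataset Tmall

# DPT-3: prompt tuning     (prompt=True,  denoise_tune=False)
python Main.py --dataset Tmall
```
***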
--------------------------------------------------------------------------------
/datasets/IJCAI_15.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zc-97/DPT/11559cdbb2c4a8e519d56fa3e8b49a240ee801ff/datasets/IJCAI_15.zip
--------------------------------------------------------------------------------
/datasets/Tmall.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zc-97/DPT/11559cdbb2c4a8e519d56fa3e8b49a240ee801ff/datasets/Tmall.zip
--------------------------------------------------------------------------------
/datasets/process_ts.py:
--------------------------------------------------------------------------------
import pickle
import numpy as np
import pandas as pd
import scipy.sparse as sp
from scipy import sparse
from scipy.sparse import csr_matrix
from math import ceil
import datetime
import torch
import torch.utils.data as data
from tqdm import tqdm
from Params import args
import Utils.graph_util

if args.dataset == 'yelp' or args.dataset == 'Yelp':
    predir = 'datasets/Yelp/'
    behaviors = ['tip', 'neg', 'neutral', 'pos']
    user_num = 19800
    item_num = 22734

elif args.dataset == 'IJCAI_15' or args.dataset == 'IJCAI':
    predir = 'datasets/IJCAI_15/'
    behaviors = ['click', 'fav', 'cart', 'buy']
    user_num = 17435
    item_num = 35920

elif args.dataset == 'Tmall' or args.dataset == 'tmall':
    predir = 'datasets/Tmall/'
    behaviors = ['pv', 'fav', 'cart', 'buy']
    user_num = 31882
    item_num = 31232

tr_file = predir + 'trn_'
te_file = predir + 'tst_'


def load_data():
    ori_mats = list()
    trnMats = list()  # list of all behavior matrices; each entry is a torch.Tensor

    behavior_mats = list()
    trnMatsT = list()

    seq_trnMats = list()
    seq_trnMatsT = list()
    for i in range(len(behaviors)):
        beh = behaviors[i]
        path = tr_file + beh
        with open(path, 'rb') as fs:
            mat = pickle.load(fs)
        ori_mats.append(mat)
        mat = (mat != 0) * 1
        behavior_mats.append(mat)
        if args.target == 'click':
            tr_label = (mat if i == 0 else 1 * (tr_label + mat != 0))
            user_num, item_num = mat.shape
        elif args.target == 'buy' and i == len(behaviors) - 1:
            tr_label = 1 * (mat != 0)
            user_num, item_num = mat.shape
            target_Mat = mat
            print(user_num, item_num)

        trnMatsT.append(matrix_to_tensor(mat.T))
        mat = matrix_to_tensor(mat)
        trnMats.append(mat)

        seq_trnMats.append(matrix_to_tensor(ori_mats[i]))
        seq_trnMatsT.append(matrix_to_tensor(ori_mats[i].T))
    if args.wsdm:
        meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out = (0, 0, 0, 0, 0)
    elif not args.wsdm:
        if args.prompt:
            time1 = datetime.datetime.now()
            print('loading denoised graphs', time1)
            trnMats = list()
            trnMatsT = list()
            seq_trnMats = list()
            seq_trnMatsT = list()

            ori_int_nums = [behavior_mats[i].sum() for i in range(len(behaviors))]

            behavior_mats, seq_mats = loadModel(behavior_mats, ori_mats)

            ge_int_nums = [behavior_mats[i].sum() for i in range(len(behaviors))]

            for beh in range(len(behaviors)):
                print(
                    f'ori_int:{ori_int_nums[beh]},ge_int:{ge_int_nums[beh]},noise_percent:{1 - (ge_int_nums[beh] / ori_int_nums[beh])}')

            for beh in range(len(behaviors)):
                mat = behavior_mats[beh]
                trnMatsT.append(matrix_to_tensor(mat.T))
                mat = matrix_to_tensor(mat)
                trnMats.append(mat)

                seq_trnMats.append(matrix_to_tensor(seq_mats[beh]))
                seq_trnMatsT.append(matrix_to_tensor(seq_mats[beh].T))

            time2 = datetime.datetime.now()
            print('finished loading denoised graphs', time2)

    # trnMats, maxTime = timeProcess(trnMats)
    try:
        print('userNum, itemNum:', (user_num, item_num))
    except:
        print('ERROR')
        assert 1 == 2
    if args.wsdm:
        pass
    elif not args.wsdm:
        if args.prompt or args.denoise_tune:
            if args.pattern:
                try:
                    print('try to open loaded files (graph denoising)')
                    with open(predir + 'temp/' + args.tune_flag + '_graphs.pkl', 'rb') as f:
                        print('file exist (graph denoising)')
                        temp_data = pickle.load(f)
                        meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out = temp_data
                except:
                    print('file not exist (graph denoising)')
                    meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out = get_trn_meta_path(
                        seq_trnMats, seq_trnMatsT)
                    temp_data = (meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out)
                    with open(predir + 'temp/' + args.tune_flag + '_graphs.pkl', 'wb') as f:
                        pickle.dump(temp_data, f)
            else:
                meta_paths = []
                itemMats_in = []
                itemMats_out = []
                userMats_in = []
                userMats_out = []
        else:
            try:
                print('try to open loaded files')
                with open(predir + 'temp/temp_data.pkl', 'rb') as f:
                    print('file exist')
                    temp_data = pickle.load(f)
                    meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out = temp_data
            except:
                print('file not exist')
                meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out = get_trn_meta_path(seq_trnMats,
                                                                                                     seq_trnMatsT)
                temp_data = (meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out)
                with open(predir + 'temp/temp_data.pkl', 'wb') as f:
                    pickle.dump(temp_data, f)

    path = te_file + 'int'
    with open(path, 'rb') as fs:
        te_int = pickle.load(fs)

    # --------------------- CF-based train loader --------------------------
    train_u, train_v = behavior_mats[-1].nonzero()
    train_data = np.hstack((train_u.reshape(-1, 1), train_v.reshape(-1, 1))).tolist()
    train_dataset = RecDataset_beh(behaviors, train_data, item_num, behavior_mats, True)
    train_loader = torch.utils.data.dataloader.DataLoader(train_dataset, batch_size=args.batch, shuffle=True,
                                                          num_workers=4,
                                                          pin_memory=True)

    # --------------------- CF-based test loader --------------------------
    test_user = np.array([idx for idx, i in enumerate(te_int) if i is not None])
    test_item = np.array([i for idx, i in enumerate(te_int) if i is not None])
    test_data = np.hstack((test_user.reshape(-1, 1), test_item.reshape(-1, 1))).tolist()

    test_dataset = RecDataset(test_data, item_num, target_Mat, 0, False)
    test_loader = torch.utils.data.dataloader.DataLoader(test_dataset, batch_size=args.batch, shuffle=False,
                                                         num_workers=4,
                                                         pin_memory=True)

    return behavior_mats, trnMats, trnMatsT, tr_label, train_loader, test_loader, len(
        behaviors), meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out
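# --- illustrative sketch -----------------------------------------------------
# load_data converts each scipy CSR matrix to a torch sparse tensor through
# matrix_to_tensor, which is defined further down in this file (outside this
# excerpt). A typical implementation, shown only as a reference sketch:
#
#     def matrix_to_tensor_sketch(mat):
#         coo = mat.tocoo()
#         indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
#         values = torch.from_numpy(coo.data.astype(np.float32))
#         return torch.sparse_coo_tensor(indices, values, coo.shape)
#
#     print(matrix_to_tensor_sketch(sp.eye(3, format='csr')))
# ------------------------------------------------------------------------------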
seq_trnMats, seq_trnMatsT) 144 | temp_data = (meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out) 145 | with open(predir + 'temp/' + args.tune_flag + '_graphs.pkl', 'wb') as f: 146 | pickle.dump(temp_data, f) 147 | else: 148 | meta_paths = [] 149 | itemMats_in = [] 150 | itemMats_out = [] 151 | userMats_in = [] 152 | userMats_out = [] 153 | else: 154 | try: 155 | print('try to open loaded files') 156 | with open(predir + 'temp/temp_data.pkl', 'rb') as f: 157 | print('file exist') 158 | temp_data = pickle.load(f) 159 | meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out = temp_data 160 | except: 161 | print('file not exist') 162 | meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out = get_trn_meta_path(seq_trnMats, 163 | seq_trnMatsT) 164 | temp_data = (meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out) 165 | with open(predir + 'temp/temp_data.pkl', 'wb') as f: 166 | pickle.dump(temp_data, f) 167 | 168 | path = te_file + 'int' 169 | with open(path, 'rb') as fs: 170 | te_int = pickle.load(fs) 171 | 172 | # test_user = np.array([idx for idx, i in enumerate(te_int) if i is not None]) 173 | # test_item = np.array([i for idx, i in enumerate(te_int) if i is not None]) 174 | # test_data = np.hstack((test_user.reshape(-1,1), test_item.reshape(-1,1))).tolist() 175 | # test_dataset = RecDataset(test_data, item_num, trnMats[-1], 0, False) 176 | 177 | # --------------------- CF-based train loader-------------------------- 178 | train_u, train_v = behavior_mats[-1].nonzero() 179 | train_data = np.hstack((train_u.reshape(-1, 1), train_v.reshape(-1, 1))).tolist() 180 | train_dataset = RecDataset_beh(behaviors, train_data, item_num, behavior_mats, True) 181 | train_loader = torch.utils.data.dataloader.DataLoader(train_dataset, batch_size=args.batch, shuffle=True, 182 | num_workers=4, 183 | pin_memory=True) 184 | 185 | # --------------------- CF-based test loader -------------------------- 186 | test_user = np.array([idx for idx, i in enumerate(te_int) if i is not None]) 187 | test_item = np.array([i for idx, i in enumerate(te_int) if i is not None]) 188 | # tstUsrs = np.reshape(np.argwhere(data!=None), [-1]) 189 | test_data = np.hstack((test_user.reshape(-1, 1), test_item.reshape(-1, 1))).tolist() 190 | # testbatch = np.maximum(1, args.batch * args.sampNum 191 | 192 | test_dataset = RecDataset(test_data, item_num, target_Mat, 0, False) 193 | test_loader = torch.utils.data.dataloader.DataLoader(test_dataset, batch_size=args.batch, shuffle=False, 194 | num_workers=4, 195 | pin_memory=True) 196 | 197 | # te_stat = (te_int != None) 198 | # te_users = np.reshape(np.argwhere(te_stat != False), [-1]) 199 | return behavior_mats, trnMats, trnMatsT, tr_label, train_loader, test_loader, len( 200 | behaviors), meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out 201 | 202 | 203 | 204 | def loadModel(sparse_trn_mats, ori_mats): 205 | # denoising_test_2 206 | # ModelName = self.modelName 207 | # loadPath = r'./Model/' + args.dataset + r'/' + ModelName + r'.pth' 208 | loadPath = r'./Model/' + args.dataset + r'/' + args.pre_flag + r'.pth' 209 | params = torch.load(loadPath, map_location='cpu') 210 | 211 | with torch.no_grad(): 212 | new_trn_mats = [None] * len(behaviors) 213 | new_seq_mats = [None] * len(behaviors) 214 | 215 | model = params['model'].cpu() 216 | user_embed = params['user_embed'].cpu() 217 | item_embed = params['item_embed'].cpu() 218 | user_embeds = params['user_embeds'].cpu() 219 | item_embeds = params['item_embeds'].cpu() 220 | 221 | # 
def loadModel(sparse_trn_mats, ori_mats):
    loadPath = r'./Model/' + args.dataset + r'/' + args.pre_flag + r'.pth'
    params = torch.load(loadPath, map_location='cpu')

    with torch.no_grad():
        new_trn_mats = [None] * len(behaviors)
        new_seq_mats = [None] * len(behaviors)

        model = params['model'].cpu()
        user_embed = params['user_embed'].cpu()
        item_embed = params['item_embed'].cpu()
        user_embeds = params['user_embeds'].cpu()
        item_embeds = params['item_embeds'].cpu()

        # Rebuild a dense confidence graph for every auxiliary behavior:
        # score(u, i) = sigmoid(e_u^T (b b^T) e_i), where b is the behavior embedding.
        generated_graphs = [None] * (len(behaviors) - 1)
        act = torch.nn.Sigmoid()
        for beh in range(len(behaviors) - 1):
            beh_emb = model.beh_embedding.weight[beh]
            beh_w = beh_emb.unsqueeze(1) @ beh_emb.unsqueeze(-1).t()  # outer product, (d, d)
            generated_graphs[beh] = act(user_embed @ beh_w @ item_embed.t())

        for beh in range(len(behaviors)):
            if args.dataset == 'IJCAI_15':
                # keep the two target-side behaviors untouched; denoise the rest
                if beh != (len(behaviors) - 1) and beh != (len(behaviors) - 2):
                    g = generated_graphs[beh]
                    # binarize by the confidence threshold (the same args.gumbel
                    # threshold is applied to every denoised behavior)
                    g[g >= args.gumbel] = 1
                    g[g < args.gumbel] = 0
                    ori_g = torch.from_numpy(sparse_trn_mats[beh].todense()).to(g.device)
                    seq_g = torch.from_numpy(ori_mats[beh].todense()).to(g.device)  # timestamps

                    new_g = g * ori_g  # keep only observed edges that pass the threshold
                    new_seq_g = new_g * seq_g  # carry the timestamps of the kept edges

                    if args.case:
                        user = torch.tensor(range(ori_g.shape[0]))  # case-study hook; currently unused

                    new_trn_mats[beh] = sp.csr_matrix(new_g.long().cpu().numpy())
                    new_seq_mats[beh] = sp.csr_matrix(new_seq_g.long().cpu().numpy())
                else:
                    new_trn_mats[beh] = sparse_trn_mats[beh]
                    new_seq_mats[beh] = ori_mats[beh]
            elif args.dataset == 'Tmall':
                # only the target behavior (buy) is left untouched
                if beh != (len(behaviors) - 1):
                    g = generated_graphs[beh]
                    g[g >= args.gumbel] = 1
                    g[g < args.gumbel] = 0
                    ori_g = torch.from_numpy(sparse_trn_mats[beh].todense()).to(g.device)
                    seq_g = torch.from_numpy(ori_mats[beh].todense()).to(g.device)  # timestamps

                    new_g = g * ori_g
                    new_seq_g = new_g * seq_g

                    new_trn_mats[beh] = sp.csr_matrix(new_g.long().cpu().numpy())
                    new_seq_mats[beh] = sp.csr_matrix(new_seq_g.long().cpu().numpy())
                else:
                    new_trn_mats[beh] = sparse_trn_mats[beh]
                    new_seq_mats[beh] = ori_mats[beh]

    del params
    torch.cuda.empty_cache()

    return new_trn_mats, new_seq_mats

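# NOTE (editor sketch): the core of loadModel() is "reconstruct a dense score
# matrix, threshold it, then intersect it with the observed graph". The toy
# below reproduces just that arithmetic on a 2x2 example; `_denoise_demo` is a
# hypothetical helper and is not used by the pipeline.
def _denoise_demo():
    scores = torch.tensor([[0.9, 0.2], [0.6, 0.8]])  # sigmoid confidences
    observed = torch.tensor([[1, 1], [0, 1]])        # binarized interactions
    keep = (scores >= 0.5).long()                    # threshold, cf. args.gumbel
    denoised = keep * observed                       # only observed edges can survive
    return denoised                                  # tensor([[1, 0], [0, 1]])
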
def get_trn_meta_path(trnMats, trnMatsT):
    max_len = 0
    meta_path = {}
    # user: (item_seq, beh_seq, mask_seq)
    # item_mask = item_num
    # beh_mask = beh_num
    # mask_seq = 1/0
    print('create meta-paths')
    for user in tqdm(range(user_num), total=user_num, ncols=100):
        item_list = []
        time_list = []
        beh_list = []
        for i in range(len(behaviors)):
            beh = behaviors[i]
            path = tr_file + beh
            # re-read the raw scipy matrix; its stored values are the timestamps
            with open(path, 'rb') as fs:
                mat = pickle.load(fs)
            item = mat[user].nonzero()[1]  # np.ndarray of item ids
            time = mat[user, item].data  # np.ndarray of timestamps

            item_list.extend(item)
            time_list.extend(time)
            assert len(item) == len(time)
            beh_list.extend([i] * len(item))
        df = pd.DataFrame(columns=['iid', 'time', 'beh'], index=range(len(item_list)))
        df['iid'] = item_list
        df['time'] = time_list
        df['beh'] = beh_list
        df1 = df.sort_values(['time'])
        item_sequence = df1['iid'].tolist()
        beh_sequence = df1['beh'].tolist()

        seq_len = len(item_sequence)
        if seq_len >= max_len:
            max_len = seq_len
        sequence = [item_sequence, beh_sequence]
        meta_path[user] = sequence

    itemMats_in, itemMats_out, userMats_in, userMats_out = create_graphs(meta_path, trnMats, trnMatsT,
                                                                         is_padding=False)

    print('padding meta-paths')
    user_id_seq = torch.tensor(range(user_num))
    item_seqs_list = []
    beh_seqs_list = []
    mask_seqs_list = []
    for user in tqdm(range(user_num), total=user_num, ncols=100):
        item_seq = meta_path[user][0]
        beh_seq = meta_path[user][1]

        length = len(item_seq)
        fix = max_len - len(item_seq)

        mask_seq = [1] * length + [0] * fix
        item_seq.extend([item_num] * fix)  # pad items with the mask id item_num
        beh_seq.extend([len(behaviors)] * fix)  # pad behaviors with the mask id beh_num

        item_seqs_list.append(np.array(item_seq))
        beh_seqs_list.append(np.array(beh_seq))
        mask_seqs_list.append(np.array(mask_seq))
    item_seqs_list = np.array(item_seqs_list)
    beh_seqs_list = np.array(beh_seqs_list)
    mask_seqs_list = np.array(mask_seqs_list)
    meta_paths = (
        user_id_seq,
        torch.from_numpy(item_seqs_list),
        torch.from_numpy(beh_seqs_list),
        torch.from_numpy(mask_seqs_list)
    )

    # The padded tensors above are deliberately discarded: only the four graph
    # lists are consumed downstream, so a placeholder is returned instead.
    meta_paths = 0

    return meta_paths, itemMats_in, itemMats_out, userMats_in, userMats_out


def create_graphs(meta_path, trnMats, trnMatsT, is_padding=True):
    print('create_graphs')
    item_graph_list = []
    user_graph_list = []
    if is_padding:
        for i in range(len(behaviors)):
            item_graph = np.zeros((item_num + 1, item_num + 1), dtype=int)  # +1 row/col for the item mask id
            user_graph = np.zeros((user_num, user_num), dtype=int)  # users have no mask id
            item_graph_list.append(item_graph)
            user_graph_list.append(user_graph)
    else:
        for i in range(len(behaviors)):
            item_graph = np.zeros((item_num, item_num), dtype=int)
            user_graph = np.zeros((user_num, user_num), dtype=int)
            item_graph_list.append(item_graph)
            user_graph_list.append(user_graph)

    item_in, item_out = create_item_graphs(meta_path, item_graph_list)
    user_in, user_out = create_user_graphs(trnMats, trnMatsT, user_graph_list)

    return item_in, item_out, user_in, user_out

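# NOTE (editor sketch): create_item_graphs() below counts, for every user's
# time-ordered sequence, how often item_i precedes item_j, bucketing each pair
# under the behavior of the *later* interaction. A toy restatement
# (`_count_transitions_demo` is a hypothetical helper, not called anywhere):
def _count_transitions_demo():
    item_seq = [3, 1, 3]  # one user's time-ordered items
    beh_seq = [0, 1, 0]   # matching behavior ids
    graph = np.zeros((4, 4, 2), dtype=int)  # item x item x behavior
    for i in range(len(item_seq) - 1):
        for j in range(i + 1, len(item_seq)):
            graph[item_seq[i], item_seq[j], beh_seq[j]] += 1
    return graph  # e.g. graph[3, 1, 1] == 1 and graph[3, 3, 0] == 1
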
def create_item_graphs(meta_path, item_graph_list):
    print('create item graphs')
    in_list = []
    out_list = []
    for user in tqdm(range(user_num), total=user_num, ncols=100):
        item_seq = meta_path[user][0]
        beh_seq = meta_path[user][1]

        for i in range(len(item_seq) - 1):
            for j in range(i + 1, len(item_seq)):
                item_i = item_seq[i]
                item_j = item_seq[j]
                beh_j = beh_seq[j]

                # e^t_{ij} += 1
                item_graph_list[beh_j][item_i][item_j] += 1
    print('convert to degree mat')
    for beh in range(len(behaviors)):
        in_g, out_g = get_degree_matrix(item_graph_list[beh])
        in_list.append(in_g)
        out_list.append(out_g)
    return in_list, out_list


def create_user_graphs(behavior_graphs, behavior_graphs_t, user_graph_list):
    with torch.no_grad():
        print('create user graphs')
        in_list = []
        out_list = []
        shapes = 0
        for beh in range(len(behaviors)):
            beh_graph = behavior_graphs[beh]
            beh_graph_t = behavior_graphs_t[beh]

            # co-interaction counts: entry (u, v) = number of items u and v share under this behavior
            user_graph_list[beh] = (beh_graph @ beh_graph_t.to_dense()).data

            shapes = beh_graph.shape[0]

        total = torch.zeros((shapes, shapes)).to(args.device)
        for beh in range(len(behaviors)):
            total += user_graph_list[beh]

        # normalize each behavior's counts by the total across all behaviors
        for beh in range(len(behaviors)):
            cap_uid, cap_iid = torch.nonzero(user_graph_list[beh], as_tuple=True)
            cap_num = user_graph_list[beh][cap_uid, cap_iid]
            cup_num = total[cap_uid, cap_iid]
            user_graph_list[beh][cap_uid, cap_iid] = cap_num / cup_num

            in_g, out_g = get_degree_matrix(user_graph_list[beh].detach().cpu().numpy())
            in_list.append(in_g)
            out_list.append(out_g)

    return in_list, out_list


def matrix_to_tensor(cur_matrix):
    if type(cur_matrix) != sp.coo_matrix:
        cur_matrix = cur_matrix.tocoo()
    indices = torch.from_numpy(np.vstack((cur_matrix.row, cur_matrix.col)).astype(np.int64))
    values = torch.from_numpy(cur_matrix.data)
    shape = torch.Size(cur_matrix.shape)

    if torch.cuda.is_available():
        return torch.sparse.FloatTensor(indices, values, shape).to(torch.float32).to(args.device)
    # fall back to a CPU tensor instead of failing when CUDA is unavailable
    return torch.sparse.FloatTensor(indices, values, shape).to(torch.float32)


def bool_numpy(numpy_array):
    # replace zeros with ones so the array can safely be used as a divisor
    numpy_array_1 = numpy_array.copy()
    numpy_array_1[numpy_array_1 == 0.] = 1
    return numpy_array_1

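# NOTE (editor sketch): quick numeric check of get_degree_matrix() below, using
# the same 3x3 matrix as its docstring. `_degree_demo` is a hypothetical helper
# for illustration only.
def _degree_demo():
    A = np.array([[1, 2, 2],
                  [0, 4, 6],
                  [1, 0, 0]], dtype=float)
    out = A / bool_numpy(A.sum(axis=1)).reshape(3, 1)        # each row sums to 1
    a_in = (A / bool_numpy(A.sum(axis=0)).reshape(1, 3)).T   # columns normalized, then transposed
    return a_in, out
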
def get_degree_matrix(adj_matrix):
    '''
    A = [ 1, 2, 2,
          0, 4, 6,
          1, 0, 0 ]

    in = [ 0.50, 0.00, 0.50,
           0.33, 0.67, 0.00,
           0.25, 0.75, 0.00 ]

    out = [ 0.2, 0.4, 0.4,
            0.0, 0.4, 0.6,
            1.0, 0.0, 0.0 ]

    NOTE: E = AE --> E \in R^{n x d}
    '''
    d = np.shape(adj_matrix)[0]
    row_temp = np.sum(adj_matrix, axis=0)
    row = bool_numpy(row_temp)  # guard against empty columns
    row = np.reshape(row, (1, d))
    col_temp = np.sum(adj_matrix, axis=1)
    col = bool_numpy(col_temp)  # guard against empty rows
    col = np.reshape(col, (d, 1))
    a_out = adj_matrix / col
    a_in = adj_matrix / row
    a_in = a_in.T

    a_in = dense2sparse(a_in)
    a_out = dense2sparse(a_out)

    return a_in, a_out


def dense2sparse(_matrix):
    a_ = sparse.coo_matrix(_matrix)
    v1 = a_.data
    indices = np.vstack((a_.row, a_.col))
    i = torch.LongTensor(indices)
    v = torch.FloatTensor(v1)
    shape = a_.shape
    if torch.cuda.is_available():
        sparse_matrix = torch.sparse.FloatTensor(i, v, torch.Size(shape)).to(torch.float32).to(args.device)
    else:
        sparse_matrix = torch.sparse.FloatTensor(i, v, torch.Size(shape))
    return sparse_matrix


class RecDataset(data.Dataset):
    def __init__(self, data, num_item, train_mat=None, num_ng=1, is_training=True):
        super(RecDataset, self).__init__()

        self.data = np.array(data)
        self.num_item = num_item
        self.train_mat = train_mat
        self.is_training = is_training

    def ng_sample(self):
        assert self.is_training, 'no need to sample when testing'
        dok_trainMat = self.train_mat.todok()
        length = self.data.shape[0]
        self.neg_data = np.random.randint(low=0, high=self.num_item, size=length)

        for i in range(length):
            uid = self.data[i][0]
            iid = self.neg_data[i]
            # resample until the drawn item is a true negative for this user
            while (uid, iid) in dok_trainMat:
                iid = np.random.randint(low=0, high=self.num_item)
            self.neg_data[i] = iid

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        user = self.data[idx][0]
        item_i = self.data[idx][1]

        if self.is_training:
            neg_data = self.neg_data
            item_j = neg_data[idx]
            return user, item_i, item_j
        else:
            return user, item_i

    def getMatrix(self):
        pass

    def getAdj(self):
        pass

    def sampleLargeGraph(self):

        def makeMask():
            pass

        def updateBdgt():
            pass

        def sample():
            pass

    def constructData(self):
        pass

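# NOTE (editor sketch): every ng_sample() below uses the same rejection-sampling
# idea: draw a random item id and redraw while the (user, item) pair appears in
# the training DOK matrix. Self-contained restatement (`_sample_negative_demo`
# is a hypothetical helper, not called anywhere):
def _sample_negative_demo(uid, dok_mat, num_item):
    iid = np.random.randint(low=0, high=num_item)
    while (uid, iid) in dok_mat:  # observed pairs are rejected
        iid = np.random.randint(low=0, high=num_item)
    return iid
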
class RecDataset_beh(data.Dataset):
    def __init__(self, beh, data, num_item, behaviors_data=None, num_ng=1, is_training=True):
        super(RecDataset_beh, self).__init__()

        self.data = np.array(data)
        self.num_item = num_item
        self.is_training = is_training
        self.beh = beh
        self.behaviors_data = behaviors_data

        self.length = self.data.shape[0]
        self.neg_data = [None] * self.length
        self.pos_data = [None] * self.length

    # ng_sample is chosen at class-definition time: prompt tuning samples from
    # the target behavior only, while pre-training samples from every behavior.
    if args.prompt and not args.denoise_tune:
        def ng_sample(self):
            assert self.is_training, 'no need to sample when testing'
            for i in range(self.length):
                self.pos_data[i] = [None]
                self.neg_data[i] = [None]
            train_u, train_v = self.behaviors_data[-1].nonzero()
            beh_dok = self.behaviors_data[-1].todok()
            set_pos = np.array(list(set(train_v)))

            self.pos_data_index = np.random.choice(set_pos, size=self.length, replace=True, p=None)
            self.neg_data_index = np.random.randint(low=0, high=self.num_item, size=self.length)

            for i in range(self.length):
                uid = self.data[i][0]
                iid_neg = self.neg_data[i][0] = self.neg_data_index[i]
                iid_pos = self.pos_data[i][0] = self.pos_data_index[i]

                # rejection-sample the negative item for this user
                while (uid, iid_neg) in beh_dok:
                    iid_neg = np.random.randint(low=0, high=self.num_item)
                self.neg_data[i][0] = iid_neg

                self.pos_data[i][0] = train_v[i]
    else:
        def ng_sample(self):
            assert self.is_training, 'no need to sample when testing'

            for i in range(self.length):
                self.neg_data[i] = [None] * len(self.beh)
                self.pos_data[i] = [None] * len(self.beh)

            for index in range(len(self.beh)):
                train_u, train_v = self.behaviors_data[index].nonzero()
                beh_dok = self.behaviors_data[index].todok()

                set_pos = np.array(list(set(train_v)))

                self.pos_data_index = np.random.choice(set_pos, size=self.length, replace=True, p=None)
                self.neg_data_index = np.random.randint(low=0, high=self.num_item, size=self.length)

                for i in range(self.length):
                    uid = self.data[i][0]
                    iid_neg = self.neg_data[i][index] = self.neg_data_index[i]
                    iid_pos = self.pos_data[i][index] = self.pos_data_index[i]

                    # rejection-sample the negative item for this user
                    while (uid, iid_neg) in beh_dok:
                        iid_neg = np.random.randint(low=0, high=self.num_item)
                    self.neg_data[i][index] = iid_neg

                    if index == (len(self.beh) - 1):
                        # target behavior: the positive is the observed interaction itself
                        self.pos_data[i][index] = train_v[i]
                    elif (uid, iid_pos) not in beh_dok:
                        if len(self.behaviors_data[index][uid].data) == 0:  # the user has no interactions of this type at all
                            self.pos_data[i][index] = -1
                        else:
                            t_array = self.behaviors_data[index][uid].toarray()
                            pos_index = np.where(t_array != 0)[1]
                            iid_pos = np.random.choice(pos_index, size=1, replace=True, p=None)[0]
                            self.pos_data[i][index] = iid_pos

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        user = self.data[idx][0]
        item_i = self.pos_data[idx]

        if self.is_training:
            item_j = self.neg_data[idx]
            return user, item_i, item_j
        else:
            return user, item_i

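# NOTE (editor sketch): defining ng_sample inside an `if` at class-body level is
# legal Python; the branch is evaluated once, when the module is imported, so
# the class ends up with whichever method matches the current args. Minimal
# illustration (hypothetical class and flag):
#
#   class Sampler:
#       if fast_mode:
#           def step(self): return 'fast'
#       else:
#           def step(self): return 'thorough'
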
class RecDataset_beh2(data.Dataset):
    def __init__(self, beh, data, num_item, behaviors_data=None, num_ng=1, is_training=True):
        super(RecDataset_beh2, self).__init__()

        self.data = np.array(data)
        self.num_item = num_item
        self.is_training = is_training
        self.beh = beh  # e.g. ['tip', 'neg', 'neutral', 'pos']
        self.behaviors_data = behaviors_data

        self.length = self.data.shape[0]  # number of training pairs
        self.neg_data = [None] * self.length
        self.pos_data = [None] * self.length

    def ng_sample1(self):
        assert self.is_training, 'no need to sample when testing'

        for i in range(self.length):
            self.neg_data[i] = [None] * len(self.beh)
            self.pos_data[i] = [None] * len(self.beh)

        for index in range(len(self.beh)):
            train_u, train_v = self.behaviors_data[index].nonzero()
            beh_dok = self.behaviors_data[index].todok()

            set_pos = np.array(list(set(train_v)))

            self.pos_data_index = np.random.choice(set_pos, size=self.length, replace=True, p=None)
            self.neg_data_index = np.random.randint(low=0, high=self.num_item, size=self.length)

            for i in range(self.length):
                uid = self.data[i][0]
                iid_neg = self.neg_data[i][index] = self.neg_data_index[i]
                iid_pos = self.pos_data[i][index] = self.pos_data_index[i]

                # rejection-sample the negative item
                while (uid, iid_neg) in beh_dok:
                    iid_neg = np.random.randint(low=0, high=self.num_item)
                self.neg_data[i][index] = iid_neg

                if index == (len(self.beh) - 1):
                    self.pos_data[i][index] = train_v[i]
                elif (uid, iid_pos) not in beh_dok:
                    if len(self.behaviors_data[index][uid].data) == 0:
                        self.pos_data[i][index] = -1
                    else:
                        t_array = self.behaviors_data[index][uid].toarray()
                        pos_index = np.where(t_array != 0)[1]
                        iid_pos = np.random.choice(pos_index, size=1, replace=True, p=None)[0]
                        self.pos_data[i][index] = iid_pos
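    # NOTE (editor): ng_sample1 above appears to be an earlier variant that
    # always samples per behavior; ng_sample below supersedes it by handling
    # the target == 'buy' case with a single slot per training pair.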
    def ng_sample(self):
        assert self.is_training, 'no need to sample when testing'
        if args.target == 'buy':
            for i in range(self.length):
                self.neg_data[i] = [None] * 1
                self.pos_data[i] = [None] * 1
        else:
            for i in range(self.length):
                self.neg_data[i] = [None] * len(self.beh)
                self.pos_data[i] = [None] * len(self.beh)

        if args.target == 'buy':
            index = len(self.beh) - 1
            train_u, train_v = self.behaviors_data[index].nonzero()
            beh_dok = self.behaviors_data[index].todok()
            set_pos = np.array(list(set(train_v)))
            self.pos_data_index = np.random.choice(set_pos, size=self.length, replace=True, p=None)
            self.neg_data_index = np.random.randint(low=0, high=self.num_item, size=self.length)

            for i in range(self.length):
                uid = self.data[i][0]
                iid_neg = self.neg_data[i][0] = self.neg_data_index[i]
                iid_pos = self.pos_data[i][0] = self.pos_data_index[i]

                # rejection-sample the negative item
                while (uid, iid_neg) in beh_dok:
                    iid_neg = np.random.randint(low=0, high=self.num_item)
                self.neg_data[i][0] = iid_neg

                if index == (len(self.beh) - 1):
                    self.pos_data[i][0] = train_v[i]
                elif (uid, iid_pos) not in beh_dok:
                    if len(self.behaviors_data[index][uid].data) == 0:
                        self.pos_data[i][0] = -1
                    else:
                        t_array = self.behaviors_data[index][uid].toarray()
                        pos_index = np.where(t_array != 0)[1]
                        iid_pos = np.random.choice(pos_index, size=1, replace=True, p=None)[0]
                        self.pos_data[i][0] = iid_pos

        else:
            for index in range(len(self.beh)):
                train_u, train_v = self.behaviors_data[index].nonzero()
                beh_dok = self.behaviors_data[index].todok()

                set_pos = np.array(list(set(train_v)))

                self.pos_data_index = np.random.choice(set_pos, size=self.length, replace=True, p=None)
                self.neg_data_index = np.random.randint(low=0, high=self.num_item, size=self.length)

                for i in range(self.length):
                    uid = self.data[i][0]
                    iid_neg = self.neg_data[i][index] = self.neg_data_index[i]
                    iid_pos = self.pos_data[i][index] = self.pos_data_index[i]

                    # rejection-sample the negative item
                    while (uid, iid_neg) in beh_dok:
                        iid_neg = np.random.randint(low=0, high=self.num_item)
                    self.neg_data[i][index] = iid_neg

                    if index == (len(self.beh) - 1):
                        self.pos_data[i][index] = train_v[i]
                    elif (uid, iid_pos) not in beh_dok:
                        if len(self.behaviors_data[index][uid].data) == 0:
                            self.pos_data[i][index] = -1
                        else:
                            t_array = self.behaviors_data[index][uid].toarray()
                            pos_index = np.where(t_array != 0)[1]
                            iid_pos = np.random.choice(pos_index, size=1, replace=True, p=None)[0]
                            self.pos_data[i][index] = iid_pos

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        user = self.data[idx][0]
        item_i = self.pos_data[idx]

        if self.is_training:
            item_j = self.neg_data[idx]
            return user, item_i, item_j
        else:
            return user, item_i

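# NOTE (editor sketch): these datasets expect ng_sample() to be called once per
# epoch, before iterating the DataLoader, so that fresh positives/negatives are
# drawn for every pass (until then, pos_data/neg_data hold only None slots).
# Illustrative usage, assuming a `train_loader` built as in load_data() above:
#
#   for epoch in range(num_epochs):
#       train_loader.dataset.ng_sample()
#       for user, item_i, item_j in train_loader:
#           ...  # BPR-style update on (user, positive, negative)
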
class RecDataset_beh1(data.Dataset):
    def __init__(self, beh, data, num_item, behaviors_data=None, num_ng=1, is_training=True):
        super(RecDataset_beh1, self).__init__()

        self.data = np.array(data)
        self.num_item = num_item
        self.is_training = is_training
        self.beh = beh
        self.behaviors_data = behaviors_data

        self.length = self.data.shape[0]
        self.neg_data = [None] * self.length
        self.pos_data = [None] * self.length

    def ng_sample(self):
        assert self.is_training, 'no need to sample when testing'

        for i in range(self.length):
            self.neg_data[i] = [None] * 1
            self.pos_data[i] = [None] * 1

        for index in range(len(self.beh)):
            if args.target == 'buy' and index != len(self.beh) - 1:
                continue
            train_u, train_v = self.behaviors_data[index].nonzero()
            beh_dok = self.behaviors_data[index].todok()

            set_pos = np.array(list(set(train_v)))

            self.pos_data_index = np.random.choice(set_pos, size=self.length, replace=True, p=None)
            self.neg_data_index = np.random.randint(low=0, high=self.num_item, size=self.length)

            for i in range(self.length):
                uid = self.data[i][0]
                iid_neg = self.neg_data[i][0] = self.neg_data_index[i]
                iid_pos = self.pos_data[i][0] = self.pos_data_index[i]

                # rejection-sample the negative item
                while (uid, iid_neg) in beh_dok:
                    iid_neg = np.random.randint(low=0, high=self.num_item)
                self.neg_data[i][0] = iid_neg

                if index == (len(self.beh) - 1):
                    self.pos_data[i][0] = train_v[i]
                elif (uid, iid_pos) not in beh_dok:
                    # use the current behavior's matrix here, matching the
                    # sibling classes (the original indexed behaviors_data[0])
                    if len(self.behaviors_data[index][uid].data) == 0:
                        self.pos_data[i][0] = -1
                    else:
                        t_array = self.behaviors_data[index][uid].toarray()
                        pos_index = np.where(t_array != 0)[1]
                        iid_pos = np.random.choice(pos_index, size=1, replace=True, p=None)[0]
                        self.pos_data[i][0] = iid_pos

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        user = self.data[idx][0]
        item_i = self.pos_data[idx]

        if self.is_training:
            item_j = self.neg_data[idx]
            return user, item_i, item_j
        else:
            return user, item_i

--------------------------------------------------------------------------------