├── data_loader
│   ├── __init__.py
│   ├── data_collator_base.py
│   ├── data_utils.py
│   └── data_loader_edge_v1.py
├── general_util
│   ├── __init__.py
│   ├── mrr.py
│   ├── mixin.py
│   ├── logger.py
│   ├── average_meter.py
│   └── training_utils.py
├── requirements.txt
├── scripts
│   ├── gat_tf_emb_max_ctr_v1_3_1_predict.sh
│   ├── run_gat_tf_emb_max_v1_eval.sh
│   ├── gat_tf_emb_max_v1_3_1_predict.sh
│   ├── gat_tf_emb_max_ctr_v1_3_1_wo_ii_uia_predict.sh
│   ├── gat_tf_emb_max_ctr_v1_3_1_wo_iia_iai_uia_uiaiu_predict.sh
│   ├── gat_pooling_emb_max_ctr_v1_3_1_predict.sh
│   ├── run_predict.sh
│   ├── item_ab_gat_tf_emb_max_ctr_v1_3_1_predict.sh
│   ├── run_gat_ablation.sh
│   └── sparse_graph.sh
├── preprocess
│   ├── process_subgraph_v3.py
│   ├── sparsing_subgraph_v1.py
│   ├── embedding_loading.py
│   ├── data_statistics.py
│   └── process_user_embedding.py
├── README.md
├── models
│   ├── layers.py
│   ├── modeling_utils.py
│   ├── gat.py
│   └── simple_gat.py
├── acc2auc.py
├── baselines
│   ├── GPBPR
│   │   └── README.md
│   ├── PAIBPT
│   │   └── README.md
│   └── HFGN
│       ├── utility
│       │   ├── helper.py
│       │   ├── metrics.py
│       │   └── parser.py
│       ├── README.md
│       ├── inference
│       │   └── fltb_test.py
│       └── data
│           ├── recom_batch_train.py
│           └── fltb_batch_train.py
├── .gitignore
└── conf
    ├── basic_config_v1.yaml
    ├── dataset_v2
    │   └── gat_tf_emb_v1.yaml
    ├── gat_tf_emb_v1.yaml
    ├── gat_mlp
    │   └── gat_mlp_emb_max_ctr_v1_3_1.yaml
    ├── gp_bpr
    │   ├── gat_tf_emb_v1.yaml
    │   ├── gat_tf_emb_wo_img_fix_v1.yaml
    │   ├── gat_tf_emb_max_v1.yaml
    │   ├── gat_tf_emb_max_fix_graph_v1_n5.yaml
    │   ├── gat_tf_emb_max_fix_graph_v3.yaml
    │   └── gat_tf_emb_max_fix_graph_v2.yaml
    ├── gat_pooling
    │   ├── gat_pooling_emb_max_v1_3_1.yaml
    │   └── gat_pooling_emb_max_ctr_v1_3_1.yaml
    ├── gat_tf_wo_att
    │   └── gat_tf_emb_max_wo_att_v1_3_1.yaml
    ├── gat_tf_emb_max_v1_3_1_wo_ii_uia.yaml
    ├── gat_tf_emb_max_v1_3_1_wo_iia_iai_uia_uiaiu.yaml
    ├── item_ab
    │   ├── gat_tf_emb_max_v1_3_1.yaml
    │   └── gat_tf_emb_max_ctr_v1_3_1.yaml
    ├── gat_tf_emb_max_ctr_v1.yaml
    ├── ctr_pretrain
    │   ├── gat_tf_emb_max_ctr_pt_v1.yaml
    │   └── gat_tf_emb_max_ctr_pt_v2.yaml
    ├── rgcn
    │   └── rgcn_tf_emb_max_v1.yaml
    ├── gat_tf_emb_max_v1_3_1.yaml
    ├── simple_gat
    │   ├── simple_gat_tf_emb_max_v1.yaml
    │   └── gpbpr_simple_gat_tf_emb_max_v1_3_1.yaml
    ├── gat_tf_emb_max_ctr_v1_3_1_wo_ii_uia.yaml
    ├── gat_tf_emb_max_ctr_v1_3_1_wo_iia_iai_uia_uiaiu.yaml
    ├── gat_tf_emb_max_ctr_v1_3_1_1.yaml
    └── gat_tf_emb_max_ctr_v1_3_3.yaml
--------------------------------------------------------------------------------
/data_loader/__init__.py:
--------------------------------------------------------------------------------
 1 | 
--------------------------------------------------------------------------------
/general_util/__init__.py:
--------------------------------------------------------------------------------
 1 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | torch==1.8.1+cu101
 2 | torchvision==0.9.1+cu101
 3 | fairscale==0.4.4
 4 | hydra-core==1.1.1
 5 | tqdm
 6 | omegaconf
 7 | tensorboard
 8 | dgl
--------------------------------------------------------------------------------
/scripts/gat_tf_emb_max_ctr_v1_3_1_predict.sh:
--------------------------------------------------------------------------------
 1 | #for tuple_len in 4 7; do
 2 | for tuple_len in 4 5 6 7 12; do
 3 |   python predict.py dataset.max_tuple_num=$tuple_len -cp conf -cn gat_tf_emb_max_ctr_v1_3_1
 4 | done;
--------------------------------------------------------------------------------
/scripts/run_gat_tf_emb_max_v1_eval.sh:
--------------------------------------------------------------------------------
 1 | for step in 2000 1000 3000; do
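 2 |   # Evaluation-only pass: do_train=False skips training, and eval_sub_path selects the saved checkpoint-${step} directory to load.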
 3 |   python trainer.py do_train=False do_eval=True eval_sub_path=checkpoint-${step} eval_num_workers=32 dataset.graph_sampler.max_neighbour_num=6 -cp conf/gp_bpr -cn gat_tf_emb_max_v1
 4 | done;
--------------------------------------------------------------------------------
/scripts/gat_tf_emb_max_v1_3_1_predict.sh:
--------------------------------------------------------------------------------
 1 | #for tuple_len in 5 6 8 10; do
 2 | #  python predict.py dataset.max_tuple_num=$tuple_len -cp conf -cn gat_tf_emb_max_v1_3_1
 3 | #done;
 4 | 
 5 | #for tuple_len in 4 7; do
 6 | for tuple_len in 4 5 6 7 12; do
 7 |   python predict.py dataset.max_tuple_num=$tuple_len -cp conf -cn gat_tf_emb_max_v1_3_1
 8 | done;
--------------------------------------------------------------------------------
/scripts/gat_tf_emb_max_ctr_v1_3_1_wo_ii_uia_predict.sh:
--------------------------------------------------------------------------------
 1 | #for tuple_len in 5 6 8 10; do
 2 | #  python predict.py dataset.max_tuple_num=$tuple_len -cp conf -cn gat_tf_emb_max_ctr_v1_3_1_wo_ii_uia
 3 | #done;
 4 | 
 5 | #for tuple_len in 4 7; do
 6 | for tuple_len in 4 5 6 7 12; do
 7 |   python predict.py dataset.max_tuple_num=$tuple_len -cp conf -cn gat_tf_emb_max_ctr_v1_3_1_wo_ii_uia
 8 | done;
--------------------------------------------------------------------------------
/scripts/gat_tf_emb_max_ctr_v1_3_1_wo_iia_iai_uia_uiaiu_predict.sh:
--------------------------------------------------------------------------------
 1 | #for tuple_len in 5 6 8 10; do
 2 | #  python predict.py dataset.max_tuple_num=$tuple_len -cp conf -cn gat_tf_emb_max_ctr_v1_3_1_wo_iia_iai_uia_uiaiu
 3 | #done;
 4 | 
 5 | #for tuple_len in 4 7; do
 6 | for tuple_len in 4 5 6 7 12; do
 7 |   python predict.py dataset.max_tuple_num=$tuple_len -cp conf -cn gat_tf_emb_max_ctr_v1_3_1_wo_iia_iai_uia_uiaiu
 8 | done;
 9 | 
--------------------------------------------------------------------------------
/preprocess/process_subgraph_v3.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from multiprocessing import Pool
 3 | 
 4 | """
 5 | This script has several aims:
 6 | 1. Process or load a processed subgraph (generated by process_subgraph_v2.py).
 7 | 2. For each node, load the corresponding subgraph and its embedding, relabel the
 8 |    node ids to prepare the dgl.graph, and save the final node set.
 9 | 
10 | As a result, the data loader can directly load the embedding of each node.
11 | """
12 | 
13 | 
--------------------------------------------------------------------------------
/scripts/gat_pooling_emb_max_ctr_v1_3_1_predict.sh:
--------------------------------------------------------------------------------
 1 | #for tuple_len in 5 6 8 10; do
 2 | #  python predict.py dataset.max_tuple_num=$tuple_len -cp conf/gat_pooling -cn gat_pooling_emb_max_ctr_v1_3_1
 3 | #done;
 4 | 
 5 | #for tuple_len in 4 7; do
 6 | #  python predict.py dataset.max_tuple_num=$tuple_len -cp conf/gat_pooling -cn gat_pooling_emb_max_ctr_v1_3_1
 7 | #done;
 8 | 
 9 | for tuple_len in 4 5 6 7 12; do
10 |   python predict.py dataset.max_tuple_num=$tuple_len -cp conf/gat_pooling -cn gat_pooling_emb_max_ctr_v1_3_1
11 | done;
--------------------------------------------------------------------------------
/general_util/mrr.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Calculate the MRR (mean reciprocal rank) metric.
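 3 | MRR = (1 / |Q|) * sum_q 1 / rank_q, where rank_q is the 1-based rank of the
 4 | positive score among all candidates of query q; ties count against the positive
 5 | (see get_rank below). E.g. get_mrr([[0.5, 0.9, 0.7]]) == 1 / 3.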
 6 | """
 7 | 
 8 | 
 9 | def get_rank(pos_score, neg_scores):
10 |     rank = 1
11 |     for score in neg_scores:
12 |         if score >= pos_score:
13 |             rank += 1
14 |     return rank
15 | 
16 | 
17 | def get_mrr(scores_list):
18 |     mrr = 0
19 |     for scores in scores_list:
20 |         pos_score = scores[0]
21 |         neg_scores = scores[1:]
22 |         rank = get_rank(pos_score, neg_scores)
23 |         mrr += 1.0 / rank
24 |     return mrr / len(scores_list)
25 | 
--------------------------------------------------------------------------------
/scripts/run_predict.sh:
--------------------------------------------------------------------------------
 1 | source scripts/gat_pooling_emb_max_ctr_v1_3_1_predict.sh
 2 | 
 3 | echo "New Command: "
 4 | 
 5 | source scripts/gat_tf_emb_max_ctr_v1_3_1_predict.sh
 6 | 
 7 | echo "New Command: "
 8 | 
 9 | source scripts/gat_tf_emb_max_ctr_v1_3_1_wo_ii_uia_predict.sh
10 | 
11 | echo "New Command: "
12 | 
13 | source scripts/gat_tf_emb_max_ctr_v1_3_1_wo_iia_iai_uia_uiaiu_predict.sh
14 | 
15 | echo "New Command: "
16 | 
17 | source scripts/gat_tf_emb_max_v1_3_1_predict.sh
18 | 
19 | echo "New Command: "
20 | 
21 | source scripts/item_ab_gat_tf_emb_max_ctr_v1_3_1_predict.sh
22 | 
23 | 
24 | 
25 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## MG-PFCM
 2 | 
 3 | This is the implementation of the paper:
 4 | 
 5 | **Personalized Fashion Compatibility Modeling via Metapath-guided Heterogeneous Graph Learning.**
 6 | Weili Guan, Fangkai Jiao, Xuemeng Song, Haokun Wen, Chung-Hsing Yeh and Xiaojun Chang. _SIGIR._ 2022.
 7 | 
 8 | ### Requirements
 9 | 
10 | The required packages are listed in `requirements.txt` and can be installed with `pip install -r requirements.txt`.
11 | 
12 | ### Dataset
13 | 
14 | [Google drive link](https://drive.google.com/file/d/1b_T_MHHF7W5buSXi8M3LsAzCVgJzJbRn/view?usp=share_link)
15 | 
16 | ### Preprocess
17 | 
18 | Pending...
19 | 
20 | 
--------------------------------------------------------------------------------
/models/layers.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn, Tensor
 3 | 
 4 | from general_util.logger import get_child_logger
 5 | 
 6 | logger = get_child_logger("layer")
 7 | 
 8 | 
 9 | class LearnableLayerCombine(nn.Module):
10 |     def __init__(self, num_layers: int = 2):
11 |         super().__init__()
12 | 
13 |         self.weight = nn.Parameter(torch.FloatTensor(num_layers))
14 | 
15 |     def forward(self, x: Tensor):
16 |         """
17 |         :param x: [num_layers, seq_len, h]
18 |         :return: y: [seq_len, h]
19 |         """
20 |         return torch.einsum("l,lsh->sh", self.weight, x)
21 | 
22 | 
23 | def bpr_loss(positive_logits: Tensor, negative_logits: Tensor):
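24 |     # Standard BPR objective: maximize the log-sigmoid of the positive-negative margin.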
25 |     return -torch.nn.functional.logsigmoid(positive_logits - negative_logits).mean()
26 | 
27 | 
--------------------------------------------------------------------------------
/scripts/item_ab_gat_tf_emb_max_ctr_v1_3_1_predict.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #for tuple_len in 5 6 8 10; do
 4 | #  python predict.py dataset.max_tuple_num=$tuple_len model.item_use_img=True model.item_use_text=False gpu=A100 -cp conf/item_ab -cn gat_tf_emb_max_ctr_v1_3_1
 5 | #done;
 6 | #
 7 | #for tuple_len in 5 6 8 10; do
 8 | #  python predict.py dataset.max_tuple_num=$tuple_len model.item_use_img=False model.item_use_text=True gpu=T4 -cp conf/item_ab -cn gat_tf_emb_max_ctr_v1_3_1
 9 | #done;
10 | 
11 | 
12 | #for tuple_len in 4 7; do
13 | for tuple_len in 4 5 6 7 12; do
14 |   python predict.py dataset.max_tuple_num=$tuple_len model.item_use_img=True model.item_use_text=False gpu=A100 -cp conf/item_ab -cn gat_tf_emb_max_ctr_v1_3_1
15 | done;
16 | 
17 | #for tuple_len in 4 7; do
18 | for tuple_len in 4 5 6 7 12; do
19 |   python predict.py dataset.max_tuple_num=$tuple_len model.item_use_img=False model.item_use_text=True gpu=T4 -cp conf/item_ab -cn gat_tf_emb_max_ctr_v1_3_1
20 | done;
21 | 
--------------------------------------------------------------------------------
/acc2auc.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import numpy as np
 3 | import json
 4 | 
 5 | if __name__ == '__main__':
 6 |     parser = argparse.ArgumentParser()
 7 |     parser.add_argument('--pred_file', type=str)
 8 |     parser.add_argument('--prob_file', type=str)
 9 |     parser.add_argument('--batch_size', type=int)
10 | 
11 |     args = parser.parse_args()
12 | 
13 |     bs = args.batch_size
14 |     db = 2 * bs
15 | 
16 |     pred_ls = np.load(args.pred_file)
17 |     prob_ls = json.load(open(args.prob_file, 'r'))
18 | 
19 |     predictions = []
20 |     tmp = []
21 |     for idx, (pred, prob) in enumerate(zip(pred_ls, prob_ls)):
22 |         if pred == 1:
23 |             tmp.append([1 - prob, prob])
24 |         else:
25 |             tmp.append([prob, 1 - prob])
26 | 
27 |         if (idx + 1) % db == 0 or idx == len(pred_ls) - 1:
28 |             assert len(tmp) % 2 == 0
29 |             _half = len(tmp) // 2
30 |             for j in range(_half):
31 |                 cmp_res = tmp[j][1] > tmp[j + _half][0]
32 |                 if cmp_res:
33 |                     predictions.append(1)
34 |                 else:
35 |                     predictions.append(0)
36 |             tmp.clear()
37 |     assert len(predictions) == len(pred_ls) // 2, (len(predictions), len(pred_ls))
38 | 
39 |     auc = sum(predictions) * 1.0 / len(predictions)
40 |     print(auc)
41 | 
--------------------------------------------------------------------------------
/general_util/mixin.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | from typing import Dict, List
 3 | 
 4 | import torch
 5 | 
 6 | from general_util.average_meter import LogMetric
 7 | from general_util.logger import get_child_logger
 8 | 
 9 | logger = get_child_logger("Mixin")
10 | 
11 | 
12 | class LogMixin:
13 |     eval_metrics: LogMetric = None
14 | 
15 |     def init_metric(self, *metric_names):
16 |         self.eval_metrics = LogMetric(*metric_names)
17 | 
18 |     def get_eval_log(self, reset=False):
19 |         if self.eval_metrics is None:
20 |             logger.warning("The `eval_metrics` attribute hasn't been initialized.")
21 | 
22 |         results = self.eval_metrics.get_log()
23 | 
24 |         _eval_metric_log = '\t'.join([f"{k}: {v}" for k, v in results.items()])
25 | 
26 |         if reset:
27 |             self.eval_metrics.reset()
28 | 
29 |         return _eval_metric_log, results
30 | 
31 | 
32 | class PredictionMixin:
33 |     tensor_dict: Dict[str, List] = defaultdict(list)
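34 |     # tensor_dict is a class-level default shared across instances;
35 |     # reset_predict_tensors() rebinds it as a fresh per-instance dict.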
36 | 
37 |     def reset_predict_tensors(self):
38 |         self.tensor_dict = defaultdict(list)
39 | 
40 |     def concat_predict_tensors(self, **tensors: torch.Tensor):
41 |         for k, v in tensors.items():
42 |             self.tensor_dict[k].extend(v.detach().cpu().tolist())
43 | 
44 |     def get_predict_tensors(self):
45 |         return self.tensor_dict
46 | 
--------------------------------------------------------------------------------
/baselines/GPBPR/README.md:
--------------------------------------------------------------------------------
 1 | # GP-BPR: Personalized Compatibility Modeling for Clothing Matching
 2 | 
 3 | Code for paper [GP-BPR: Personalized Compatibility Modeling for Clothing Matching](https://dl.acm.org/doi/abs/10.1145/3343031.3350956).
 4 | 
 5 | ## Dependencies
 6 | 
 7 | This project currently requires a stable version of [PyTorch](pytorch.org):
 8 | 
 9 | - torch 1.0.0
10 | 
11 | or
12 | 
13 | - torch 1.0.1.post2
14 | 
15 | You need a GPU to run this program.
16 | 
17 | ## Data Preparation
18 | 
19 | ### /data
20 | 
21 | - train(valid/test).csv
22 | 
23 |   format: UserID|TopID|PositiveBottomID|NegativeBottomID
24 | 
25 | ### /feat
26 | 
27 | - smallnwjc2vec
28 | 
29 | - textfeatures
30 | 
31 | - visualfeatures
32 | 
33 | Can be downloaded from [here](https://drive.google.com/file/d/1ILz1P4BiyQ0rTwOJD-vqs2J4cF77alUM/view).
34 | 
35 | ### Meta data
36 | 
37 | format: user/outfit/item
38 | 
39 | Can be downloaded from [here](https://drive.google.com/open?id=1sTfUoNPid9zG_MgV--lWZTBP1XZpmcK8).
40 | 
41 | ## Running command
42 | 
43 |     CUDA_VISIBLE_DEVICES=0 python main.py
44 | 
45 | ## Citations
46 | 
47 | ```
48 | @inproceedings{song2019gp,
49 |   title={GP-BPR: Personalized Compatibility Modeling for Clothing Matching},
50 |   author={Song, Xuemeng and Han, Xianjing and Li, Yunkai and Chen, Jingyuan and Xu, Xin-Shun and Nie, Liqiang},
51 |   booktitle={Proceedings of the 27th ACM International Conference on Multimedia},
52 |   pages={320--328},
53 |   year={2019}
54 | }
55 | ```
56 | 
--------------------------------------------------------------------------------
/general_util/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import sys
 4 | 
 5 | _root_name = 'FK'
 6 | 
 7 | 
 8 | def get_child_logger(child_name):
 9 |     return logging.getLogger(_root_name + '.' + child_name)
10 | 
11 | 
12 | def setting_logger(log_file: str, local_rank: int = -1):
13 |     model_name = "-".join(log_file.replace('/', ' ').split()[1:])
14 | 
15 |     logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
16 |                         datefmt='%m/%d/%Y %H:%M:%S',
17 |                         level=logging.INFO if local_rank in [-1, 0] else logging.WARNING)
18 | 
19 |     global _root_name
20 |     if local_rank != -1:
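21 |         # Give each distributed rank its own child logger namespace.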
22 |         _root_name = _root_name + '.' + str(local_rank)
23 |     logger = logging.getLogger(_root_name)
24 |     logger.setLevel(logging.INFO if local_rank in [-1, 0] else logging.WARNING)
25 | 
26 |     rf_handler = logging.StreamHandler(sys.stderr)
27 |     rf_handler.setLevel(logging.INFO)
28 |     rf_handler.setFormatter(logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
29 |                                               datefmt='%m/%d/%Y %H:%M:%S'))
30 | 
31 |     output_dir = './log_dir'
32 |     if not os.path.exists(output_dir):
33 |         os.makedirs(output_dir)
34 |     f_handler = logging.FileHandler(os.path.join(
35 |         output_dir, model_name + '-output.log'))
36 |     f_handler.setLevel(logging.INFO)
37 |     f_handler.setFormatter(logging.Formatter(fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
38 |                                              datefmt='%m/%d/%Y %H:%M:%S'))
39 | 
40 |     logger.addHandler(f_handler)
41 |     return logger
42 | 
43 | 
--------------------------------------------------------------------------------
/baselines/PAIBPT/README.md:
--------------------------------------------------------------------------------
 1 | # PAI-BPR
 2 | ### A state-of-the-art fashion recommendation system that captures user preference and can interpret attribute-wise preferences
 3 | 
 4 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/pai-bpr-personalized-outfit-recommendation/preference-mapping-on-iqoon3000)](https://paperswithcode.com/sota/preference-mapping-on-iqoon3000?p=pai-bpr-personalized-outfit-recommendation)
 5 | 
 6 | - The whole project was implemented on Google Colab, so it is recommended to run it there as well.
 7 | 
 8 | ## Folder structure for code files
 9 | 
10 | - Attribute_keras.ipynb - Attribute representation model training
11 | - GetAttribute.ipynb - Getting visual representations from the attribute model
12 | - GPBPR2.py - The model file
13 | - train.py - Training file for our state-of-the-art model
14 | - test.py - Testing our model
15 | - Testing_Model.pynb - Notebook visualising our SOTA model, its predictions and outputs
16 | - main.ipynb - To run train.py with the desired requirements
17 | 
18 | * "Attribute_keras.ipynb" and "GetAttribute.ipynb" extract visual features from the images, which reside in the data folder.
19 | * The model is already trained and resides in the data folder.
20 | 
21 | 
22 | 
23 | ## Citation
24 | 
25 | ```BibTeX
26 | @misc{sagar2020paibpr,
27 |     title={PAI-BPR: Personalized Outfit Recommendation Scheme with Attribute-wise Interpretability},
28 |     author={Dikshant Sagar and Jatin Garg and Prarthana Kansal and Sejal Bhalla and Rajiv Ratn Shah and Yi Yu},
29 |     year={2020},
30 |     eprint={2008.01780},
31 |     archivePrefix={arXiv},
32 |     primaryClass={cs.CV}
33 | }
34 | ```
35 | 
--------------------------------------------------------------------------------
/baselines/HFGN/utility/helper.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on Aug 19, 2016
 3 | @author: Xiang Wang (xiangwang@u.nus.edu)
 4 | '''
 5 | __author__ = "xiangwang"
 6 | import os
 7 | import re
 8 | import numpy as np
 9 | def sigmoid(x, derivative=False):
10 |     sigm = 1. / (1. + np.exp(-x))
11 |     if derivative:
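12 |         # d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x))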
13 |         return sigm * (1. - sigm)
14 |     return sigm
15 | 
16 | def txt2list(file_src):
17 |     orig_file = open(file_src, "r")
18 |     lines = orig_file.readlines()
19 |     return lines
20 | 
21 | def ensureDir(dir_path):
22 |     d = os.path.dirname(dir_path)
23 |     if not os.path.exists(d):
24 |         os.makedirs(d)
25 | 
26 | def uni2str(unicode_str):
27 |     return str(unicode_str.encode('ascii', 'ignore')).replace('\n', '').strip()
28 | 
29 | def hasNumbers(inputString):
30 |     return bool(re.search(r'\d', inputString))
31 | 
32 | def delMultiChar(inputString, chars):
33 |     for ch in chars:
34 |         inputString = inputString.replace(ch, '')
35 |     return inputString
36 | 
37 | def merge_two_dicts(x, y):
38 |     z = x.copy()   # start with x's keys and values
39 |     z.update(y)    # modifies z with y's keys and values & returns None
40 |     return z
41 | 
42 | def early_stopping(log_value, best_value, stopping_step, expected_order='acc', flag_step=100):
43 |     # early stopping strategy:
44 |     assert expected_order in ['acc', 'dec']
45 | 
46 |     if (expected_order == 'acc' and log_value >= best_value) or (expected_order == 'dec' and log_value <= best_value):
47 |         stopping_step = 0
48 |         best_value = log_value
49 |     else:
50 |         stopping_step += 1
51 | 
52 |     if stopping_step >= flag_step:
53 |         print("Early stopping is triggered at step: {}, log: {}".format(flag_step, log_value))
54 |         should_stop = True
55 |     else:
56 |         should_stop = False
57 |     return best_value, stopping_step, should_stop
58 | 
--------------------------------------------------------------------------------
/baselines/HFGN/README.md:
--------------------------------------------------------------------------------
 1 | # hierarchical_fashion_graph_network
 2 | This is our Tensorflow implementation for the paper:
 3 | > Xingchen Li, Xiang Wang, Xiangnan He, Long Chen, Jun Xiao, and Tat-Seng Chua. Hierarchical Fashion Graph Network for Personalized Outfit Recommendation. In SIGIR 2020.
 4 | 
 5 | ## Introduction
 6 | Hierarchical Fashion Graph Network (HFGN) is a new recommendation framework for the personalized outfit recommendation task, based on a hierarchical graph structure.
 7 | 
 8 | ## Citation
 9 | If you want to use our codes and datasets in your research, please cite:
10 | ```
11 | @inproceedings{HFGN20,
12 |   author    = {Xingchen Li and
13 |                Xiang Wang and
14 |                Xiangnan He and
15 |                Long Chen and
16 |                Jun Xiao and
17 |                Tat{-}Seng Chua},
18 |   title     = {Hierarchical Fashion Graph Network for Personalized Outfit Recommendation},
19 |   booktitle = {Proceedings of the 43rd International {ACM} {SIGIR} Conference on
20 |                Research and Development in Information Retrieval, {SIGIR} 2020.},
21 |   year      = {2020},
22 | }
23 | ```
24 | 
25 | ## Dataset
26 | 
27 | > Our experiments are based on the [POG dataset](https://github.com/wenyuer/POG). We reprocess the data and save the files; the file format is listed in [Data/pog](https://github.com/xcppy/hierarchical_fashion_graph_network/blob/master/Data/pog/Data.md).
28 | 
29 | 
30 | ## Environment
31 | > tensorflow == 1.10.1
32 | > python == 3.6
33 | 
34 | ## Run the Codes
35 | ```
36 | python model.py --regs 1e-5 --embed_size 64 --batch_size 1024
37 | ```
38 | 
39 | ## Train the model
40 | 
41 | > For the Fill-in-the-Blank (FLTB) task, we only optimize the compatibility loss L_{com}.
42 | 
43 | > For the personalized outfit recommendation task, we initialize the whole model with the pretrained FLTB model to obtain better performance.
44 | 
45 | 
46 | 
--------------------------------------------------------------------------------
/models/modeling_utils.py:
--------------------------------------------------------------------------------
 1 | from torch import nn, Tensor
 2 | import torch
 3 | from torchvision.models import resnet18
 4 | 
 5 | 
 6 | def initialize_vision_backbone(model: str) -> nn.Module:
 7 |     if model == 'resnet18':
 8 |         return nn.Sequential(*list(resnet18(pretrained=True).children())[:-1])
 9 |     else:
10 |         raise RuntimeError(f'Unrecognized backbone: {model}.')
11 | 
12 | 
13 | def get_activation_func(activation: str):
14 |     _parse = {
15 |         "gelu": torch.nn.GELU(),
16 |         "elu": torch.nn.ELU(),
17 |     }
18 |     return _parse[activation]
19 | 
20 | 
21 | def init_weights(module: nn.Module):
22 |     """ Initialize the weights """
23 |     if isinstance(module, (nn.Linear, nn.Embedding)):
24 |         # Slightly different from the TF version which uses truncated_normal for initialization
25 |         # cf https://github.com/pytorch/pytorch/pull/5617
26 |         module.weight.data.normal_(mean=0.0, std=0.02)
27 |     elif isinstance(module, nn.LayerNorm):
28 |         module.bias.data.zero_()
29 |         module.weight.data.fill_(1.0)
30 |     if isinstance(module, nn.Linear) and module.bias is not None:
31 |         module.bias.data.zero_()
32 | 
33 | 
34 | def weighted_avg(linear: nn.Linear, x: Tensor, mask: Tensor = None):
35 |     scores = linear(x).squeeze(-1)
36 |     if mask is not None:
37 |         scores = scores + (1 - mask).to(scores.dtype) * -10000.0
38 |     alpha = torch.softmax(scores, dim=-1)
39 |     y = torch.einsum("bs,bsh->bh", alpha, x)
40 |     return y
41 | 
42 | 
43 | def get_accuracy(logits: Tensor, labels: Tensor):
44 |     assert logits.size()[:-1] == labels.size()
45 | 
46 |     _, pred = logits.max(dim=-1)
47 |     true_label_num = (labels != -1).sum().item()
48 |     correct = (pred == labels).sum().item()
49 |     if true_label_num == 0:
50 |         return 0, 0
51 |     acc = correct * 1.0 / true_label_num
52 |     return acc, true_label_num
--------------------------------------------------------------------------------
/scripts/run_gat_ablation.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #for gnn_layer in 3 4 5; do
 4 | #  python trainer.py model.gnn.num_layers=${gnn_layer} \
 5 | #    per_gpu_train_batch_size=8 per_gpu_eval_batch_size=8 gradient_accumulation_steps=3 \
 6 | #    output_dir=experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.A100.g${gnn_layer} -cn gat_tf_emb_max_v1_3_1
 7 | #done;
 8 | 
 9 | #for tf_layer in 2 3 4; do
10 | #  python trainer.py model.transformer.encoder_layers=${tf_layer} \
11 | #    per_gpu_train_batch_size=8 per_gpu_eval_batch_size=8 gradient_accumulation_steps=3 \
12 | #    output_dir=experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.A100.tf${tf_layer} -cn gat_tf_emb_max_v1_3_1
13 | #done;
14 | 
15 | 
16 | 
17 | #python trainer.py model.gnn.num_layers=6 per_gpu_train_batch_size=8 per_gpu_eval_batch_size=8 gradient_accumulation_steps=3 output_dir=experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.A100.g6 -cn gat_tf_emb_max_v1_3_1
18 | 
19 | 
20 | #python trainer.py model.transformer.encoder_layers=5 per_gpu_train_batch_size=8 per_gpu_eval_batch_size=8 gradient_accumulation_steps=3 output_dir=experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.A100.tf5 -cn gat_tf_emb_max_v1_3_1
21 | 
22 | 
23 | #for gnn in 3 4; do
24 | #  python trainer.py model.gnn.num_layers=${gnn} \
25 | #    output_dir=experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2080Ti.g${gnn} -cn gat_tf_emb_max_ctr_v1_3_1
26 | #done;
27 | 
28 | 
29 | #for gnn in 5 6; do
30 | #  python trainer.py model.gnn.num_layers=${gnn} \
31 | #    output_dir=experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2080Ti.g${gnn} -cn gat_tf_emb_max_ctr_v1_3_1
32 | #done;
33 | 
34 | #for tf_layer in 2 3; do
35 | for tf_layer in 4 5; do
36 |   python trainer.py model.transformer.encoder_layers=${tf_layer} \
37 |     per_gpu_train_batch_size=8 per_gpu_eval_batch_size=8 gradient_accumulation_steps=3 \
38 |     output_dir=experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.A100.tf${tf_layer} -cn gat_tf_emb_max_ctr_v1_3_1
39 | done;
40 | 
--------------------------------------------------------------------------------
/preprocess/sparsing_subgraph_v1.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This script sparsifies the subgraphs off-line. Each subgraph is a dict with the
 3 | keys 'meta_path', 'src_id' and 'edges' (an adjacency mapping node -> neighbour
 4 | set); see MaximusNeighbourSampler in data_loader/data_utils.py.
 5 | """
 6 | 
 7 | import sys
 8 | import argparse
 9 | import glob
10 | import os.path
11 | import random
12 | from multiprocessing import Pool
13 | from typing import Union, Dict
14 | 
15 | import torch
16 | from tqdm import tqdm
17 | 
18 | sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
19 | 
20 | from data_loader.data_utils import MaximusNeighbourSampler
21 | 
22 | sampler: MaximusNeighbourSampler
23 | 
24 | 
25 | def init(_sampler: MaximusNeighbourSampler):
26 |     global sampler
27 |     sampler = _sampler
28 | 
29 | 
30 | def sparse_graph(graph: Union[str, Dict]):
31 |     if isinstance(graph, str):
32 |         graph = torch.load(graph)
33 | 
34 |     graph = sampler(graph)
35 |     return graph
36 | 
37 | 
38 | if __name__ == '__main__':
39 |     parser = argparse.ArgumentParser()
40 |     parser.add_argument('--path', type=str)
41 |     parser.add_argument('--output_file', type=str)
42 |     parser.add_argument('--max_neighbour_num', type=int, default=3)
43 |     parser.add_argument('--num_workers', type=int, default=32)
44 |     parser.add_argument('--seed', type=int, default=42)
45 | 
46 |     args = parser.parse_args()
47 | 
48 |     random.seed(args.seed)
49 | 
50 |     if os.path.isfile(args.path):
51 |         subgraph = torch.load(args.path)
52 |     else:
53 |         subgraph = list(glob.glob(args.path))
54 | 
55 |     max_neighbour_sampler = MaximusNeighbourSampler(args.max_neighbour_num)
56 | 
57 |     with Pool(args.num_workers, initializer=init, initargs=(max_neighbour_sampler,)) as p:
58 |         results = list(tqdm(
59 |             p.imap(sparse_graph, subgraph, chunksize=32),
60 |             total=len(subgraph),
61 |             desc="Sparsifying graph."
62 |         ))
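63 |         # Pool.imap preserves input order, so results[i] corresponds to subgraph[i].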
64 | 
65 |     torch.save(results, args.output_file + f"_{args.seed}_{args.max_neighbour_num}")
66 |     print("Done.")
67 | 
--------------------------------------------------------------------------------
/general_util/average_meter.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class AverageMeter(object):
 5 |     """Computes and stores the average and current value."""
 6 | 
 7 |     def __init__(self):
 8 |         self.val = 0
 9 |         self.avg = 0
10 |         self.sum = 0
11 |         self.count = 0
12 | 
13 |     def reset(self):
14 |         self.val = 0
15 |         self.avg = 0
16 |         self.sum = 0
17 |         self.count = 0
18 | 
19 |     def update(self, val, n=1):
20 |         if isinstance(val, torch.Tensor):
21 |             val = val.item()
22 |         if isinstance(n, torch.Tensor):
23 |             n = n.item()
24 | 
25 |         self.val = val
26 |         self.sum += val * n
27 |         self.count += n
28 |         if self.count > 0:
29 |             self.avg = self.sum / self.count
30 |         else:
31 |             self.avg = 0
32 | 
33 |     def save(self):
34 |         return {
35 |             'val': self.val,
36 |             'avg': self.avg,
37 |             'sum': self.sum,
38 |             'count': self.count
39 |         }
40 | 
41 |     def load(self, value: dict):
42 |         if value is None:
43 |             self.reset()
44 |             return  # nothing to restore
45 |         self.val = value['val'] if 'val' in value else 0
46 |         self.avg = value['avg'] if 'avg' in value else 0
47 |         self.sum = value['sum'] if 'sum' in value else 0
48 |         self.count = value['count'] if 'count' in value else 0
49 | 
50 | 
51 | class LogMetric(object):
52 |     """
53 |     Record all metrics for logging.
54 |     """
55 | 
56 |     def __init__(self, *metric_names):
57 | 
58 |         self.metrics = {
59 |             key: AverageMeter() for key in metric_names
60 |         }
61 | 
62 |     def update(self, metric_name, val, n=1):
63 | 
64 |         self.metrics[metric_name].update(val, n)
65 | 
66 |     def reset(self, metric_name=None):
67 |         if metric_name is None:
68 |             for key in self.metrics.keys():
69 |                 self.metrics[key].reset()
70 |             return
71 | 
72 |         self.metrics[metric_name].reset()
73 | 
74 |     def get_log(self):
75 | 
76 |         log = {
77 |             key: self.metrics[key].avg for key in self.metrics
78 |         }
79 |         return log
80 | 
--------------------------------------------------------------------------------
/data_loader/data_collator_base.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from typing import Dict, List, Union, Tuple
 4 | 
 5 | import torch
 6 | from omegaconf import DictConfig
 7 | from torch import Tensor
 8 | 
 9 | from general_util.logger import get_child_logger
10 | 
11 | logger = get_child_logger('DataCollator')
12 | 
13 | 
14 | class DataCollatorBase:
15 |     def __init__(self, node_vocab: str,
16 |                  ui_edge_file: str,
17 |                  emb_path_dic: DictConfig):
18 |         logger.info(f'Loading node vocabulary from {node_vocab}.')
19 |         self.node_vocab: Dict[str, List[str]] = torch.load(node_vocab)
20 |         self.node2type = {}
21 |         for k, v_ls in self.node_vocab.items():
22 |             for v in v_ls:
23 |                 if v not in self.node2type:
24 |                     self.node2type[v] = k
25 |                 else:
26 |                     assert self.node2type[v] == k, (self.node2type[v], k)  # A node id must map to a single type.
27 |         assert 'u' in self.node_vocab
28 |         assert 'i' in self.node_vocab
29 |         assert 'a' in self.node_vocab
30 |         self.ui_edges: Dict[str, List[str]] = json.load(open(ui_edge_file, 'r'))
31 |         self.emb_path_dic = emb_path_dic
32 | 
33 |     def load_embedding(self, node) -> Union[Tensor, Tuple[Tensor, ...]]:
34 |         node_type = self.node2type[node]
35 |         if node_type == 'a':
36 |             attr = torch.load(os.path.join(self.emb_path_dic['a'], f'{node}.pt')).detach()
37 |             return attr
38 |         elif node_type == 'i':
39 |             # text = torch.load(os.path.join(self.emb_path_dic['text'], f'{node}_t.pt'))
40 |             text = torch.load(os.path.join(self.emb_path_dic['text'], f'{node}_t.pt'))[0, 0].detach()
41 |             mask = torch.load(os.path.join(self.emb_path_dic['mask'], f'{node}_mask.pt'))
42 |             if not os.path.exists(os.path.join(self.emb_path_dic['image'], f'{node}_v.pt')):
43 |                 image = torch.zeros(3, 64, 64)
44 |             else:
45 |                 image = torch.load(os.path.join(self.emb_path_dic['image'], f'{node}_v.pt'))
46 |             return image, text, mask
47 |         else:
48 |             raise RuntimeError(f'Unrecognized node and node type: {node}, {node_type}.')
49 | 
50 |     def __call__(self, *args, **kwargs):
51 |         raise NotImplementedError
52 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | 
132 | experiments/
133 | 
--------------------------------------------------------------------------------
/models/gat.py:
--------------------------------------------------------------------------------
 1 | import dgl
 2 | import torch.nn
 3 | from dgl.nn.pytorch import GATConv
 4 | from torch import nn, Tensor
 5 | 
 6 | 
 7 | class GAT(nn.Module):
 8 |     def __init__(self,
 9 |                  num_layers: int = 3,
10 |                  input_size: int = 768,
11 |                  num_heads: int = 12,
12 |                  head_size: int = 64,
13 |                  feat_dropout: float = 0.1,
14 |                  attn_dropout: float = 0.1,
15 |                  residual: bool = True):
16 |         super().__init__()
17 | 
18 |         self.num_layers = num_layers
19 |         self.num_heads = num_heads
20 |         self.head_size = head_size
21 | 
22 |         self.gat = nn.ModuleList()
23 |         for i in range(num_layers):
24 |             if i == 0:
25 |                 self.gat.append(GATConv(in_feats=input_size,
26 |                                         out_feats=head_size,
27 |                                         num_heads=num_heads,
28 |                                         feat_drop=feat_dropout,
29 |                                         attn_drop=attn_dropout,
30 |                                         residual=residual,
31 |                                         activation=torch.nn.ELU()))
32 |             elif i < num_layers - 1:
33 |                 self.gat.append(GATConv(in_feats=head_size * num_heads,
34 |                                         out_feats=head_size,
35 |                                         num_heads=num_heads,
36 |                                         feat_drop=feat_dropout,
37 |                                         attn_drop=attn_dropout,
38 |                                         residual=residual,
39 |                                         activation=torch.nn.ELU()))
40 |             else:
41 |                 self.gat.append(GATConv(in_feats=head_size * num_heads,
42 |                                         out_feats=head_size * num_heads,
43 |                                         num_heads=1,
44 |                                         feat_drop=feat_dropout,
45 |                                         attn_drop=attn_dropout,
46 |                                         residual=residual,
47 |                                         activation=None))
48 | 
49 |     def forward(self, graph: dgl.DGLGraph, node_features: Tensor):
50 |         num_nodes = node_features.size(0)
51 |         for layer_idx in range(self.num_layers):
52 |             node_features = self.gat[layer_idx](graph, node_features)
53 |             node_features = node_features.reshape(num_nodes, -1)
54 | 
55 |         return node_features
56 | 
--------------------------------------------------------------------------------
/baselines/HFGN/utility/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.metrics import roc_auc_score
 3 | 
 4 | def recall(rank, ground_truth, N):
 5 |     return len(set(rank[:N]) & set(ground_truth)) / float(len(set(ground_truth)))
 6 | 
 7 | 
 8 | def precision_at_k(r, k):
 9 |     """Score is precision @ k
10 |     Relevance is binary (nonzero is relevant).
11 |     Returns:
12 |         Precision @ k
13 |     Raises:
14 |         ValueError: len(r) must be >= k
15 |     """
16 |     assert k >= 1
17 |     r = np.asarray(r)[:k]
18 |     return np.mean(r)
19 | 
20 | 
21 | def average_precision(r, cut):
22 |     """Score is average precision (area under PR curve)
23 |     Relevance is binary (nonzero is relevant).
24 |     Returns:
25 |         Average precision
26 |     """
27 |     r = np.asarray(r)
28 |     out = [precision_at_k(r, k + 1) for k in range(cut) if r[k]]
29 |     if not out:
30 |         return 0.
31 |     return np.sum(out)/float(min(cut, np.sum(r)))
32 | 
33 | 
34 | def mean_average_precision(rs):
35 |     """Score is mean average precision
36 |     Relevance is binary (nonzero is relevant).
37 |     Returns:
38 |         Mean average precision
39 |     """
40 |     return np.mean([average_precision(r, len(r)) for r in rs])
41 | 
42 | 
43 | def dcg_at_k(r, k, method=1):
44 |     """Score is discounted cumulative gain (dcg)
45 |     Relevance is positive real values.  Can use binary
46 |     as the previous methods.
47 |     Returns:
48 |         Discounted cumulative gain
49 |     """
50 |     r = np.asfarray(r)[:k]
51 |     if r.size:
52 |         if method == 0:
53 |             return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
54 |         elif method == 1:
55 |             return np.sum(r / np.log2(np.arange(2, r.size + 2)))
56 |         else:
57 |             raise ValueError('method must be 0 or 1.')
58 |     return 0.
59 | 
60 | 
61 | def ndcg_at_k(r, k, method=1):
62 |     """Score is normalized discounted cumulative gain (ndcg)
63 |     Relevance is positive real values.  Can use binary
64 |     as the previous methods.
65 |     Returns:
66 |         Normalized discounted cumulative gain
67 |     """
68 |     dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
69 |     if not dcg_max:
70 |         return 0.
71 |     return dcg_at_k(r, k, method) / dcg_max
72 | 
73 | 
74 | def recall_at_k(r, k, all_pos_num):
75 |     r = np.asfarray(r)[:k]
76 |     return np.sum(r) / all_pos_num
77 | 
78 | 
79 | def hit_at_k(r, k):
80 |     r = np.array(r)[:k]
81 |     if np.sum(r) > 0:
82 |         return 1.
83 |     else:
84 |         return 0.
85 | 
86 | def F1(pre, rec):
87 |     if pre + rec > 0:
88 |         return (2.0 * pre * rec) / (pre + rec)
89 |     else:
90 |         return 0.
91 | 
92 | def auc(ground_truth, prediction):
93 |     try:
94 |         res = roc_auc_score(y_true=ground_truth, y_score=prediction)
95 |     except Exception:
96 |         res = 0.
97 |     return res
--------------------------------------------------------------------------------
/data_loader/data_utils.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | from typing import Dict, Any
 3 | from collections import defaultdict
 4 | 
 5 | import torch
 6 | from torch import Tensor
 7 | from general_util.logger import get_child_logger
 8 | 
 9 | logger = get_child_logger("DataUtils")
10 | 
11 | 
12 | class EmbeddingMatrix:
13 |     """
14 |     Cache the embedding on CPU instead of CUDA to avoid exhausting GPU memory.
15 |     """
16 | 
17 |     def __init__(self,
18 |                  attr_text: str = None,
19 |                  item_image: str = None,
20 |                  item_text: str = None):
21 |         if attr_text:
22 |             logger.info(f"Loading attribute text embedding from {attr_text}.")
23 |             self.attr_text: Tensor = torch.load(attr_text, map_location='cpu')
24 |         else:
25 |             self.attr_text = None
26 | 
27 |         if item_image:
28 |             logger.info(f"Loading item image from {item_image}.")
29 |             self.item_image: Tensor = torch.load(item_image, map_location='cpu')
30 |         else:
31 |             self.item_image = None
32 | 
33 |         if item_text:
34 |             logger.info(f"Loading item text embedding from {item_text}.")
35 |             self.item_text: Tensor = torch.load(item_text, map_location='cpu')
36 |         else:
37 |             self.item_text = None
38 | 
39 | 
40 | class MaximusNeighbourSampler:
41 |     def __init__(self, max_neighbour_num: int = 10):
42 |         self.max_neighbour_num = max_neighbour_num
43 | 
44 |     def __call__(self, graph: Dict[str, Any]) -> Dict[str, Any]:
45 |         src = graph['src_id']
46 |         neighbours = graph['edges']
47 | 
48 |         sampled_neighbours = {}
49 | 
50 |         queue = [src]
51 |         vis = {src}
52 |         node2hop = {src: 0}
53 |         hop_node_list = defaultdict(set)
54 |         hop_node_list[0].add(src)
55 |         while len(queue) > 0:
56 |             node_i = queue.pop(0)
57 |             node_u_set = neighbours[node_i]
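58 |             # Cap each node's fan-out at max_neighbour_num via uniform sampling.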
59 |             if len(node_u_set) > self.max_neighbour_num:
60 |                 sampled_node_u = random.sample(node_u_set, self.max_neighbour_num)
61 |             else:
62 |                 sampled_node_u = node_u_set
63 |             sampled_neighbours[node_i] = set(sampled_node_u)
64 |             cur_hop = node2hop[node_i]
65 |             for node_u in sampled_node_u:
66 |                 node2hop[node_u] = cur_hop + 1
67 |                 hop_node_list[cur_hop + 1].add(node_u)
68 |                 if node_u not in vis:
69 |                     queue.append(node_u)
70 |                     vis.add(node_u)
71 |                 # Add the ignored edges.
72 |                 for node_p in hop_node_list[cur_hop]:
73 |                     if node_p in neighbours and node_u in neighbours[node_p] and \
74 |                             node_p in sampled_neighbours and node_u not in sampled_neighbours[node_p]:
75 |                         sampled_neighbours[node_p].add(node_u)
76 | 
77 |         return {
78 |             'meta_path': graph['meta_path'],
79 |             'src_id': src,
80 |             'edges': sampled_neighbours
81 |         }
82 | 
--------------------------------------------------------------------------------
/scripts/sparse_graph.sh:
--------------------------------------------------------------------------------
 1 | max_neighbour_num=5
 2 | #max_neighbour_num=3
 3 | seed=42
 4 | num_workers=32
 5 | 
 6 | #python preprocess/sparsing_subgraph_v1.py --path IQON_pair_remove_edge/subgraphs_v1.0.ii \
 7 | #  --output_file IQON_pair_remove_edge/subgraph.ii.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
 8 | #
 9 | #python preprocess/sparsing_subgraph_v1.py --path IQON_pair_remove_edge/subgraphs_v1.0.iia \
10 | #  --output_file IQON_pair_remove_edge/subgraph.iia.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
11 | #
12 | #python preprocess/sparsing_subgraph_v1.py --path IQON_pair_remove_edge/subgraphs_v1.0.iui \
13 | #  --output_file IQON_pair_remove_edge/subgraph.iui.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
14 | #
15 | #python preprocess/sparsing_subgraph_v1.py --path IQON_pair_remove_edge/subgraphs_v1.0.uia \
16 | #  --output_file IQON_pair_remove_edge/subgraph.uia.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
17 | #
18 | #python preprocess/sparsing_subgraph_v1.py --path IQON_pair_remove_edge/subgraphs_v1.0.uiu \
19 | #  --output_file IQON_pair_remove_edge/subgraph.uiu.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
20 | 
21 | #python preprocess/sparsing_subgraph_v1.py --path "IQON_pair_remove_edge/subgraphs/subgraph-iai/*" \
22 | #  --output_file IQON_pair_remove_edge/subgraph.iai.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
23 | #
24 | #python preprocess/sparsing_subgraph_v1.py --path "IQON_pair_remove_edge/subgraphs/subgraph-uiaiu/*" \
25 | #  --output_file IQON_pair_remove_edge/subgraph.uiaiu.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
26 | 
27 | # ==========================
28 | 
29 | python preprocess/sparsing_subgraph_v1.py --path gp-bpr/subgraph.ii \
30 |   --output_file gp-bpr/subgraph.ii.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
31 | 
32 | python preprocess/sparsing_subgraph_v1.py --path gp-bpr/subgraph.iia \
33 |   --output_file gp-bpr/subgraph.iia.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
34 | 
35 | python preprocess/sparsing_subgraph_v1.py --path gp-bpr/subgraph.iui \
36 |   --output_file gp-bpr/subgraph.iui.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
37 | 
38 | python preprocess/sparsing_subgraph_v1.py --path gp-bpr/subgraph.uia \
39 |   --output_file gp-bpr/subgraph.uia.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
40 | 
41 | python preprocess/sparsing_subgraph_v1.py --path gp-bpr/subgraph.uiu \
42 |   --output_file gp-bpr/subgraph.uiu.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
43 | 
44 | python preprocess/sparsing_subgraph_v1.py --path "gp-bpr/subgraph-iai/*" \
45 |   --output_file gp-bpr/subgraph.iai.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
46 | 
47 | python preprocess/sparsing_subgraph_v1.py --path "gp-bpr/subgraph-uiaiu/*" \
48 |   --output_file gp-bpr/subgraph.uiaiu.v1.0.sparse --max_neighbour_num $max_neighbour_num --seed $seed --num_workers $num_workers
49 | 
--------------------------------------------------------------------------------
/conf/basic_config_v1.yaml:
--------------------------------------------------------------------------------
  1 | hydra:
  2 |   run:
  3 |     dir: ./
  4 | 
  5 | data_dir: /home/jiaofangkai/IQON_pair_remove_edge
  6 | 
  7 | train_file: ${data_dir}/UII_train_quadruple.json
  8 | dev_file: ${data_dir}/UII_valid_quadruple.json
  9 | test_file: ${data_dir}/test_quadruple.json
 10 | 
 11 | # Data loading
 12 | dataset:
 13 |   _target_: data_loader.data_loader_v1.SubgraphDataset
 14 |   meta_path_dict:
 15 |     ii: ${data_dir}/subgraphs_v1.0.ii
 16 |     iia: ${data_dir}/subgraphs_v1.0.iia
 17 |     iai: ${data_dir}/subgraphs/subgraph-iai/*
 18 |     iui: ${data_dir}/subgraphs_v1.0.iui
 19 |     uia: ${data_dir}/subgraphs_v1.0.uia
 20 |     uiu: ${data_dir}/subgraphs_v1.0.uiu
 21 |     uiaiu: ${data_dir}/subgraphs/subgraph-uiaiu/*
 22 |   graph_sampler:
 23 |     _target_: data_loader.data_utils.MaximusNeighbourSampler
 24 |     max_neighbour_num: 3
 25 | 
 26 | 
 27 | # Data collator
 28 | collator:
 29 |   _target_: data_loader.data_collator_v1.SubgraphCollator
 30 |   node_vocab: ${data_dir}/subgraphs/vocab.pt
 31 |   ui_edge_file: ${data_dir}/UI.json
 32 |   emb_path_dic:
 33 |     a: /home/wangchun/work3/Initialization/attribute
 34 |     text: /home/wangchun/work3/Initialization/text
 35 |     mask: /home/wangchun/work3/Initialization/mask
 36 |     image: /home/wangchun/work3/Initialization/img
 37 | 
 38 | # Dataloader
 39 | num_workers: 16
 40 | prefetch_factor: 2
 41 | 
 42 | # Model
 43 | model:
 44 |   _target_: models.gat_tf.GATTransformer
 45 |   vision_model: resnet18
 46 |   text_hidden_size: 768
 47 |   text_hidden_layer: 2
 48 |   img_hidden_size: 512
 49 |   hidden_size: 768
 50 |   gnn:
 51 |     _target_: models.gat.GAT
 52 |     num_layers: 3
 53 |     input_size: ${model.hidden_size}
 54 |     num_heads: 12
 55 |     head_size: 64
 56 |     feat_dropout: 0.1
 57 |     attn_dropout: 0.1
 58 |     residual: True
 59 |   transformer:
 60 |     _target_: models.transformer.initialize_transformer
 61 |     encoder_layers: 2
 62 |     encoder_ffn_dim: 3072
 63 |     encoder_attention_heads: 12
 64 |     encoder_layerdrop: 0.0
 65 |     activation_function: "gelu"
 66 |     d_model: ${model.hidden_size}
 67 |     dropout: 0.1
 68 |     attention_dropout: 0.0
 69 |     activation_dropout: 0.0
 70 |     init_std: 0.02
 71 |     classifier_dropout: 0.0
 72 | 
 73 | pretrain:
 74 | 
 75 | output_dir: experiments/gat_tf.v1.0
 76 | 
 77 | do_train: True
 78 | evaluate_during_training: True
 79 | 
 80 | do_eval: True
 81 | eval_sub_path:
 82 | 
 83 | # Training hyper-parameters
 84 | per_gpu_train_batch_size: 1
 85 | per_gpu_eval_batch_size: 1
 86 | learning_rate: 1e-5
 87 | gradient_accumulation_steps: 12
 88 | weight_decay: 0.01
 89 | adam_epsilon: 1e-6
 90 | adam_betas: "(0.9, 0.98)"
 91 | max_grad_norm: 0.0
 92 | num_train_epochs: 10
 93 | max_steps: 0
 94 | warmup_proportion: 0.1
 95 | warmup_steps: 0
 96 | 
 97 | # Prediction config
 98 | prediction_cfg:
 99 |   metric: "acc"
100 |   measure: 1
101 |   best_checkpoint:
102 |   best_result:
103 | 
104 | 
105 | logging_steps: 5
106 | save_steps: -1
107 | save_best: True
108 | eval_steps: 100
109 | no_cuda: False
110 | seed: 42
111 | local_rank: -1
112 | fp16: True
113 | fp16_opt_level: O1
114 | 
115 | # fairscale.FullyShardedDDP
116 | reshard_after_forward: False
117 | cpu_offload: False
118 | move_grads_to_cpu: False
119 | move_params_to_cpu: False
120 | 
121 | # Temporary variables
122 | n_gpu:
123 | device:
124 | train_batch_size:
125 | eval_batch_size:
126 | world_size:
127 | 
--------------------------------------------------------------------------------
/preprocess/embedding_loading.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from typing import Union, Tuple
 4 | 
 5 | import torch
 6 | from torch import Tensor
 7 | from tqdm import tqdm
 8 | 
 9 | 
10 | def load_embedding(node, node_type, emb_path_dic) -> Union[Tensor, Tuple[Tensor, ...]]:
11 |     if node_type == 'a':
12 |         attr = torch.load(os.path.join(emb_path_dic['a'], f'{node}.pt')).detach()
13 |         return attr
14 |     elif node_type == 'i':
15 |         if not os.path.exists(os.path.join(emb_path_dic['text'], f'{node}_t.pt')):
16 |             text = torch.zeros(2, 768)
17 |         else:
18 |             text = torch.load(os.path.join(emb_path_dic['text'], f'{node}_t.pt'))[:, 0].detach()
19 |         # mask = torch.load(os.path.join(self.emb_path_dic['mask'], f'{node}_mask.pt'))
20 |         if not os.path.exists(os.path.join(emb_path_dic['image'], f'{node}_v.pt')):
21 |             image = torch.zeros(3, 224, 224)
22 |         else:
23 |             image = torch.load(os.path.join(emb_path_dic['image'], f'{node}_v.pt'))
24 |         return image, text.mean(dim=0)
25 |     else:
26 |         raise RuntimeError(f'Unrecognized node and node type: {node}, {node_type}.')
27 | 
28 | 
29 | if __name__ == "__main__":
30 |     # node_vocab = torch.load("/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/vocab.pt")
31 |     # text_emb_dir = "/home/wangchun/work3/Initialization/text"
32 |     # mask_emb_dir = "/home/wangchun/work3/Initialization/mask"
33 |     # img_dir = "/home/wangchun/work3/Initialization/img"
34 |     # att_dir = "/home/wangchun/work3/Initialization/attribute"
35 |     node_vocab = torch.load("/home/jiaofangkai/gp-bpr/vocab.pt")
36 |     text_emb_dir = "/home/wangchun/work3/Initialization_all/text"
37 |     mask_emb_dir = "/home/wangchun/work3/Initialization_all/mask"
38 |     img_dir = "/home/wangchun/work3/Initialization_all/img"
39 |     att_dir = "/home/wangchun/work3/Initialization/attribute"
40 | 
41 |     emb_path = {
42 |         'a': att_dir,
43 |         'image': img_dir,
44 |         'mask': mask_emb_dir,
45 |         'text': text_emb_dir
46 |     }
47 | 
48 |     a_emb = []
49 |     a_vocab = {}
50 |     for i, a in enumerate(tqdm(node_vocab['a'])):
51 |         tmp = load_embedding(a, 'a', emb_path)
52 |         a_emb.append(tmp)
53 |         a_vocab[a] = i
54 | 
55 |     # torch.save(torch.stack(a_emb, dim=0), "/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/attribute_emb_weight.pt")
56 |     # json.dump(a_vocab, open("/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/attribute.json", "w"))
57 |     torch.save(torch.stack(a_emb, dim=0), "/home/jiaofangkai/gp-bpr/attribute_emb_weight.pt")
58 |     json.dump(a_vocab, open("/home/jiaofangkai/gp-bpr/attribute.json", "w"))
59 | 
60 |     i_t_emb = []
61 |     i_img_tensor = []
62 |     i_vocab = {}
63 |     for j, i in enumerate(tqdm(node_vocab['i'])):
64 |         i_img, i_text = load_embedding(i, 'i', emb_path)
65 |         i_t_emb.append(i_text)
66 |         i_img_tensor.append(i_img)
67 |         i_vocab[i] = j
68 | 
69 |     # torch.save(torch.stack(i_t_emb, dim=0), "/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/item_text_emb_weight.cls.pt")
70 |     # torch.save(torch.stack(i_img_tensor, dim=0), "/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/item_img.pt")
71 |     # json.dump(i_vocab, open("/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/item_vocab.json", "w"))
72 |     torch.save(torch.stack(i_t_emb, dim=0), "/home/jiaofangkai/gp-bpr/item_text_emb_weight.cls.pt")
73 |     torch.save(torch.stack(i_img_tensor, dim=0), "/home/jiaofangkai/gp-bpr/item_img.pt")
74 |     json.dump(i_vocab, open("/home/jiaofangkai/gp-bpr/item_vocab.json", "w"))
75 | 
--------------------------------------------------------------------------------
/baselines/HFGN/inference/fltb_test.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on June, 2020
  3 | Tensorflow Implementation of HFGN model in:
  4 | Xingchen Li et al. In SIGIR 2020.
  5 | Hierarchical Fashion Graph Network for Personalized Outfit Recommendation.
  6 | 
  7 | @author: Xingchen Li (xingchenl@zju.edu.cn)
  8 | '''
  9 | 
 10 | import utility.metrics as metrics
 11 | from utility.parser import parse_args
 12 | import multiprocessing
 13 | import math
 14 | import heapq
 15 | import numpy as np
 16 | from tqdm import tqdm
 17 | 
 18 | _cores = multiprocessing.cpu_count() // 2
 19 | 
 20 | args = parse_args()
 21 | Ks = [1]
 22 | 
 23 | _data_generator = None
 24 | _N_TEST = None
 25 | 
 26 | def ranklist_by_sorted(rating, Ks):
 27 |     max_rat = np.max(rating)
 28 |     if rating[0] < max_rat:
 29 |         r = [0]
 30 |     else:
 31 |         r = [1]
 32 |     return r
 33 | 
 34 | 
 35 | def get_performance(r, Ks):
 36 |     Ks = [1]
 37 |     auc = []
 38 | 
 39 |     for K in Ks:
 40 |         auc.append(metrics.hit_at_k(r, K))
 41 | 
 42 |     return {'auc': np.array(auc)}
 43 | 
 44 | def test_one_user(x):
 45 |     # user u's ratings for user u
 46 |     rating = x
 47 | 
 48 |     r = ranklist_by_sorted(rating, Ks)
 49 | 
 50 |     return get_performance(r, Ks)
 51 | 
 52 | def create_adj(o_items, cate_adj, item_cate):
 53 |     cate_adj.tolil()
 54 |     o_cates = []
 55 |     o_graph = np.zeros([_max_ol, _max_ol],dtype=np.float32)
 56 | 
 57 |     for i in o_items:
 58 |         c = item_cate[i]
 59 |         o_cates.append(c)
 60 |     for i in range(len(o_cates)):
 61 |         for j in range(len(o_cates)):
 62 |             c1 = o_cates[i]
 63 |             c2 = o_cates[j]
 64 |             o_graph[i, j] = cate_adj[c1, c2]
 65 | 
 66 |     return o_graph
 67 | 
 68 | 
 69 | def test(sess, model, data_generator, args, drop_flag=True, batch_test_flag=False):
 70 |     global _data_generator
 71 |     global _N_TEST
 72 |     global _batch_size
 73 |     global _max_ol
 74 | 
 75 |     _data_generator = data_generator
 76 |     _test_indx = data_generator.test_indx
 77 |     _test_len = data_generator.test_len
 78 |     _test_adj = data_generator.test_adj
 79 |     _batch_size = 1024
 80 |     _max_ol = data_generator.max_ol
 81 | 
 82 |     _N_TEST = _data_generator.n_fltb_tests
 83 |     n_test = _N_TEST/4
 84 | 
 85 |     result = {'auc': np.zeros(len(Ks))}
 86 | 
 87 |     pool = multiprocessing.Pool(_cores)
 88 | 
 89 |     count = 0
 90 | 
 91 |     num_batch = math.ceil(_N_TEST/_batch_size)
 92 | 
 93 |     for idx in range(num_batch):
 94 |         start = idx * _batch_size
 95 |         end = min((idx + 1) * _batch_size, _N_TEST)
 96 | 
 97 |         fltb_batch = np.array(_test_indx[start:end])
 98 |         flen_batch = np.squeeze(np.array(_test_len[start:end]))
 99 |         fadj_batch = np.array(_test_adj[start:end])
100 | 
101 |         rate_batch = sess.run(model.fltb_neg_scores, {model.fltb_input:fltb_batch,
102 |                                                       model.flen_input:flen_batch,
103 |                                                       model.fadj_input:fadj_batch,
104 |                                                       model.node_dropout: [0.],
105 |                                                       model.mess_dropout: [0.]})
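106 |         # Scores come back flattened: each FLTB question contributes 4 candidates, with the ground-truth item at index 0.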
107 | 
108 |         a = np.reshape(np.squeeze(rate_batch), [-1, 4])
109 |         batch_result = pool.map(test_one_user, a)
110 | 
111 |         count += len(batch_result)
112 | 
113 |         for re in batch_result:
114 | 
115 |             result['auc'] += re['auc']/n_test
116 | 
117 |     assert count == n_test
118 |     pool.close()
119 |     return result
120 | 
121 | 
122 | 
123 | 
124 | 
--------------------------------------------------------------------------------
/baselines/HFGN/utility/parser.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on June, 2020
 3 | Tensorflow Implementation of HFGN model in:
 4 | Xingchen Li et al. In SIGIR 2020.
 5 | Hierarchical Fashion Graph Network for Personalized Outfit Recommendation.
 6 | 
 7 | @author: Xingchen Li (xingchenl@zju.edu.cn)
 8 | '''
 9 | 
10 | import argparse
11 | 
12 | def parse_args():
13 |     parser = argparse.ArgumentParser(description="Run HFGN.")
14 |     parser.add_argument('--weights_path', nargs='?', default='',
15 |                         help='Store model path.')
16 |     parser.add_argument('--data_path', nargs='?', default='../Data/',
17 |                         help='Input data path.')
18 |     parser.add_argument('--proj_path', nargs='?', default='',
19 |                         help='Project path.')
20 | 
21 |     parser.add_argument('--dataset', nargs='?', default='pog',
22 |                         help='Choose a dataset')
23 |     parser.add_argument('--pretrain', type=int, default=0,
24 |                         help='0: No pretrain, -1: Pretrain with the learned embeddings, 1: Pretrain with stored models.')
25 |     parser.add_argument('--verbose', type=int, default=1,
26 |                         help='Interval of evaluation.')
27 |     parser.add_argument('--epoch', type=int, default=500,
28 |                         help='Number of epochs.')
29 | 
30 |     parser.add_argument('--embed_size', type=int, default=64,
31 |                         help='Embedding size.')
32 |     parser.add_argument('--batch_size', type=int, default=1024,
33 |                         help='Batch size.')
34 | 
35 |     parser.add_argument('--regs', type=float, default=0.00001,
36 |                         help='Regularizations.')
37 |     parser.add_argument('--r_view', type=int, default=8,
38 |                         help='R view nums.')
39 |     parser.add_argument('--lr', type=float, default=0.0001,
40 |                         help='Learning rate.')
41 | 
42 |     parser.add_argument('--fltb_lr', type=float, default=0.01,
43 |                         help='Learning rate for FLTB.')
44 | 
45 |     parser.add_argument('--recom_lr', type=float, default=0.0001,
46 |                         help='Learning rate for recommendation.')
47 | 
48 |     parser.add_argument('--model_type', nargs='?', default='HFGN',
49 |                         help='Specify the name of model (HFGN).')
50 | 
51 |     parser.add_argument('--gpu_id', type=int, default=0,
52 |                         help='0 for NAIS_prod, 1 for NAIS_concat')
53 | 
54 |     parser.add_argument('--node_dropout_flag', type=int, default=1,
55 |                         help='0: Disable node dropout, 1: Activate node dropout')
56 |     parser.add_argument('--node_dropout', nargs='?', default='[0.1]',
57 |                         help='Keep probability w.r.t. node dropout (i.e., 1-dropout_ratio) for each deep layer. 1: no dropout.')
58 |     parser.add_argument('--mess_dropout', nargs='?', default='[0.1]',
/baselines/HFGN/utility/parser.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on June, 2020
3 | Tensorflow Implementation of HFGN model in:
4 | Xingchen Li et al. In SIGIR 2020.
5 | Hierarchical Fashion Graph Network for Personalized Outfit Recommendation.
6 | 
7 | @author: Xingchen Li (xingchenl@zju.edu.cn)
8 | '''
9 | 
10 | import argparse
11 | 
12 | def parse_args():
13 |     parser = argparse.ArgumentParser(description="Run HFGN.")
14 |     parser.add_argument('--weights_path', nargs='?', default='',
15 |                         help='Store model path.')
16 |     parser.add_argument('--data_path', nargs='?', default='../Data/',
17 |                         help='Input data path.')
18 |     parser.add_argument('--proj_path', nargs='?', default='',
19 |                         help='Project path.')
20 | 
21 |     parser.add_argument('--dataset', nargs='?', default='pog',
22 |                         help='Choose a dataset.')
23 |     parser.add_argument('--pretrain', type=int, default=0,
24 |                         help='0: No pretrain, -1: Pretrain with the learned embeddings, 1: Pretrain with stored models.')
25 |     parser.add_argument('--verbose', type=int, default=1,
26 |                         help='Interval of evaluation.')
27 |     parser.add_argument('--epoch', type=int, default=500,
28 |                         help='Number of epochs.')
29 | 
30 |     parser.add_argument('--embed_size', type=int, default=64,
31 |                         help='Embedding size.')
32 |     parser.add_argument('--batch_size', type=int, default=1024,
33 |                         help='Batch size.')
34 | 
35 |     parser.add_argument('--regs', type=float, default=0.00001,
36 |                         help='Regularization coefficient.')
37 |     parser.add_argument('--r_view', type=int, default=8,
38 |                         help='Number of R views.')
39 |     parser.add_argument('--lr', type=float, default=0.0001,
40 |                         help='Learning rate.')
41 | 
42 |     parser.add_argument('--fltb_lr', type=float, default=0.01,
43 |                         help='Learning rate for FLTB.')
44 | 
45 |     parser.add_argument('--recom_lr', type=float, default=0.0001,
46 |                         help='Learning rate for recommendation.')
47 | 
48 |     parser.add_argument('--model_type', nargs='?', default='HFGN',
49 |                         help='Specify the name of the model (HFGN).')
50 | 
51 |     parser.add_argument('--gpu_id', type=int, default=0,
52 |                         help='Id of the GPU to use.')
53 | 
54 |     parser.add_argument('--node_dropout_flag', type=int, default=1,
55 |                         help='0: Disable node dropout, 1: Activate node dropout.')
56 |     parser.add_argument('--node_dropout', nargs='?', default='[0.1]',
57 |                         help='Keep probability w.r.t. node dropout (i.e., 1-dropout_ratio) for each deep layer. 1: no dropout.')
58 |     parser.add_argument('--mess_dropout', nargs='?', default='[0.1]',
59 |                         help='Keep probability w.r.t. message dropout (i.e., 1-dropout_ratio) for each deep layer. 1: no dropout.')
60 | 
61 |     parser.add_argument('--Ks', nargs='?', default='[10, 20, 30, 40, 50]',
62 |                         help='Top-K evaluation.')
63 |     parser.add_argument('--save_flag', type=int, default=1,
64 |                         help='0: Disable model saver, 1: Activate model saver.')
65 |     parser.add_argument('--train_mode', type=int, default=0,
66 |                         help='0: optimize one loss; 1: optimize two losses.')
67 |     parser.add_argument('--alpha', type=float, default=0.5,
68 |                         help='Weighting parameter for the FLTB loss.')
69 |     parser.add_argument('--pretrain_path', nargs='?', default='',
70 |                         help='Pretrain data load path.')
71 | 
72 |     return parser.parse_args()
73 | 
--------------------------------------------------------------------------------
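The list-valued flags above ('--node_dropout', '--mess_dropout', '--Ks') are declared as plain strings, so the caller has to parse them itself; a hypothetical usage sketch with ast.literal_eval (the trainer's actual parsing may differ):

import ast
from utility.parser import parse_args

args = parse_args()
node_dropout = ast.literal_eval(args.node_dropout)  # '[0.1]' -> [0.1]
Ks = ast.literal_eval(args.Ks)                      # '[10, 20, 30, 40, 50]' -> list of ints
print(node_dropout, Ks)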
/preprocess/data_statistics.py:
--------------------------------------------------------------------------------
1 | import json
2 | import copy
3 | import os
4 | from multiprocessing import Pool
5 | from collections import defaultdict
6 | import argparse
7 | from functools import partial
8 | from tqdm import tqdm
9 | 
10 | 
11 | def _initializer(_pattern, _edges, _vis_set):
12 |     global __pattern__
13 |     global __edges__
14 |     global __vis_set__
15 |     __pattern__ = _pattern
16 |     __edges__ = _edges
17 |     __vis_set__ = _vis_set
18 | 
19 | 
20 | def empty_vis(_vis_set):
21 |     for k in _vis_set.keys():
22 |         _vis_set[k].clear()  # set has no .empty(); clear() is the correct call
23 | 
24 | 
25 | def dfs(_pattern, _vis_set, _src_n):
26 |     if len(_pattern) == 1:
27 |         return 1
28 | 
29 |     _src = _pattern[0]
30 |     rest_pattern = _pattern[1:]
31 |     tgt = rest_pattern[0]
32 | 
33 |     _e_type = _src + tgt
34 | 
35 |     _cur_cnt = 0
36 |     if _src_n not in __edges__[_e_type]:
37 |         return 0
38 | 
39 |     for tgt_n in __edges__[_e_type][_src_n]:
40 |         if tgt_n not in _vis_set[tgt]:
41 |             # _nxt_vis_set = copy.deepcopy(_vis_set)
42 |             # _nxt_vis_set[tgt].add(tgt_n)
43 |             _vis_set[tgt].add(tgt_n)
44 |             _cur_cnt += dfs(rest_pattern, _vis_set, tgt_n)
45 |             _vis_set[tgt].remove(tgt_n)
46 | 
47 |     return _cur_cnt
48 | 
49 | 
50 | def outer_for(_src_n):
51 |     _nxt_vis_set = copy.deepcopy(__vis_set__)
52 |     # _nxt_vis_set = {k: set() for k in ['a', 'i', 'o', 'u']}
53 |     _nxt_vis_set[__pattern__[0]].add(_src_n)
54 |     return dfs(__pattern__, _nxt_vis_set, _src_n)
55 | 
56 | 
57 | if __name__ == '__main__':
58 |     parser = argparse.ArgumentParser()
59 |     parser.add_argument('--data_dir', type=str)
60 |     parser.add_argument('--num_workers', type=int, default=32)
61 |     args = parser.parse_args()
62 | 
63 |     ia = json.load(open(os.path.join(args.data_dir, 'dict/IA.json'), 'r'))
64 |     ai = json.load(open(os.path.join(args.data_dir, 'dict/AI.json'), 'r'))
65 |     io = json.load(open(os.path.join(args.data_dir, 'dict/IO.json'), 'r'))
66 |     oi = json.load(open(os.path.join(args.data_dir, 'dict/OI.json'), 'r'))
67 |     ou = json.load(open(os.path.join(args.data_dir, 'dict/OU.json'), 'r'))
68 |     uo = json.load(open(os.path.join(args.data_dir, 'dict/UO.json'), 'r'))
69 | 
70 |     edges = defaultdict(dict)
71 |     edges['ia'] = ia
72 |     edges['ai'] = ai
73 |     edges['io'] = io
74 |     edges['oi'] = oi
75 |     edges['ou'] = ou
76 |     edges['uo'] = uo
77 | 
78 |     node_type = ['a', 'i', 'o', 'u']
79 |     vis_set = {k: set() for k in node_type}
80 | 
81 |     # path_pattern = ['iai', 'ioi', 'iouoi', 'aia', 'aioia', 'aiouoia']
82 |     # path_pattern = ['aioia', 'aiouoia']
83 |     # path_pattern = ['iouoi', 'aia']
84 |     # path_pattern = ['ioi']
85 |     # path_pattern = ['iai']
86 |     # aioia: 11376496
87 |     # aiouoia: 4493559384
88 |     # iouoi: 139248990
89 |     # aia: 4279406
90 |     # ioi: 351042
91 |     # iai: 24757313232
92 |     path_pattern = ['uoiaiou', 'oiaio']
93 | 
94 |     for path_p in path_pattern:
95 |         print(path_p)
96 |         src = path_p[0]
97 |         src_n_set = set()
98 |         for e_type in edges.keys():
99 |             if e_type[0] == src:
100 |                 src_n_set.update(edges[e_type].keys())
101 |         src_n_ls = list(src_n_set)
102 |         with Pool(1, initializer=_initializer, initargs=(path_p, edges, vis_set)) as p:  # single worker; args.num_workers is currently unused
103 |             _annotate = partial(outer_for)
104 |             _results = list(tqdm(
105 |                 p.imap(_annotate, src_n_ls, chunksize=32),
106 |                 total=len(src_n_ls),
107 |                 desc="Counting paths"
108 |             ))
109 |         # _initializer(path_p, edges, vis_set)
110 |         # _results = [(outer_for(x)) for x in tqdm(src_n_ls)]
111 | 
112 |         res = sum(_results)
113 |         print(res)
114 | 
--------------------------------------------------------------------------------
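The core of the script above is a depth-first walk over typed edge dictionaries: a meta-path pattern such as 'ioi' is counted by following the 'io' then 'oi' edge maps while forbidding revisits within each node type. A compact re-derivation on a made-up two-item graph (the graph data is invented for illustration):

# Toy graph: two items, one outfit. Pattern 'ioi' = item -> outfit -> item.
edges = {
    'io': {'i1': ['o1'], 'i2': ['o1']},
    'oi': {'o1': ['i1', 'i2']},
}

def count(pattern, src, visited):
    # Count simple paths matching `pattern` (a node-type string) from `src`.
    if len(pattern) == 1:
        return 1
    e_type = pattern[0] + pattern[1]
    total = 0
    for nxt in edges.get(e_type, {}).get(src, []):
        if nxt not in visited[pattern[1]]:
            visited[pattern[1]].add(nxt)
            total += count(pattern[1:], nxt, visited)
            visited[pattern[1]].remove(nxt)
    return total

print(sum(count('ioi', s, {'i': {s}, 'o': set()}) for s in ['i1', 'i2']))  # 2: i1-o1-i2 and i2-o1-i1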
/baselines/HFGN/data/recom_batch_train.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on June, 2020
3 | Tensorflow Implementation of HFGN model in:
4 | Xingchen Li et al. In SIGIR 2020.
5 | Hierarchical Fashion Graph Network for Personalized Outfit Recommendation.
6 | 
7 | @author: Xingchen Li (xingchenl@zju.edu.cn)
8 | '''
9 | 
10 | import multiprocessing
11 | import numpy as np
12 | import random as rd
13 | import math
14 | 
15 | _data_generator = None
16 | _n_users = None
17 | _n_outfits = None
18 | _n_trains = None
19 | _batch_size = None
20 | _n_batch = None
21 | 
22 | _cores = multiprocessing.cpu_count() // 2
23 | _max_ol = None
24 | 
25 | def sample(data_generator, batch_size):
26 |     global _data_generator
27 |     global _n_users
28 |     global _n_outfits
29 |     global _max_ol
30 |     global _n_batch
31 |     global _batch_size
32 | 
33 |     _data_generator = data_generator
34 |     _batch_size = batch_size
35 |     _max_ol = _data_generator.max_ol
36 | 
37 |     _n_users, _n_outfits = _data_generator.n_users, _data_generator.n_train_outfits
38 |     _n_trains = _data_generator.n_recom_trains
39 | 
40 |     np.random.shuffle(_data_generator.pos_list)
41 |     _n_batch = math.ceil(len(_data_generator.pos_list) / _batch_size)
42 | 
43 |     u_list, po_list, plen_list, no_list, nlen_list = [], [], [], [], []
44 |     # num_task = 8  # multiprocessing.cpu_count()
45 |     if _cores == 1:
46 |         for i in range(0, _n_batch):
47 |             u_batch, po_batch, plen_batch, no_batch, nlen_batch = get_train_batch(i)
48 |             u_list.append(u_batch)
49 |             po_list.append(po_batch)
50 |             plen_list.append(plen_batch)
51 |             no_list.append(no_batch)
52 |             nlen_list.append(nlen_batch)
53 | 
54 | 
55 |     else:
56 | 
57 |         pool = multiprocessing.Pool(_cores)
58 |         # t = math.ceil(len(_Dataset.pos)/_batch_size)
59 |         res = pool.map(get_train_batch, range(_n_batch))
60 |         pool.close()
61 |         pool.join()
62 |         u_list = [r[0] for r in res]
63 |         po_list = [r[1] for r in res]
64 |         plen_list = [r[2] for r in res]
65 |         no_list = [r[3] for r in res]
66 |         nlen_list = [r[4] for r in res]
67 | 
68 | 
69 |     return (u_list, po_list, plen_list, no_list, nlen_list)
70 | 
71 | def batch_get(batches, i):
72 |     return [(batches[r])[i] for r in range(5)]
73 | 
74 | 
75 | def create_adj(o_items, cate_adj, item_cate):
76 |     cate_adj = cate_adj.tolil()  # tolil() returns a copy; without the assignment the conversion is a no-op
77 |     o_cates = []
78 |     o_graph = np.zeros([_max_ol, _max_ol], dtype=np.float32)
79 | 
80 |     for i in o_items:
81 |         c = item_cate[i]
82 |         o_cates.append(c)
83 |     for i in range(len(o_cates)):
84 |         for j in range(len(o_cates)):
85 |             c1 = o_cates[i]
86 |             c2 = o_cates[j]
87 |             o_graph[i, j] = cate_adj[c1, c2]
88 | 
89 |     return o_graph
90 | 
91 | def get_train_batch(b):
92 | 
93 |     begin = b * _batch_size
94 |     end = min(len(_data_generator.pos_list), begin + _batch_size)
95 |     u_batch, po_batch, plen_batch, no_batch, nlen_batch = [], [], [], [], []
96 | 
97 |     for p in range(begin, end):
98 |         u, pos_o = _data_generator.pos_list[p]
99 |         neg_o = pos_o
100 |         while neg_o in _data_generator.train_u_outfits_dict[u] or neg_o in _data_generator.test_u_outfits_dict[u]:
101 |             neg_o = rd.randrange(_n_outfits)
102 | 
103 |         u_batch.append(u)
104 |         po_batch.append(pos_o)
105 |         no_batch.append(neg_o)
106 |         plen_batch.append(_data_generator.outfit_len[pos_o])
107 |         nlen_batch.append(_data_generator.outfit_len[neg_o])
108 | 
109 |     u_batch = np.array(u_batch)
110 |     po_batch = np.array(po_batch)
111 |     plen_batch = np.array(plen_batch)
112 |     no_batch = np.array(no_batch)
113 |     nlen_batch = np.array(nlen_batch)
114 | 
115 | 
116 |     return (u_batch, po_batch, plen_batch, no_batch, nlen_batch)
117 | 
118 | 
119 | 
--------------------------------------------------------------------------------
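get_train_batch above draws each negative outfit by rejection sampling: resample a random outfit id until it lies outside the user's train and test positives. A minimal standalone sketch of that loop, with made-up positives:

import random as rd

n_outfits = 10
train_pos = {0: {1, 2}}  # hypothetical: user 0 interacted with outfits 1 and 2 in train
test_pos = {0: {3}}      # ... and outfit 3 in test

def sample_negative(u, pos_o):
    neg_o = pos_o  # seed with the positive so the loop always runs at least once
    while neg_o in train_pos[u] or neg_o in test_pos[u]:
        neg_o = rd.randrange(n_outfits)
    return neg_o

print(sample_negative(0, 1))  # any outfit id except 1, 2, 3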
/baselines/HFGN/data/fltb_batch_train.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on June, 2020
3 | Tensorflow Implementation of HFGN model in:
4 | Xingchen Li et al. In SIGIR 2020.
5 | Hierarchical Fashion Graph Network for Personalized Outfit Recommendation.
6 | 
7 | @author: Xingchen Li (xingchenl@zju.edu.cn)
8 | '''
9 | 
10 | import multiprocessing
11 | import numpy as np
12 | import random as rd
13 | import math
14 | 
15 | _data_generator = None
16 | _n_users = None
17 | _n_items = None
18 | _n_outfits = None
19 | _n_trains = None
20 | _batch_size = None
21 | _n_batch = None
22 | _cores = multiprocessing.cpu_count() // 2
23 | _max_ol = None
24 | 
25 | def sample(data_generator, batch_size):
26 |     global _data_generator
27 |     global _n_outfits
28 |     global _n_items
29 |     global _max_ol
30 |     global _n_batch
31 |     global _batch_size
32 | 
33 |     _data_generator = data_generator
34 |     _batch_size = batch_size
35 |     _max_ol = _data_generator.max_ol
36 | 
37 |     _n_outfits, _n_items = _data_generator.n_train_outfits, _data_generator.n_all_items
38 |     _n_trains = _data_generator.n_train_outfits
39 | 
40 |     np.random.shuffle(_data_generator.fltb_outfit_list)
41 |     _n_batch = math.ceil(len(_data_generator.fltb_outfit_list) / _batch_size)
42 | 
43 |     po_list, plen_list, f_list, flen_list, fadj_list = [], [], [], [], []
44 |     # num_task = 8  # multiprocessing.cpu_count()
45 |     if _cores == 1:
46 |         for i in range(0, _n_batch):
47 |             po_batch, plen_batch, f_batch, flen_batch, fadj_batch = get_train_batch(i)
48 | 
49 |             po_list.append(po_batch)
50 |             plen_list.append(plen_batch)
51 |             f_list.append(f_batch)
52 |             flen_list.append(flen_batch)
53 |             fadj_list.append(fadj_batch)
54 | 
55 |     else:
56 | 
57 |         pool = multiprocessing.Pool(_cores)
58 |         res = pool.map(get_train_batch, range(_n_batch))
59 |         pool.close()
60 |         pool.join()
61 | 
62 |         po_list = [r[0] for r in res]
63 |         plen_list = [r[1] for r in res]
64 |         f_list = [r[2] for r in res]
65 |         flen_list = [r[3] for r in res]
66 |         fadj_list = [r[4] for r in res]
67 | 
68 |     return (po_list, plen_list, f_list, flen_list, fadj_list)
69 | 
70 | def batch_get(batches, i):
71 |     return [(batches[r])[i] for r in range(5)]
72 | 
73 | 
74 | def create_adj(o_items, cate_adj, item_cate):
75 |     cate_adj = cate_adj.tolil()  # tolil() returns a copy; without the assignment the conversion is a no-op
76 |     o_cates = []
77 |     o_graph = np.zeros([_max_ol, _max_ol], dtype=np.float32)
78 | 
79 |     for i in o_items:
80 |         c = item_cate[i]
81 |         o_cates.append(c)
82 |     for i in range(len(o_cates)):
83 |         for j in range(len(o_cates)):
84 |             c1 = o_cates[i]
85 |             c2 = o_cates[j]
86 |             o_graph[i, j] = cate_adj[c1, c2]
87 | 
88 |     return o_graph
89 | 
90 | def get_train_batch(b):
91 | 
92 |     begin = b * _batch_size
93 |     end = min(len(_data_generator.fltb_outfit_list), begin + _batch_size)
94 |     po_batch, plen_batch, f_batch, flen_batch, fadj_batch = [], [], [], [], []
95 | 
96 |     for p in range(begin, end):
97 |         pos_o = _data_generator.fltb_outfit_list[p]
98 | 
99 |         po_batch.append(pos_o)
100 |         plen_batch.append(_data_generator.outfit_len[pos_o])
101 | 
102 |         """generate fltb negative samples."""
103 |         neg_len = rd.randint(3, _max_ol)
104 |         neg_map = [-1] * _max_ol
105 |         # neg_index = rd.shuffle(range(self.max_ol))
106 |         for i in range(neg_len):
107 |             # k = neg_index[i]
108 |             while True:
109 |                 neg = rd.randint(0, _n_items - 1)
110 |                 if neg not in neg_map:  # avoid duplicate items within one outfit
111 |                     break
112 |             neg_map[i] = neg
113 |         neg_adj = create_adj(neg_map[:neg_len], _data_generator.cate_adj, _data_generator.item_cate_dict)
114 |         f_batch.append(np.array(neg_map))
115 |         flen_batch.append(neg_len)
116 |         fadj_batch.append(neg_adj)
117 | 
118 |     po_batch = np.array(po_batch)
119 |     plen_batch = np.array(plen_batch)
120 |     f_batch = np.array(f_batch)
121 |     flen_batch = np.array(flen_batch)
122 |     fadj_batch = np.array(fadj_batch)
123 | 
124 |     return (po_batch, plen_batch, f_batch, flen_batch, fadj_batch)
125 | 
126 | 
127 | 
--------------------------------------------------------------------------------
/conf/dataset_v2/gat_tf_emb_v1.yaml:
--------------------------------------------------------------------------------
1 | hydra:
2 |   run:
3 |     dir: ./
4 | 
5 | data_dir: /home/jiaofangkai/IQON_pair_remove_edge_v2
6 | 
7 | train_file: ${data_dir}/UII_train_quadruple.json
8 | dev_file: ${data_dir}/UII_valid_quadruple.json
9 | test_file: ${data_dir}/test_quadruple.json
10 | 
11 | embedding_memory:
12 |   _target_: data_loader.data_utils.EmbeddingMatrix
13 |   attr_text: /home/jiaofangkai/IQON_pair_remove_edge/subgraphs/attribute_emb_weight.pt
14 |   item_image: /home/jiaofangkai/IQON_pair_remove_edge/subgraphs/item_img.pt
15 |   item_text: /home/jiaofangkai/IQON_pair_remove_edge/subgraphs/item_text_emb_weight.cls.pt
16 | 
17 | 
18 | # Data loading
19 | dataset:
20 |   _target_: data_loader.data_loader_v1.SubgraphDataset
21 |   meta_path_dict:
22 |     ii: ${data_dir}/subgraph.ii
23 |     iia: ${data_dir}/subgraph.iia
24 |     iai: ${data_dir}/subgraph-iai/*
25 |     iui: ${data_dir}/subgraph.iui
26 |     uia: ${data_dir}/subgraph.uia
27 |     uiu: ${data_dir}/subgraph.uiu
28 |     uiaiu: ${data_dir}/subgraph-uiaiu/*
29 |   graph_sampler:
30 |     _target_: data_loader.data_utils.MaximusNeighbourSampler
31 |     max_neighbour_num: 5
32 | 
33 | 
34 | # Data collator
35 | collator:
36 |   _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab
37 |   user_vocab: ${data_dir}/user_vocab.json
38 |   attr_vocab: /home/jiaofangkai/IQON_pair_remove_edge/subgraphs/attribute_vocab.json
39 |   item_vocab: /home/jiaofangkai/IQON_pair_remove_edge/subgraphs/item_vocab.json
40 |   node_vocab: ${data_dir}/vocab.pt
41 | 
42 | 
43 | # Dataloader
44 | num_workers: 8
45 | eval_num_workers: 0
46 | prefetch_factor: 2
47 | 
48 | # Model
49 | model:
50 |   _target_: models.gat_tf_emb.GATTransformer
51 |   user_embedding: ${data_dir}/user_emb_weight.pt
52 |   user_vocab: ${collator.user_vocab}
53 |   freeze_user_emb: False
54 |   vision_model: resnet18
55 |   text_hidden_size: 768
56 |   img_hidden_size: 512
57 |   # hidden_size: 768
58 |   hidden_size: 512
59 |   gnn:
60 |     _target_: models.gat.GAT
61 |     # num_layers: 3
62 |     num_layers: 2
63 |     input_size: ${model.hidden_size}
64 |     # num_heads: 12
65 |     num_heads: 8
66 |     head_size: 64
67 |     feat_dropout: 0.1
68 |     attn_dropout: 0.1
69 |     residual: True
70 |   transformer:
71 |     _target_: models.transformer.initialize_transformer
72 | 
encoder_layers: 1 73 | # encoder_ffn_dim: 3072 74 | encoder_ffn_dim: 2048 75 | # encoder_attention_heads: 12 76 | encoder_attention_heads: 8 77 | encoder_layerdrop: 0.0 78 | activation_function: "gelu" 79 | d_model: ${model.hidden_size} 80 | dropout: 0.1 81 | attention_dropout: 0.0 82 | activation_dropout: 0.0 83 | init_std: 0.02 84 | classifier_dropout: 0.0 85 | 86 | pretrain: 87 | 88 | #output_dir: experiments/gat_tf_fix_emb.v1.0 89 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 90 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 91 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 92 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 93 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 94 | output_dir: experiments/data_v2.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 95 | 96 | do_train: True 97 | evaluate_during_training: True 98 | 99 | do_eval: True 100 | eval_sub_path: 101 | 102 | # Training hyper-parameters 103 | per_gpu_train_batch_size: 2 104 | per_gpu_eval_batch_size: 2 105 | #learning_rate: 5e-5 106 | learning_rate: 1e-4 107 | gradient_accumulation_steps: 12 108 | #weight_decay: 0.01 109 | weight_decay: 0.1 110 | adam_epsilon: 1e-6 111 | adam_betas: "(0.9, 0.98)" 112 | max_grad_norm: 0.0 113 | #max_grad_norm: 1.0 114 | #num_train_epochs: 30 115 | num_train_epochs: 10 116 | max_steps: 0 117 | warmup_proportion: 0.1 118 | warmup_steps: 0 119 | 120 | multi_tensor: True 121 | 122 | # Prediction config 123 | prediction_cfg: 124 | metric: "acc" 125 | measure: 1 126 | best_checkpoint: 127 | best_result: 128 | 129 | logging_steps: 5 130 | summary_helper: 131 | _target_: general_util.training_utils.SummaryWriterHelper 132 | 133 | save_steps: -1 134 | save_best: True 135 | eval_steps: 500 136 | no_cuda: False 137 | seed: 42 138 | local_rank: -1 139 | fp16: True 140 | fp16_opt_level: O1 141 | 142 | # fairscale.FullyShardedDDP 143 | reshard_after_forward: False 144 | cpu_offload: False 145 | move_grads_to_cpu: False 146 | move_params_to_cpu: False 147 | 148 | # Temporary variables 149 | n_gpu: 150 | device: 151 | train_batch_size: 152 | eval_batch_size: 153 | world_size: 154 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_v1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: /home/jiaofangkai/IQON_pair_remove_edge 6 | 7 | train_file: ${data_dir}/UII_train_quadruple.json 8 | dev_file: ${data_dir}/UII_valid_quadruple.json 9 | test_file: ${data_dir}/UII_test_quadruple.json 10 | 11 | embedding_memory: 12 | _target_: data_loader.data_utils.EmbeddingMatrix 13 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 14 | item_image: ${data_dir}/subgraphs/item_img.pt 15 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 16 | 17 | 18 | # Data loading 19 | dataset: 20 | _target_: data_loader.data_loader_v1.SubgraphDataset 21 | meta_path_dict: 22 | ii: ${data_dir}/subgraphs_v1.0.ii 23 | iia: ${data_dir}/subgraphs_v1.0.iia 24 | iai: ${data_dir}/subgraphs/subgraph-iai/* 25 | iui: ${data_dir}/subgraphs_v1.0.iui 26 | uia: ${data_dir}/subgraphs_v1.0.uia 27 | uiu: ${data_dir}/subgraphs_v1.0.uiu 28 | uiaiu: ${data_dir}/subgraphs/subgraph-uiaiu/* 29 | graph_sampler: 30 | _target_: data_loader.data_utils.MaximusNeighbourSampler 31 | max_neighbour_num: 5 32 | 33 | 34 | # Data collator 35 | collator: 36 | _target_: 
data_loader.data_collator_fix_emb.SubgraphCollatorVocab 37 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 38 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 39 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 40 | node_vocab: ${data_dir}/subgraphs/vocab.pt 41 | 42 | 43 | # Dataloader 44 | num_workers: 8 45 | eval_num_workers: 0 46 | prefetch_factor: 2 47 | 48 | # Model 49 | model: 50 | _target_: models.gat_tf_emb.GATTransformer 51 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 52 | user_vocab: ${collator.user_vocab} 53 | freeze_user_emb: False 54 | vision_model: resnet18 55 | text_hidden_size: 768 56 | img_hidden_size: 512 57 | # hidden_size: 768 58 | hidden_size: 512 59 | gnn: 60 | _target_: models.gat.GAT 61 | # num_layers: 3 62 | num_layers: 2 63 | input_size: ${model.hidden_size} 64 | # num_heads: 12 65 | num_heads: 8 66 | head_size: 64 67 | # feat_dropout: 0.1 68 | # attn_dropout: 0.1 69 | feat_dropout: 0.2 70 | attn_dropout: 0.2 71 | residual: True 72 | transformer: 73 | _target_: models.transformer.initialize_transformer 74 | encoder_layers: 2 75 | # encoder_ffn_dim: 3072 76 | encoder_ffn_dim: 2048 77 | # encoder_attention_heads: 12 78 | encoder_attention_heads: 8 79 | encoder_layerdrop: 0.0 80 | activation_function: "gelu" 81 | d_model: ${model.hidden_size} 82 | dropout: 0.1 83 | attention_dropout: 0.0 84 | activation_dropout: 0.0 85 | init_std: 0.02 86 | classifier_dropout: 0.0 87 | 88 | pretrain: 89 | 90 | #output_dir: experiments/gat_tf_fix_emb.v1.0 91 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 92 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 93 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 94 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 95 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 96 | #output_dir: experiments/gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 97 | output_dir: experiments/gat_tf_fix_emb.v3.2.wd0.1 # 2-layer transformer // gat dropout 0.1 -> 0.2 // epoch 10 -> 5 98 | 99 | do_train: True 100 | evaluate_during_training: True 101 | 102 | do_eval: True 103 | eval_sub_path: 104 | 105 | # Training hyper-parameters 106 | per_gpu_train_batch_size: 2 107 | per_gpu_eval_batch_size: 2 108 | #learning_rate: 5e-5 109 | learning_rate: 1e-4 110 | gradient_accumulation_steps: 12 111 | #weight_decay: 0.01 112 | weight_decay: 0.1 113 | adam_epsilon: 1e-6 114 | adam_betas: "(0.9, 0.98)" 115 | max_grad_norm: 0.0 116 | #max_grad_norm: 1.0 117 | #num_train_epochs: 30 118 | #num_train_epochs: 10 119 | num_train_epochs: 5 120 | max_steps: 0 121 | warmup_proportion: 0.1 122 | warmup_steps: 0 123 | 124 | multi_tensor: True 125 | 126 | # Prediction config 127 | prediction_cfg: 128 | metric: "acc" 129 | measure: 1 130 | best_checkpoint: 131 | best_result: 132 | 133 | logging_steps: 5 134 | summary_helper: 135 | _target_: general_util.training_utils.SummaryWriterHelper 136 | 137 | save_steps: -1 138 | save_best: True 139 | eval_steps: 500 140 | no_cuda: False 141 | seed: 42 142 | local_rank: -1 143 | fp16: True 144 | fp16_opt_level: O1 145 | 146 | # fairscale.FullyShardedDDP 147 | reshard_after_forward: False 148 | cpu_offload: False 149 | move_grads_to_cpu: False 150 | move_params_to_cpu: False 151 | 152 | # Temporary variables 153 | n_gpu: 154 | device: 155 | train_batch_size: 156 | eval_batch_size: 157 | world_size: 158 | -------------------------------------------------------------------------------- 
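All of these YAML files follow the same Hydra convention: every block carrying a `_target_` key names a class or factory to instantiate, and `-cp`/`-cn` on the command line (see the scripts above) pick the config package and name. A hedged sketch of that wiring — the actual entry points trainer.py/predict.py may differ in details, and the extra `quadruple_file` keyword is an assumption based on the edge-level loader shown further below:

import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig


@hydra.main(config_path="conf", config_name="gat_tf_emb_v1")
def main(cfg: DictConfig) -> None:
    # Every `_target_` block is materialized by Hydra; nested `_target_`s
    # (e.g. model.gnn, model.transformer) are instantiated recursively.
    dataset = instantiate(cfg.dataset, quadruple_file=cfg.train_file)
    model = instantiate(cfg.model)
    print(type(dataset).__name__, type(model).__name__)


if __name__ == "__main__":
    main()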
/conf/gat_mlp/gat_mlp_emb_max_ctr_v1_3_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_emb_max_mlp.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | gnn: 59 | _target_: models.gat.GAT 60 | num_layers: 2 61 | input_size: ${model.hidden_size} 62 | num_heads: 8 63 | head_size: 64 64 | feat_dropout: 0.1 65 | attn_dropout: 0.1 66 | residual: True 67 | user_path_num: 3 68 | item_path_num: 4 69 | dropout: 0.1 70 | 71 | pretrain: 72 | 73 | #output_dir: experiments/gat_tf_fix_emb.v1.0 74 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 75 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 76 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 77 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 78 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 79 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 80 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 81 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 82 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 83 | #output_dir: experiments/gat_tf_fix_emb_max.wo_img_fix.v3.1.wd0.1.n5 84 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5 85 | output_dir: experiments/gat_mlp_fix_emb_max.ctr.v3.1.wd0.1.n5.A100 86 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2tf # 2-layer transformer // add dropout to transformer 87 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.2tf.titanxp # epoch 5 -> 10 88 | 89 | do_train: True 90 | evaluate_during_training: True 91 | 92 | do_eval: True 93 | 
eval_sub_path: 94 | 95 | # Training hyper-parameters 96 | per_gpu_train_batch_size: 8 97 | per_gpu_eval_batch_size: 8 98 | #learning_rate: 5e-5 99 | learning_rate: 1e-4 100 | #learning_rate: 5e-4 101 | #learning_rate: 1e-3 102 | gradient_accumulation_steps: 3 103 | #weight_decay: 0.01 104 | weight_decay: 0.1 105 | adam_epsilon: 1e-6 106 | adam_betas: "(0.9, 0.98)" 107 | max_grad_norm: 0.0 108 | #max_grad_norm: 1.0 109 | #num_train_epochs: 30 110 | num_train_epochs: 5 111 | max_steps: 0 112 | warmup_proportion: 0.06 113 | warmup_steps: 114 | 115 | multi_tensor: 116 | 117 | # Prediction config 118 | prediction_cfg: 119 | metric: "acc" 120 | measure: 1 121 | best_checkpoint: 122 | best_result: 123 | 124 | logging_steps: 5 125 | summary_helper: 126 | _target_: general_util.training_utils.SummaryWriterHelper 127 | 128 | save_steps: -1 129 | save_best: True 130 | eval_steps: 500 131 | no_cuda: False 132 | seed: 42 133 | local_rank: -1 134 | fp16: True 135 | fp16_opt_level: O1 136 | 137 | # fairscale.FullyShardedDDP 138 | reshard_after_forward: False 139 | cpu_offload: False 140 | move_grads_to_cpu: False 141 | move_params_to_cpu: False 142 | 143 | # Temporary variables 144 | n_gpu: 145 | device: 146 | train_batch_size: 147 | eval_batch_size: 148 | world_size: 149 | -------------------------------------------------------------------------------- /conf/gp_bpr/gat_tf_emb_v1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: /home/jiaofangkai/gp-bpr 6 | 7 | train_file: /home/wenhaokun/GP-BPR_data/UII_train_quadruple.json 8 | dev_file: /home/wenhaokun/GP-BPR_data/UII_valid_quadruple.json 9 | test_file: /home/wenhaokun/GP-BPR_data/UII_test_quadruple.json 10 | 11 | embedding_memory: 12 | _target_: data_loader.data_utils.EmbeddingMatrix 13 | attr_text: ${data_dir}/attribute_emb_weight.pt 14 | item_image: ${data_dir}/item_img.pt 15 | item_text: ${data_dir}/item_text_emb_weight.cls.pt 16 | 17 | 18 | # Data loading 19 | dataset: 20 | _target_: data_loader.data_loader_v1.SubgraphDataset 21 | meta_path_dict: 22 | ii: ${data_dir}/subgraph.ii 23 | iia: ${data_dir}/subgraph.iia 24 | iai: ${data_dir}/subgraph-iai/* 25 | iui: ${data_dir}/subgraph.iui 26 | uia: ${data_dir}/subgraph.uia 27 | uiu: ${data_dir}/subgraph.uiu 28 | uiaiu: ${data_dir}/subgraph-uiaiu/* 29 | graph_sampler: 30 | _target_: data_loader.data_utils.MaximusNeighbourSampler 31 | # max_neighbour_num: 5 32 | max_neighbour_num: 3 33 | 34 | 35 | # Data collator 36 | collator: 37 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 38 | user_vocab: ${data_dir}/user_vocab.json 39 | attr_vocab: ${data_dir}/attribute_vocab.json 40 | item_vocab: ${data_dir}/item_vocab.json 41 | node_vocab: ${data_dir}/vocab.pt 42 | 43 | 44 | # Dataloader 45 | num_workers: 32 46 | eval_num_workers: 32 47 | prefetch_factor: 2 48 | 49 | # Model 50 | model: 51 | _target_: models.gat_tf_emb.GATTransformer 52 | user_embedding: ${data_dir}/user_emb_weight.pt 53 | user_vocab: ${collator.user_vocab} 54 | freeze_user_emb: False 55 | vision_model: resnet18 56 | text_hidden_size: 768 57 | img_hidden_size: 512 58 | # hidden_size: 768 59 | hidden_size: 512 60 | gnn: 61 | _target_: models.gat.GAT 62 | # num_layers: 3 63 | num_layers: 2 64 | input_size: ${model.hidden_size} 65 | # num_heads: 12 66 | num_heads: 8 67 | head_size: 64 68 | feat_dropout: 0.1 69 | attn_dropout: 0.1 70 | residual: True 71 | transformer: 72 | _target_: models.transformer.initialize_transformer 73 | encoder_layers: 
1 74 | # encoder_ffn_dim: 3072 75 | encoder_ffn_dim: 2048 76 | # encoder_attention_heads: 12 77 | encoder_attention_heads: 8 78 | encoder_layerdrop: 0.0 79 | activation_function: "gelu" 80 | d_model: ${model.hidden_size} 81 | dropout: 0.1 82 | attention_dropout: 0.0 83 | activation_dropout: 0.0 84 | init_std: 0.02 85 | classifier_dropout: 0.0 86 | 87 | pretrain: 88 | 89 | #output_dir: experiments/gat_tf_fix_emb.v1.0 90 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 91 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 92 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 93 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 94 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 95 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 96 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 97 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 98 | output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.5.wd0.1 # max_neighbour_num 5 -> 3 99 | 100 | do_train: False 101 | evaluate_during_training: True 102 | 103 | do_eval: True 104 | eval_sub_path: checkpoint-2000 105 | 106 | # Training hyper-parameters 107 | per_gpu_train_batch_size: 8 108 | per_gpu_eval_batch_size: 8 109 | #learning_rate: 5e-5 110 | learning_rate: 1e-4 111 | #learning_rate: 1e-3 112 | gradient_accumulation_steps: 16 113 | #weight_decay: 0.01 114 | weight_decay: 0.1 115 | adam_epsilon: 1e-6 116 | adam_betas: "(0.9, 0.98)" 117 | max_grad_norm: 0.0 118 | #max_grad_norm: 1.0 119 | #num_train_epochs: 30 120 | num_train_epochs: 10 121 | max_steps: 0 122 | warmup_proportion: 0.06 123 | warmup_steps: 124 | 125 | multi_tensor: 126 | 127 | # Prediction config 128 | prediction_cfg: 129 | metric: "acc" 130 | measure: 1 131 | best_checkpoint: 132 | best_result: 133 | 134 | logging_steps: 5 135 | summary_helper: 136 | _target_: general_util.training_utils.SummaryWriterHelper 137 | 138 | save_steps: 500 139 | save_best: True 140 | eval_steps: 1000 141 | no_cuda: False 142 | seed: 42 143 | local_rank: -1 144 | fp16: True 145 | fp16_opt_level: O1 146 | 147 | # fairscale.FullyShardedDDP 148 | reshard_after_forward: False 149 | cpu_offload: False 150 | move_grads_to_cpu: False 151 | move_params_to_cpu: False 152 | 153 | # Temporary variables 154 | n_gpu: 155 | device: 156 | train_batch_size: 157 | eval_batch_size: 158 | world_size: 159 | -------------------------------------------------------------------------------- /conf/gat_pooling/gat_pooling_emb_max_v1_3_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: 
${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_emb_max_pooling.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | gnn: 59 | _target_: models.gat.GAT 60 | num_layers: 2 61 | input_size: ${model.hidden_size} 62 | num_heads: 8 63 | head_size: 64 64 | feat_dropout: 0.1 65 | attn_dropout: 0.1 66 | residual: True 67 | user_path_num: 3 68 | item_path_num: 4 69 | 70 | pretrain: 71 | 72 | #output_dir: experiments/gat_tf_fix_emb.v1.0 73 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 74 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 75 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 76 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 77 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 78 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 79 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 80 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 81 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 82 | #output_dir: experiments/gat_tf_fix_emb_max.wo_img_fix.v3.1.wd0.1.n5 83 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5 84 | #output_dir: experiments/gat_mlp_fix_emb_max.ctr.v3.1.wd0.1.n5.A100 85 | output_dir: experiments/gat_pooling_fix_emb_max.v3.1.wd0.1.n5.A100 86 | 87 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2tf # 2-layer transformer // add dropout to transformer 88 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.2tf.titanxp # epoch 5 -> 10 89 | 90 | do_train: True 91 | evaluate_during_training: True 92 | 93 | do_eval: True 94 | eval_sub_path: 95 | 96 | # Training hyper-parameters 97 | per_gpu_train_batch_size: 8 98 | per_gpu_eval_batch_size: 8 99 | #learning_rate: 5e-5 100 | learning_rate: 1e-4 101 | #learning_rate: 5e-4 102 | #learning_rate: 1e-3 103 | gradient_accumulation_steps: 3 104 | #weight_decay: 0.01 105 | weight_decay: 0.1 106 | adam_epsilon: 1e-6 107 | adam_betas: "(0.9, 0.98)" 108 | max_grad_norm: 0.0 109 | #max_grad_norm: 1.0 110 | #num_train_epochs: 30 111 | num_train_epochs: 5 112 | max_steps: 0 113 | warmup_proportion: 0.06 114 | warmup_steps: 115 | 116 | multi_tensor: 117 | 118 | # Prediction config 119 | prediction_cfg: 120 | metric: "acc" 121 | measure: 1 122 | best_checkpoint: 123 | best_result: 124 | 125 | logging_steps: 5 126 | summary_helper: 127 | _target_: general_util.training_utils.SummaryWriterHelper 128 | 129 | 
save_steps: -1 130 | save_best: True 131 | eval_steps: 500 132 | no_cuda: False 133 | seed: 42 134 | local_rank: -1 135 | fp16: True 136 | fp16_opt_level: O1 137 | 138 | # fairscale.FullyShardedDDP 139 | reshard_after_forward: False 140 | cpu_offload: False 141 | move_grads_to_cpu: False 142 | move_params_to_cpu: False 143 | 144 | # Temporary variables 145 | n_gpu: 146 | device: 147 | train_batch_size: 148 | eval_batch_size: 149 | world_size: 150 | -------------------------------------------------------------------------------- /conf/gp_bpr/gat_tf_emb_wo_img_fix_v1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: /home/jiaofangkai/gp-bpr 6 | 7 | train_file: /home/wenhaokun/GP-BPR_data/UII_train_quadruple.json 8 | dev_file: /home/wenhaokun/GP-BPR_data/UII_valid_quadruple.json 9 | test_file: /home/wenhaokun/GP-BPR_data/test_quadruple.json 10 | 11 | embedding_memory: 12 | _target_: data_loader.data_utils.EmbeddingMatrix 13 | attr_text: ${data_dir}/attribute_emb_weight.pt 14 | item_text: ${data_dir}/item_text_emb_weight.cls.pt 15 | 16 | 17 | # Data loading 18 | dataset: 19 | _target_: data_loader.data_loader_pre_img_v1.SubgraphDataset 20 | meta_path_dict: 21 | ii: ${data_dir}/subgraph.ii 22 | iia: ${data_dir}/subgraph.iia 23 | iai: ${data_dir}/subgraph-iai/* 24 | iui: ${data_dir}/subgraph.iui 25 | uia: ${data_dir}/subgraph.uia 26 | uiu: ${data_dir}/subgraph.uiu 27 | uiaiu: ${data_dir}/subgraph-uiaiu/* 28 | img_dir: /home/wangchun/work3/Initialization_all/img/ 29 | item_vocab: ${data_dir}/item_vocab.json 30 | graph_sampler: 31 | _target_: data_loader.data_utils.MaximusNeighbourSampler 32 | # max_neighbour_num: 5 33 | max_neighbour_num: 3 34 | 35 | 36 | # Data collator 37 | collator: 38 | _target_: data_loader.data_collator_fix_emb_wo_img.SubgraphCollatorVocab 39 | user_vocab: ${data_dir}/user_vocab.json 40 | attr_vocab: ${data_dir}/attribute_vocab.json 41 | item_vocab: ${data_dir}/item_vocab.json 42 | node_vocab: ${data_dir}/vocab.pt 43 | 44 | 45 | # Dataloader 46 | num_workers: 32 47 | eval_num_workers: 2 48 | prefetch_factor: 2 49 | 50 | # Model 51 | model: 52 | _target_: models.gat_tf_emb.GATTransformer 53 | user_embedding: ${data_dir}/user_emb_weight.pt 54 | user_vocab: ${collator.user_vocab} 55 | freeze_user_emb: False 56 | vision_model: resnet18 57 | text_hidden_size: 768 58 | img_hidden_size: 512 59 | # hidden_size: 768 60 | hidden_size: 512 61 | gnn: 62 | _target_: models.gat.GAT 63 | # num_layers: 3 64 | num_layers: 2 65 | input_size: ${model.hidden_size} 66 | # num_heads: 12 67 | num_heads: 8 68 | head_size: 64 69 | feat_dropout: 0.1 70 | attn_dropout: 0.1 71 | residual: True 72 | transformer: 73 | _target_: models.transformer.initialize_transformer 74 | encoder_layers: 1 75 | # encoder_ffn_dim: 3072 76 | encoder_ffn_dim: 2048 77 | # encoder_attention_heads: 12 78 | encoder_attention_heads: 8 79 | encoder_layerdrop: 0.0 80 | activation_function: "gelu" 81 | d_model: ${model.hidden_size} 82 | dropout: 0.1 83 | attention_dropout: 0.0 84 | activation_dropout: 0.0 85 | init_std: 0.02 86 | classifier_dropout: 0.0 87 | 88 | pretrain: 89 | 90 | #output_dir: experiments/gat_tf_fix_emb.v1.0 91 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 92 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 93 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 94 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 95 | 
#output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 96 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 97 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 98 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 99 | output_dir: experiments/gp_bpr.gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 100 | 101 | do_train: True 102 | evaluate_during_training: True 103 | 104 | do_eval: True 105 | eval_sub_path: 106 | 107 | # Training hyper-parameters 108 | per_gpu_train_batch_size: 8 109 | per_gpu_eval_batch_size: 8 110 | #learning_rate: 5e-5 111 | #learning_rate: 1e-4 112 | learning_rate: 5e-4 113 | #learning_rate: 1e-3 114 | gradient_accumulation_steps: 16 115 | #weight_decay: 0.01 116 | weight_decay: 0.1 117 | adam_epsilon: 1e-6 118 | adam_betas: "(0.9, 0.98)" 119 | max_grad_norm: 0.0 120 | #max_grad_norm: 1.0 121 | #num_train_epochs: 30 122 | num_train_epochs: 10 123 | max_steps: 0 124 | warmup_proportion: 0.06 125 | warmup_steps: 126 | 127 | multi_tensor: 128 | 129 | # Prediction config 130 | prediction_cfg: 131 | metric: "acc" 132 | measure: 1 133 | best_checkpoint: 134 | best_result: 135 | 136 | logging_steps: 5 137 | summary_helper: 138 | _target_: general_util.training_utils.SummaryWriterHelper 139 | 140 | save_steps: -1 141 | save_best: True 142 | eval_steps: 500 143 | no_cuda: False 144 | seed: 42 145 | local_rank: -1 146 | fp16: True 147 | fp16_opt_level: O1 148 | 149 | # fairscale.FullyShardedDDP 150 | reshard_after_forward: False 151 | cpu_offload: False 152 | move_grads_to_cpu: False 153 | move_params_to_cpu: False 154 | 155 | # Temporary variables 156 | n_gpu: 157 | device: 158 | train_batch_size: 159 | eval_batch_size: 160 | world_size: 161 | -------------------------------------------------------------------------------- /conf/gp_bpr/gat_tf_emb_max_v1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: /home/jiaofangkai/gp-bpr 6 | 7 | train_file: /home/wenhaokun/GP-BPR_data/UII_train_quadruple.json 8 | dev_file: /home/wenhaokun/GP-BPR_data/UII_valid_quadruple.json 9 | test_file: /home/wenhaokun/GP-BPR_data/UII_test_quadruple.json 10 | 11 | embedding_memory: 12 | _target_: data_loader.data_utils.EmbeddingMatrix 13 | attr_text: ${data_dir}/attribute_emb_weight.pt 14 | item_image: ${data_dir}/item_img.pt 15 | item_text: ${data_dir}/item_text_emb_weight.cls.pt 16 | 17 | 18 | # Data loading 19 | dataset: 20 | _target_: data_loader.data_loader_v1.SubgraphDataset 21 | meta_path_dict: 22 | ii: ${data_dir}/subgraph.ii 23 | iia: ${data_dir}/subgraph.iia 24 | iai: ${data_dir}/subgraph-iai/* 25 | iui: ${data_dir}/subgraph.iui 26 | uia: ${data_dir}/subgraph.uia 27 | uiu: ${data_dir}/subgraph.uiu 28 | uiaiu: ${data_dir}/subgraph-uiaiu/* 29 | graph_sampler: 30 | _target_: data_loader.data_utils.MaximusNeighbourSampler 31 | max_neighbour_num: 3 32 | 33 | 34 | # Data collator 35 | collator: 36 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 37 | user_vocab: ${data_dir}/user_vocab.json 38 | attr_vocab: ${data_dir}/attribute_vocab.json 39 | item_vocab: ${data_dir}/item_vocab.json 40 | node_vocab: ${data_dir}/vocab.pt 41 | 42 | 43 | # Dataloader 44 | num_workers: 48 45 | eval_num_workers: 32 46 | prefetch_factor: 2 47 | 48 | # Model 49 | model: 50 | _target_: models.gat_tf_emb_max.GATTransformer 51 | user_embedding: 
${data_dir}/user_emb_weight.pt 52 | user_vocab: ${collator.user_vocab} 53 | freeze_user_emb: False 54 | vision_model: resnet18 55 | text_hidden_size: 768 56 | img_hidden_size: 512 57 | # hidden_size: 768 58 | hidden_size: 512 59 | loss_type: 1 60 | gnn: 61 | _target_: models.gat.GAT 62 | # num_layers: 3 63 | num_layers: 2 64 | input_size: ${model.hidden_size} 65 | # num_heads: 12 66 | num_heads: 8 67 | head_size: 64 68 | feat_dropout: 0.1 69 | attn_dropout: 0.1 70 | residual: True 71 | transformer: 72 | _target_: models.transformer.initialize_transformer 73 | encoder_layers: 1 74 | # encoder_ffn_dim: 3072 75 | encoder_ffn_dim: 2048 76 | # encoder_attention_heads: 12 77 | encoder_attention_heads: 8 78 | encoder_layerdrop: 0.0 79 | activation_function: "gelu" 80 | d_model: ${model.hidden_size} 81 | dropout: 0.1 82 | attention_dropout: 0.0 83 | activation_dropout: 0.0 84 | init_std: 0.02 85 | classifier_dropout: 0.0 86 | 87 | pretrain: 88 | 89 | #output_dir: experiments/gat_tf_fix_emb.v1.0 90 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 91 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 92 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 93 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 94 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 95 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 96 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 97 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 98 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.5.wd0.1 # max_neighbour_num 5 -> 3 99 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.6.wd0.1 100 | output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.wd0.1 # loss_fn=1 101 | 102 | do_train: False 103 | evaluate_during_training: False 104 | 105 | do_eval: True 106 | eval_sub_path: checkpoint-* 107 | 108 | # Training hyper-parameters 109 | per_gpu_train_batch_size: 8 110 | per_gpu_eval_batch_size: 8 111 | #learning_rate: 1e-5 112 | learning_rate: 1e-4 113 | #learning_rate: 1e-3 114 | gradient_accumulation_steps: 16 115 | #weight_decay: 0.01 116 | weight_decay: 0.1 117 | adam_epsilon: 1e-6 118 | adam_betas: "(0.9, 0.98)" 119 | max_grad_norm: 0.0 120 | #max_grad_norm: 1.0 121 | #num_train_epochs: 30 122 | num_train_epochs: 10 123 | max_steps: 0 124 | warmup_proportion: 0.06 125 | warmup_steps: 126 | 127 | multi_tensor: 128 | 129 | # Prediction config 130 | prediction_cfg: 131 | metric: "acc" 132 | measure: 1 133 | best_checkpoint: 134 | best_result: 135 | 136 | logging_steps: 5 137 | summary_helper: 138 | _target_: general_util.training_utils.SummaryWriterHelper 139 | 140 | save_steps: 250 141 | save_best: True 142 | eval_steps: 1000 143 | no_cuda: False 144 | seed: 42 145 | local_rank: -1 146 | fp16: True 147 | fp16_opt_level: O1 148 | 149 | # fairscale.FullyShardedDDP 150 | reshard_after_forward: False 151 | cpu_offload: False 152 | move_grads_to_cpu: False 153 | move_params_to_cpu: False 154 | 155 | # Temporary variables 156 | n_gpu: 157 | device: 158 | train_batch_size: 159 | eval_batch_size: 160 | world_size: 161 | -------------------------------------------------------------------------------- /conf/gat_pooling/gat_pooling_emb_max_ctr_v1_3_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 
| data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | #test_file: ${data_dir}/UII_test_quadruple.json 11 | test_file: ${data_dir}/UII_test_for_mrr.json 12 | 13 | 14 | embedding_memory: 15 | _target_: data_loader.data_utils.EmbeddingMatrix 16 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 17 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 18 | item_image: ${data_dir}/subgraphs/item_img.pt 19 | 20 | 21 | # Data loading 22 | dataset: 23 | _target_: data_loader.data_loader_v1.SubgraphDataset 24 | meta_path_dict: 25 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 26 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 27 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 28 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 29 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 30 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 31 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 32 | max_tuple_num: 5 # For testing only. 33 | 34 | 35 | # Data collator 36 | collator: 37 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 38 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 39 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 40 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 41 | node_vocab: ${data_dir}/subgraphs/vocab.pt 42 | 43 | 44 | # Dataloader 45 | num_workers: 8 46 | eval_num_workers: 2 47 | prefetch_factor: 2 48 | 49 | # Model 50 | model: 51 | _target_: models.gat_emb_max_pooling.GATTransformer 52 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 53 | user_vocab: ${collator.user_vocab} 54 | freeze_user_emb: False 55 | vision_model: resnet18 56 | text_hidden_size: 768 57 | img_hidden_size: 512 58 | hidden_size: 512 59 | loss_type: 1 60 | add_ctr_loss: True 61 | gnn: 62 | _target_: models.gat.GAT 63 | num_layers: 2 64 | input_size: ${model.hidden_size} 65 | num_heads: 8 66 | head_size: 64 67 | feat_dropout: 0.1 68 | attn_dropout: 0.1 69 | residual: True 70 | user_path_num: 3 71 | item_path_num: 4 72 | 73 | pretrain: 74 | 75 | #output_dir: experiments/gat_tf_fix_emb.v1.0 76 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 77 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 78 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 79 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 80 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 81 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 82 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 83 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 84 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 85 | #output_dir: experiments/gat_tf_fix_emb_max.wo_img_fix.v3.1.wd0.1.n5 86 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5 87 | 88 | #output_dir: experiments/gat_mlp_fix_emb_max.ctr.v3.1.wd0.1.n5.A100 89 | #output_dir: experiments/gat_pooling_fix_emb_max.v3.1.wd0.1.n5.A100 90 | output_dir: experiments/gat_pooling_fix_emb_max.ctr.v3.1.wd0.1.n5.A100 91 | 92 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2tf # 2-layer transformer // add dropout to transformer 93 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.2tf.titanxp # epoch 5 -> 10 
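# Effective batch size bookkeeping for the hyper-parameters below: each
# optimizer update consumes per_gpu_train_batch_size * gradient_accumulation_steps
# samples per GPU, i.e. 8 * 3 = 24 here -- which lines up with the "bs 24" runs
# recorded in the output_dir history above.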
94 | 95 | do_train: True 96 | evaluate_during_training: True 97 | 98 | do_eval: True 99 | eval_sub_path: 100 | 101 | # Training hyper-parameters 102 | per_gpu_train_batch_size: 8 103 | per_gpu_eval_batch_size: 2 104 | #learning_rate: 5e-5 105 | learning_rate: 1e-4 106 | #learning_rate: 5e-4 107 | #learning_rate: 1e-3 108 | gradient_accumulation_steps: 3 109 | #weight_decay: 0.01 110 | weight_decay: 0.1 111 | adam_epsilon: 1e-6 112 | adam_betas: "(0.9, 0.98)" 113 | max_grad_norm: 0.0 114 | #max_grad_norm: 1.0 115 | #num_train_epochs: 30 116 | num_train_epochs: 5 117 | max_steps: 0 118 | warmup_proportion: 0.06 119 | warmup_steps: 120 | 121 | multi_tensor: 122 | 123 | # Prediction config 124 | prediction_cfg: 125 | metric: "acc" 126 | measure: 1 127 | best_checkpoint: 128 | best_result: 129 | 130 | logging_steps: 5 131 | summary_helper: 132 | _target_: general_util.training_utils.SummaryWriterHelper 133 | 134 | save_steps: -1 135 | save_best: True 136 | eval_steps: 500 137 | no_cuda: False 138 | seed: 42 139 | local_rank: -1 140 | fp16: True 141 | fp16_opt_level: O1 142 | 143 | # fairscale.FullyShardedDDP 144 | reshard_after_forward: False 145 | cpu_offload: False 146 | move_grads_to_cpu: False 147 | move_params_to_cpu: False 148 | 149 | # Temporary variables 150 | n_gpu: 151 | device: 152 | train_batch_size: 153 | eval_batch_size: 154 | world_size: 155 | -------------------------------------------------------------------------------- /conf/gat_tf_wo_att/gat_tf_emb_max_wo_att_v1_3_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max_wo_att.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | gnn: 59 | _target_: models.gat.GAT 60 | num_layers: 2 61 | input_size: ${model.hidden_size} 62 | num_heads: 8 63 | head_size: 64 64 | feat_dropout: 0.1 65 | 
attn_dropout: 0.1 66 | residual: True 67 | transformer: 68 | _target_: models.transformer.initialize_transformer 69 | encoder_layers: 1 70 | encoder_ffn_dim: 2048 71 | encoder_attention_heads: 8 72 | encoder_layerdrop: 0.0 73 | activation_function: "gelu" 74 | d_model: ${model.hidden_size} 75 | dropout: 0.1 76 | attention_dropout: 0.0 77 | activation_dropout: 0.0 78 | init_std: 0.02 79 | classifier_dropout: 0.0 80 | 81 | pretrain: 82 | 83 | #output_dir: experiments/gat_tf_fix_emb.v1.0 84 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 85 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 86 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 87 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 88 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 92 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 93 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 94 | output_dir: experiments/gat_tf_fix_emb_max_wo_att.v3.1.wd0.1.n5.2080Ti 95 | 96 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 97 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 98 | 99 | do_train: True 100 | evaluate_during_training: True 101 | 102 | do_eval: True 103 | eval_sub_path: 104 | 105 | # Training hyper-parameters 106 | per_gpu_train_batch_size: 2 107 | per_gpu_eval_batch_size: 2 108 | #learning_rate: 5e-5 109 | learning_rate: 1e-4 110 | #learning_rate: 5e-4 111 | #learning_rate: 1e-3 112 | gradient_accumulation_steps: 12 113 | #weight_decay: 0.01 114 | weight_decay: 0.1 115 | adam_epsilon: 1e-6 116 | adam_betas: "(0.9, 0.98)" 117 | max_grad_norm: 0.0 118 | #max_grad_norm: 1.0 119 | #num_train_epochs: 30 120 | num_train_epochs: 5 121 | max_steps: 0 122 | warmup_proportion: 0.06 123 | warmup_steps: 124 | 125 | multi_tensor: 126 | 127 | # Prediction config 128 | prediction_cfg: 129 | metric: "acc" 130 | measure: 1 131 | best_checkpoint: 132 | best_result: 133 | 134 | logging_steps: 5 135 | summary_helper: 136 | _target_: general_util.training_utils.SummaryWriterHelper 137 | 138 | save_steps: -1 139 | save_best: True 140 | eval_steps: 500 141 | no_cuda: False 142 | seed: 42 143 | local_rank: -1 144 | fp16: True 145 | fp16_opt_level: O1 146 | 147 | # fairscale.FullyShardedDDP 148 | reshard_after_forward: False 149 | cpu_offload: False 150 | move_grads_to_cpu: False 151 | move_params_to_cpu: False 152 | 153 | # Temporary variables 154 | n_gpu: 155 | device: 156 | train_batch_size: 157 | eval_batch_size: 158 | world_size: 159 | -------------------------------------------------------------------------------- /conf/gp_bpr/gat_tf_emb_max_fix_graph_v1_n5.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: gp-bpr 6 | 7 | train_file: ${data_dir}/UII_train_quadruple.json 8 | dev_file: ${data_dir}/UII_valid_quadruple.json 9 | test_file: ${data_dir}/UII_test_quadruple.json 10 | 11 | embedding_memory: 12 | _target_: data_loader.data_utils.EmbeddingMatrix 13 | attr_text: ${data_dir}/attribute_emb_weight.pt 14 
| item_image: ${data_dir}/item_img.pt 15 | item_text: ${data_dir}/item_text_emb_weight.cls.pt 16 | 17 | 18 | # Data loading 19 | dataset: 20 | _target_: data_loader.data_loader_v1.SubgraphDataset 21 | meta_path_dict: 22 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 23 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 24 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 25 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 26 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 27 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 28 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 29 | 30 | 31 | # Data collator 32 | collator: 33 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 34 | user_vocab: ${data_dir}/user_vocab.json 35 | attr_vocab: ${data_dir}/attribute_vocab.json 36 | item_vocab: ${data_dir}/item_vocab.json 37 | node_vocab: ${data_dir}/vocab.pt 38 | 39 | 40 | # Dataloader 41 | num_workers: 8 42 | eval_num_workers: 2 43 | prefetch_factor: 2 44 | 45 | # Model 46 | model: 47 | _target_: models.gat_tf_emb_max.GATTransformer 48 | user_embedding: ${data_dir}/user_emb_weight.pt 49 | user_vocab: ${collator.user_vocab} 50 | freeze_user_emb: False 51 | vision_model: resnet18 52 | text_hidden_size: 768 53 | img_hidden_size: 512 54 | hidden_size: 512 55 | loss_type: 1 56 | gnn: 57 | _target_: models.gat.GAT 58 | num_layers: 2 59 | input_size: ${model.hidden_size} 60 | num_heads: 8 61 | head_size: 64 62 | feat_dropout: 0.1 63 | attn_dropout: 0.1 64 | residual: True 65 | transformer: 66 | _target_: models.transformer.initialize_transformer 67 | encoder_layers: 1 68 | encoder_ffn_dim: 2048 69 | encoder_attention_heads: 8 70 | encoder_layerdrop: 0.0 71 | activation_function: "gelu" 72 | d_model: ${model.hidden_size} 73 | dropout: 0.1 74 | attention_dropout: 0.0 75 | activation_dropout: 0.0 76 | init_std: 0.02 77 | classifier_dropout: 0.0 78 | 79 | pretrain: 80 | 81 | #output_dir: experiments/gat_tf_fix_emb.v1.0 82 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 83 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 84 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 85 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 86 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 87 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 88 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.5.wd0.1 # max_neighbour_num 5 -> 3 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.6.wd0.1 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.wd0.1 # loss_fn=1 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.fix_graph.wd0.1 94 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.fix_graph.wd1.0 95 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.fix_graph.wd1.0.n5 96 | output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.8.fix_graph.wd1.0.n5 97 | 98 | do_train: True 99 | evaluate_during_training: True 100 | 101 | do_eval: True 102 | eval_sub_path: 103 | 104 | # Training hyper-parameters 105 | per_gpu_train_batch_size: 2 106 | per_gpu_eval_batch_size: 2 107 | #learning_rate: 1e-5 108 | #learning_rate: 1e-4 109 | learning_rate: 5e-5 110 | #learning_rate: 1e-3 111 | gradient_accumulation_steps: 64 112 | 
#weight_decay: 0.01 113 | #weight_decay: 0.1 114 | weight_decay: 1.0 115 | adam_epsilon: 1e-6 116 | adam_betas: "(0.9, 0.98)" 117 | max_grad_norm: 0.0 118 | #max_grad_norm: 1.0 119 | #num_train_epochs: 30 120 | num_train_epochs: 10 121 | max_steps: 0 122 | warmup_proportion: 0.06 123 | warmup_steps: 124 | 125 | multi_tensor: 126 | 127 | # Prediction config 128 | prediction_cfg: 129 | metric: "acc" 130 | measure: 1 131 | best_checkpoint: 132 | best_result: 133 | 134 | logging_steps: 5 135 | summary_helper: 136 | _target_: general_util.training_utils.SummaryWriterHelper 137 | 138 | save_steps: 250 139 | save_best: True 140 | eval_steps: 250 141 | no_cuda: False 142 | seed: 42 143 | local_rank: -1 144 | fp16: True 145 | fp16_opt_level: O1 146 | 147 | # fairscale.FullyShardedDDP 148 | reshard_after_forward: False 149 | cpu_offload: False 150 | move_grads_to_cpu: False 151 | move_params_to_cpu: False 152 | 153 | # Temporary variables 154 | n_gpu: 155 | device: 156 | train_batch_size: 157 | eval_batch_size: 158 | world_size: 159 | -------------------------------------------------------------------------------- /data_loader/data_loader_edge_v1.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import os 4 | from collections import defaultdict 5 | from typing import Dict, Any, Union, Callable 6 | 7 | import dgl 8 | import torch 9 | from omegaconf import DictConfig 10 | from torch.utils.data import Dataset 11 | from torch.utils.data.dataset import T_co 12 | from general_util.logger import get_child_logger 13 | 14 | logger = get_child_logger('EdgeDataset') 15 | 16 | 17 | class SubgraphEdgeDataset(Dataset): 18 | def __init__(self, quadruple_file: str, meta_path_dict: DictConfig, graph_sampler: Callable = None): 19 | logger.info(f'Loading data file from {quadruple_file}.') 20 | self.quadruples = json.load(open(quadruple_file, 'r')) 21 | self.meta_path = self._parse_meta_path(meta_path_dict) 22 | self.graph_sampler = graph_sampler 23 | 24 | def __getitem__(self, index) -> T_co: 25 | # user, anchor_item, pos_item, neg_item = self.quadruples[index] 26 | # quadruple = [user, anchor_item, pos_item, neg_item] 27 | quadruple = self.quadruples[index] 28 | 29 | all_nodes = set() 30 | all_dgl_graph, all_src, all_dst, all_node2re_id, all_re_id2node = [], [], [], [], [] 31 | 32 | for i, x in enumerate(quadruple): # [user, anchor_item, pos_item, neg_item] 33 | _dgl_graph_ls, _mapped_src_ls, _mapped_dst_ls, _node2re_id_ls, _re_id2node_ls, _nodes_ls = zip(*[ 34 | self._load_subgraph(src=x, graph=y) for y in self.meta_path[x]]) 35 | for subgraph_node_ls in _nodes_ls: 36 | all_nodes.update(subgraph_node_ls) 37 | all_dgl_graph.append(_dgl_graph_ls) 38 | all_src.append(_mapped_src_ls) 39 | all_dst.append(_mapped_dst_ls) 40 | all_node2re_id.append(_node2re_id_ls) 41 | all_re_id2node.append(_re_id2node_ls) 42 | 43 | return all_dgl_graph, all_src, all_dst, all_node2re_id, all_re_id2node, list(all_nodes), quadruple 44 | 45 | def __len__(self): 46 | return len(self.quadruples) 47 | 48 | @staticmethod 49 | def _parse_meta_path(meta_path_dict: DictConfig): 50 | logger.info(f'Parsing meta-path...') 51 | meta_path = defaultdict(list) 52 | for path_type, path_no_path in meta_path_dict.items(): 53 | if os.path.isfile(path_no_path): # All subgraphs are saved into a single file.
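# NOTE (added explanation, inferred from the loading code below rather than stated in the original): the single-file layout is assumed to be a list of dicts, each shaped roughly like {"meta_path": "iui", "src_id": "<source node id>", "edges": {u: {v1, v2, ...}}}, so that all subgraphs of one meta-path type can be grouped by their source node.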
54 | path_subgraph = torch.load(path_no_path) 55 | assert path_type == path_subgraph[0]['meta_path'], (path_type, path_subgraph[0]['meta_path'], path_subgraph[1]['meta_path']) 56 | for _subgraph in path_subgraph: 57 | meta_path[_subgraph["src_id"]].append(_subgraph) 58 | else: 59 | files = list(glob.glob(path_no_path)) 60 | for file in files: 61 | src = file.split('/')[-1] 62 | meta_path[src].append(file) 63 | return meta_path 64 | 65 | def _load_subgraph(self, src: str, graph: Union[Dict[str, Any], str]): 66 | if isinstance(graph, str): # Load the single subgraph file. 67 | graph: Dict[str, Any] = torch.load(graph) 68 | 69 | if self.graph_sampler is not None: 70 | graph = self.graph_sampler(graph) 71 | 72 | meta_path_type = graph['meta_path'] 73 | assert graph['src_id'] == src 74 | neighbours = graph['edges'] 75 | 76 | orig_edges = [] 77 | # ======================================================= 78 | # The ``__init__`` method already guarantees that node ids are unique across all node types, 79 | # so the nodes collected from ``neighbours`` can be merged directly, 80 | # without distinguishing their node types; the relative order among the 81 | # nodes does not matter, either. 82 | # However, the input node embedding sequence must be initialized in the 83 | # same node order as ``nodes``. 84 | # ======================================================= 85 | nodes = set() 86 | for u, v_set in neighbours.items(): 87 | for v in v_set: 88 | orig_edges.append((u, v)) 89 | nodes.add(u) 90 | nodes.add(v) 91 | nodes = list(nodes) 92 | 93 | node2re_id = {} 94 | re_id2node = {} 95 | for i, node in enumerate(nodes): 96 | node2re_id[node] = i 97 | re_id2node[i] = node 98 | 99 | mapped_src = [] 100 | mapped_dst = [] 101 | for e in orig_edges: 102 | mapped_src.append(node2re_id[e[0]]) 103 | mapped_dst.append(node2re_id[e[1]]) 104 | 105 | # Build the DGL graph here; the reversed edge list is appended so the graph is effectively undirected. 106 | dgl_graph = dgl.graph((torch.tensor(mapped_src + mapped_dst), torch.tensor(mapped_dst + mapped_src))) 107 | return dgl_graph, mapped_src + mapped_dst, mapped_dst + mapped_src, node2re_id, re_id2node, nodes 108 | -------------------------------------------------------------------------------- /conf/gp_bpr/gat_tf_emb_max_fix_graph_v3.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: gp-bpr 6 | 7 | train_file: ${data_dir}/UII_train_quadruple.json 8 | dev_file: ${data_dir}/UII_valid_quadruple.json 9 | test_file: ${data_dir}/UII_test_quadruple.json 10 | 11 | embedding_memory: 12 | _target_: data_loader.data_utils.EmbeddingMatrix 13 | attr_text: ${data_dir}/attribute_emb_weight.pt 14 | item_image: ${data_dir}/item_img.pt 15 | item_text: ${data_dir}/item_text_emb_weight.cls.pt 16 | 17 | 18 | # Data loading 19 | dataset: 20 | _target_: data_loader.data_loader_v1.SubgraphDataset 21 | meta_path_dict: 22 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_3 23 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_3 24 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_3 25 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_3 26 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_3 27 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_3 28 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_3 29 | 30 | 31 | # Data collator 32 | collator: 33 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 34 | user_vocab: 
${data_dir}/user_vocab.json 35 | attr_vocab: ${data_dir}/attribute_vocab.json 36 | item_vocab: ${data_dir}/item_vocab.json 37 | node_vocab: ${data_dir}/vocab.pt 38 | 39 | 40 | # Dataloader 41 | num_workers: 16 42 | eval_num_workers: 4 43 | prefetch_factor: 2 44 | 45 | # Model 46 | model: 47 | _target_: models.gat_tf_emb_max.GATTransformer 48 | user_embedding: ${data_dir}/user_emb_weight.pt 49 | user_vocab: ${collator.user_vocab} 50 | freeze_user_emb: False 51 | vision_model: resnet18 52 | text_hidden_size: 768 53 | img_hidden_size: 512 54 | hidden_size: 256 55 | loss_type: 1 56 | gnn: 57 | _target_: models.gat.GAT 58 | num_layers: 2 59 | input_size: ${model.hidden_size} 60 | num_heads: 4 61 | head_size: 64 62 | feat_dropout: 0.2 63 | attn_dropout: 0.2 64 | residual: True 65 | transformer: 66 | _target_: models.transformer.initialize_transformer 67 | encoder_layers: 1 68 | encoder_ffn_dim: 1024 69 | encoder_attention_heads: 4 70 | encoder_layerdrop: 0.0 71 | activation_function: "gelu" 72 | d_model: ${model.hidden_size} 73 | dropout: 0.2 74 | attention_dropout: 0.2 75 | activation_dropout: 0.2 76 | init_std: 0.02 77 | classifier_dropout: 0.0 78 | 79 | pretrain: 80 | 81 | #output_dir: experiments/gat_tf_fix_emb.v1.0 82 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 83 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 84 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 85 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 86 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 87 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 88 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.5.wd0.1 # max_neighbour_num 5 -> 3 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.6.wd0.1 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.wd0.1 # loss_fn=1 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.fix_graph.wd0.1 94 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.fix_graph.wd1.0 95 | 96 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v4.0.fix_graph.wd1.0 97 | output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v5.0.fix_graph.wd0.1 98 | 99 | do_train: True 100 | #do_train: False 101 | evaluate_during_training: True 102 | 103 | do_eval: True 104 | eval_sub_path: 105 | 106 | # Training hyper-parameters 107 | per_gpu_train_batch_size: 4 108 | per_gpu_eval_batch_size: 4 109 | #learning_rate: 1e-5 110 | learning_rate: 1e-4 111 | #learning_rate: 5e-5 112 | #learning_rate: 1e-3 113 | gradient_accumulation_steps: 32 114 | #weight_decay: 0.01 115 | weight_decay: 0.1 116 | #weight_decay: 1.0 117 | adam_epsilon: 1e-6 118 | adam_betas: "(0.9, 0.98)" 119 | max_grad_norm: 0.0 120 | #max_grad_norm: 1.0 121 | #num_train_epochs: 30 122 | num_train_epochs: 10 123 | max_steps: 0 124 | warmup_proportion: 0.06 125 | warmup_steps: 126 | 127 | multi_tensor: 128 | 129 | # Prediction config 130 | prediction_cfg: 131 | metric: "acc" 132 | measure: 1 133 | best_checkpoint: 134 | best_result: 135 | 136 | logging_steps: 5 137 | summary_helper: 138 | _target_: general_util.training_utils.SummaryWriterHelper 139 | 140 | save_steps: 250 141 | save_best: True 142 | eval_steps: 250 143 | no_cuda: False 144 | seed: 42 145 | 
local_rank: -1 146 | fp16: True 147 | fp16_opt_level: O1 148 | 149 | # fairscale.FullyShardedDDP 150 | reshard_after_forward: False 151 | cpu_offload: False 152 | move_grads_to_cpu: False 153 | move_params_to_cpu: False 154 | 155 | # Temporary variables 156 | n_gpu: 157 | device: 158 | train_batch_size: 159 | eval_batch_size: 160 | world_size: 161 | -------------------------------------------------------------------------------- /general_util/training_utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | from typing import Dict, Union 4 | from fairscale.nn.data_parallel.fully_sharded_data_parallel import FullyShardedDataParallel as FullyShardedDP 5 | from omegaconf import DictConfig, OmegaConf 6 | 7 | from general_util.logger import get_child_logger 8 | import numpy as np 9 | import torch 10 | from torch.utils.tensorboard import SummaryWriter 11 | 12 | logger = get_child_logger("Training utils") 13 | 14 | 15 | def set_seed(args): 16 | random.seed(args.seed) 17 | np.random.seed(args.seed) 18 | torch.manual_seed(args.seed) 19 | if args.n_gpu > 0: 20 | torch.cuda.manual_seed_all(args.seed) 21 | 22 | 23 | def to_list(tensor): 24 | return tensor.detach().cpu().tolist() 25 | 26 | 27 | def unwrap_model(model: torch.nn.Module) -> torch.nn.Module: 28 | """ 29 | Recursively unwraps a model from potential containers (as used in distributed training). 30 | Args: 31 | model (:obj:`torch.nn.Module`): The model to unwrap. 32 | """ 33 | # since there could be multiple levels of wrapping, unwrap recursively 34 | if hasattr(model, "module"): 35 | return unwrap_model(model.module) 36 | else: 37 | return model 38 | 39 | 40 | def save_model(model: Union[torch.nn.Module, FullyShardedDP], cfg: DictConfig, output_dir: str): 41 | # Save model checkpoint. 42 | if cfg.local_rank != -1: 43 | state_dict = model.state_dict() 44 | if cfg.local_rank == 0: 45 | torch.save(state_dict, os.path.join(output_dir, "pytorch_model.bin")) 46 | else: 47 | torch.save(model.state_dict(), os.path.join(output_dir, "pytorch_model.bin")) 48 | 49 | # Save the training config (no tokenizer is involved here). 
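# NOTE (illustrative sketch, not part of the original code): a checkpoint written by this function can later be restored with something like unwrap_model(model).load_state_dict(torch.load(os.path.join(output_dir, "pytorch_model.bin"), map_location="cpu")) followed by cfg = OmegaConf.load(os.path.join(output_dir, "training_config.yaml")), where ``unwrap_model`` is the helper defined above.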
50 | if cfg.local_rank in [-1, 0]: 51 | OmegaConf.save(cfg, os.path.join(output_dir, "training_config.yaml")) 52 | logger.info("Saving model checkpoint to %s", output_dir) 53 | 54 | 55 | def batch_to_device(batch: Dict[str, torch.Tensor], device): 56 | batch_on_device = {} 57 | for k, v in batch.items(): 58 | batch_on_device[k] = v.to(device) 59 | return batch_on_device 60 | 61 | 62 | def initialize_optimizer(cfg: DictConfig, model: torch.nn.Module): 63 | no_decay = ['bias', 'LayerNorm.weight', 'layer_norm.weight'] 64 | optimizer_grouped_parameters = [ 65 | {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': cfg.weight_decay}, 66 | {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} 67 | ] 68 | 69 | if "optimizer" in cfg and cfg.optimizer == 'lamb': 70 | if "bit_training" in cfg and cfg.bit_training: 71 | from bitsandbytes.optim import LAMB8bit 72 | 73 | optimizer = LAMB8bit(optimizer_grouped_parameters, 74 | lr=cfg.learning_rate, 75 | betas=eval(cfg.adam_betas), 76 | eps=cfg.adam_epsilon, 77 | max_unorm=cfg.max_grad_norm) 78 | else: 79 | from apex.optimizers.fused_lamb import FusedLAMB 80 | 81 | optimizer = FusedLAMB(optimizer_grouped_parameters, 82 | lr=cfg.learning_rate, 83 | betas=eval(cfg.adam_betas), 84 | eps=cfg.adam_epsilon, 85 | use_nvlamb=(cfg.use_nvlamb if "use_nvlamb" in cfg else False), 86 | max_grad_norm=cfg.max_grad_norm) 87 | else: 88 | if "bit_training" in cfg and cfg.bit_training: 89 | from bitsandbytes.optim import AdamW8bit 90 | 91 | optimizer = AdamW8bit(optimizer_grouped_parameters, lr=cfg.learning_rate, eps=cfg.adam_epsilon, betas=(eval(cfg.adam_betas))) 92 | else: 93 | if hasattr(cfg, "multi_tensor") and cfg.multi_tensor: 94 | from torch.optim._multi_tensor import AdamW 95 | else: 96 | from transformers import AdamW 97 | 98 | optimizer = AdamW(optimizer_grouped_parameters, lr=cfg.learning_rate, eps=cfg.adam_epsilon, betas=(eval(cfg.adam_betas))) 99 | 100 | return optimizer 101 | 102 | 103 | def note_best_checkpoint(cfg: DictConfig, results: Dict[str, float], sub_path: str): 104 | metric = results[cfg.prediction_cfg.metric] 105 | if (not cfg.prediction_cfg.best_result) or (cfg.prediction_cfg.measure > 0 and metric > cfg.prediction_cfg.best_result) or ( 106 | cfg.prediction_cfg.measure < 0 and metric < cfg.prediction_cfg.best_result): 107 | cfg.prediction_cfg.best_result = metric 108 | cfg.prediction_cfg.best_checkpoint = sub_path 109 | return True 110 | return False 111 | 112 | 113 | class SummaryWriterHelper: 114 | def __init__(self, writer: SummaryWriter): 115 | self.writer = writer 116 | 117 | def __call__(self, batch, step): 118 | self.writer.add_scalar('node_num', batch['input_emb_index'].size(0), global_step=step) 119 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_max_v1_3_1_wo_ii_uia.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 
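# NOTE (explanatory comment, not part of the original config): every block carrying a ``_target_`` key is instantiated through Hydra, e.g. roughly emb = hydra.utils.instantiate(cfg.embedding_memory), which resolves the ${...} interpolations via OmegaConf and then calls data_loader.data_utils.EmbeddingMatrix(attr_text=..., item_text=..., item_image=...).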
20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | # ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | # uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | gnn: 59 | _target_: models.gat.GAT 60 | num_layers: 2 61 | input_size: ${model.hidden_size} 62 | num_heads: 8 63 | head_size: 64 64 | feat_dropout: 0.1 65 | attn_dropout: 0.1 66 | residual: True 67 | transformer: 68 | _target_: models.transformer.initialize_transformer 69 | encoder_layers: 1 70 | encoder_ffn_dim: 2048 71 | encoder_attention_heads: 8 72 | encoder_layerdrop: 0.0 73 | activation_function: "gelu" 74 | d_model: ${model.hidden_size} 75 | dropout: 0.1 76 | attention_dropout: 0.0 77 | activation_dropout: 0.0 78 | init_std: 0.02 79 | classifier_dropout: 0.0 80 | 81 | pretrain: 82 | 83 | #output_dir: experiments/gat_tf_fix_emb.v1.0 84 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 85 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 86 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 87 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 88 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 92 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 93 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 94 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.T4 95 | output_dir: experiments/gat_tf_fix_emb_max.v3.1.remove_ii_uia.wd0.1.n5.T4 96 | 97 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 98 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 99 | 100 | do_train: True 101 | evaluate_during_training: True 102 | 103 | do_eval: True 104 | eval_sub_path: 105 | 106 | # Training hyper-parameters 107 | per_gpu_train_batch_size: 2 108 | per_gpu_eval_batch_size: 2 109 | #learning_rate: 5e-5 110 | learning_rate: 1e-4 111 | #learning_rate: 5e-4 112 | #learning_rate: 1e-3 113 | gradient_accumulation_steps: 12 114 | #weight_decay: 0.01 115 | weight_decay: 0.1 116 | adam_epsilon: 1e-6 117 | adam_betas: "(0.9, 
0.98)" 118 | max_grad_norm: 0.0 119 | #max_grad_norm: 1.0 120 | #num_train_epochs: 30 121 | num_train_epochs: 5 122 | max_steps: 0 123 | warmup_proportion: 0.06 124 | warmup_steps: 125 | 126 | multi_tensor: 127 | 128 | # Prediction config 129 | prediction_cfg: 130 | metric: "acc" 131 | measure: 1 132 | best_checkpoint: 133 | best_result: 134 | 135 | logging_steps: 5 136 | summary_helper: 137 | _target_: general_util.training_utils.SummaryWriterHelper 138 | 139 | save_steps: -1 140 | save_best: True 141 | eval_steps: 500 142 | no_cuda: False 143 | seed: 42 144 | local_rank: -1 145 | fp16: True 146 | fp16_opt_level: O1 147 | 148 | # fairscale.FullyShardedDDP 149 | reshard_after_forward: False 150 | cpu_offload: False 151 | move_grads_to_cpu: False 152 | move_params_to_cpu: False 153 | 154 | # Temporary variables 155 | n_gpu: 156 | device: 157 | train_batch_size: 158 | eval_batch_size: 159 | world_size: 160 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_max_v1_3_1_wo_iia_iai_uia_uiaiu.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | # iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | # iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | # uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | # uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 0 44 | eval_num_workers: 0 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | gnn: 59 | _target_: models.gat.GAT 60 | num_layers: 2 61 | input_size: ${model.hidden_size} 62 | num_heads: 8 63 | head_size: 64 64 | feat_dropout: 0.1 65 | attn_dropout: 0.1 66 | residual: True 67 | transformer: 68 | _target_: models.transformer.initialize_transformer 69 | encoder_layers: 1 70 | encoder_ffn_dim: 2048 71 | encoder_attention_heads: 8 72 | encoder_layerdrop: 0.0 73 | activation_function: "gelu" 74 | d_model: ${model.hidden_size} 75 | dropout: 0.1 76 | attention_dropout: 0.0 77 | activation_dropout: 0.0 78 | init_std: 0.02 79 | classifier_dropout: 0.0 80 | 81 | pretrain: 82 | 83 | #output_dir: 
experiments/gat_tf_fix_emb.v1.0 84 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 85 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 86 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 87 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 88 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 92 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 93 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 94 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.T4 95 | output_dir: experiments/gat_tf_fix_emb_max.v3.1.remove_iia_iai_uia_uiaiu.wd0.1.n5.T4 96 | 97 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 98 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 99 | 100 | do_train: True 101 | evaluate_during_training: True 102 | 103 | do_eval: True 104 | eval_sub_path: 105 | 106 | # Training hyper-parameters 107 | per_gpu_train_batch_size: 4 108 | per_gpu_eval_batch_size: 4 109 | #learning_rate: 5e-5 110 | learning_rate: 1e-4 111 | #learning_rate: 5e-4 112 | #learning_rate: 1e-3 113 | gradient_accumulation_steps: 6 114 | #weight_decay: 0.01 115 | weight_decay: 0.1 116 | adam_epsilon: 1e-6 117 | adam_betas: "(0.9, 0.98)" 118 | max_grad_norm: 0.0 119 | #max_grad_norm: 1.0 120 | #num_train_epochs: 30 121 | num_train_epochs: 5 122 | max_steps: 0 123 | warmup_proportion: 0.06 124 | warmup_steps: 125 | 126 | multi_tensor: 127 | 128 | # Prediction config 129 | prediction_cfg: 130 | metric: "acc" 131 | measure: 1 132 | best_checkpoint: 133 | best_result: 134 | 135 | logging_steps: 5 136 | summary_helper: 137 | _target_: general_util.training_utils.SummaryWriterHelper 138 | 139 | save_steps: -1 140 | save_best: True 141 | eval_steps: 500 142 | no_cuda: False 143 | seed: 42 144 | local_rank: -1 145 | fp16: True 146 | fp16_opt_level: O1 147 | 148 | # fairscale.FullyShardedDDP 149 | reshard_after_forward: False 150 | cpu_offload: False 151 | move_grads_to_cpu: False 152 | move_params_to_cpu: False 153 | 154 | # Temporary variables 155 | n_gpu: 156 | device: 157 | train_batch_size: 158 | eval_batch_size: 159 | world_size: 160 | -------------------------------------------------------------------------------- /conf/item_ab/gat_tf_emb_max_v1_3_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: 
${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 4 44 | eval_num_workers: 0 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max_item_ab.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | item_use_img: True 59 | item_use_text: True 60 | gnn: 61 | _target_: models.gat.GAT 62 | num_layers: 2 63 | input_size: ${model.hidden_size} 64 | num_heads: 8 65 | head_size: 64 66 | feat_dropout: 0.1 67 | attn_dropout: 0.1 68 | residual: True 69 | transformer: 70 | _target_: models.transformer.initialize_transformer 71 | encoder_layers: 1 72 | encoder_ffn_dim: 2048 73 | encoder_attention_heads: 8 74 | encoder_layerdrop: 0.0 75 | activation_function: "gelu" 76 | d_model: ${model.hidden_size} 77 | dropout: 0.1 78 | attention_dropout: 0.0 79 | activation_dropout: 0.0 80 | init_std: 0.02 81 | classifier_dropout: 0.0 82 | 83 | pretrain: 84 | 85 | #output_dir: experiments/gat_tf_fix_emb.v1.0 86 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 87 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 88 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 89 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 90 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 94 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 95 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 96 | output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.T4.i${model.item_use_img}${model.item_use_text} 97 | 98 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 99 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 100 | 101 | do_train: True 102 | evaluate_during_training: True 103 | 104 | do_eval: True 105 | eval_sub_path: 106 | 107 | # Training hyper-parameters 108 | per_gpu_train_batch_size: 2 109 | per_gpu_eval_batch_size: 2 110 | #learning_rate: 5e-5 111 | learning_rate: 1e-4 112 | #learning_rate: 5e-4 113 | #learning_rate: 1e-3 114 | gradient_accumulation_steps: 12 115 | #weight_decay: 0.01 116 | weight_decay: 0.1 117 | adam_epsilon: 1e-6 118 | adam_betas: "(0.9, 0.98)" 119 | max_grad_norm: 0.0 120 | #max_grad_norm: 1.0 121 | #num_train_epochs: 30 122 | num_train_epochs: 5 123 | max_steps: 0 124 | warmup_proportion: 
0.06 125 | warmup_steps: 126 | 127 | multi_tensor: 128 | 129 | # Prediction config 130 | prediction_cfg: 131 | metric: "acc" 132 | measure: 1 133 | best_checkpoint: 134 | best_result: 135 | 136 | logging_steps: 5 137 | summary_helper: 138 | _target_: general_util.training_utils.SummaryWriterHelper 139 | 140 | save_steps: -1 141 | save_best: True 142 | eval_steps: 500 143 | no_cuda: False 144 | seed: 42 145 | local_rank: -1 146 | fp16: True 147 | fp16_opt_level: O1 148 | 149 | # fairscale.FullyShardedDDP 150 | reshard_after_forward: False 151 | cpu_offload: False 152 | move_grads_to_cpu: False 153 | move_params_to_cpu: False 154 | 155 | # Temporary variables 156 | n_gpu: 157 | device: 158 | train_batch_size: 159 | eval_batch_size: 160 | world_size: 161 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_max_ctr_v1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | add_ctr_loss: True 59 | gnn: 60 | _target_: models.gat.GAT 61 | num_layers: 2 62 | input_size: ${model.hidden_size} 63 | num_heads: 8 64 | head_size: 64 65 | feat_dropout: 0.1 66 | attn_dropout: 0.1 67 | residual: True 68 | transformer: 69 | _target_: models.transformer.initialize_transformer 70 | encoder_layers: 2 71 | encoder_ffn_dim: 2048 72 | encoder_attention_heads: 8 73 | encoder_layerdrop: 0.0 74 | activation_function: "gelu" 75 | d_model: ${model.hidden_size} 76 | dropout: 0.1 77 | # attention_dropout: 0.0 78 | # activation_dropout: 0.0 79 | attention_dropout: 0.1 80 | activation_dropout: 0.1 81 | init_std: 0.02 82 | classifier_dropout: 0.0 83 | 84 | pretrain: 85 | 86 | #output_dir: experiments/gat_tf_fix_emb.v1.0 87 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 88 | 
#output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 89 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 90 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 91 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 94 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 95 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 96 | #output_dir: experiments/gat_tf_fix_emb_max.wo_img_fix.v3.1.wd0.1.n5 97 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5 98 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2tf # 2-layer transformer // add dropout to transformer 99 | output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.2tf.titanxp # epoch 5 -> 10 100 | 101 | do_train: True 102 | evaluate_during_training: True 103 | 104 | do_eval: True 105 | eval_sub_path: 106 | 107 | # Training hyper-parameters 108 | per_gpu_train_batch_size: 2 109 | per_gpu_eval_batch_size: 2 110 | #learning_rate: 5e-5 111 | learning_rate: 1e-4 112 | #learning_rate: 5e-4 113 | #learning_rate: 1e-3 114 | gradient_accumulation_steps: 12 115 | #weight_decay: 0.01 116 | weight_decay: 0.1 117 | adam_epsilon: 1e-6 118 | adam_betas: "(0.9, 0.98)" 119 | max_grad_norm: 0.0 120 | #max_grad_norm: 1.0 121 | #num_train_epochs: 30 122 | num_train_epochs: 10 123 | max_steps: 0 124 | warmup_proportion: 0.06 125 | warmup_steps: 126 | 127 | multi_tensor: 128 | 129 | # Prediction config 130 | prediction_cfg: 131 | metric: "acc" 132 | measure: 1 133 | best_checkpoint: 134 | best_result: 135 | 136 | logging_steps: 5 137 | summary_helper: 138 | _target_: general_util.training_utils.SummaryWriterHelper 139 | 140 | save_steps: -1 141 | save_best: True 142 | eval_steps: 500 143 | no_cuda: False 144 | seed: 42 145 | local_rank: -1 146 | fp16: True 147 | fp16_opt_level: O1 148 | 149 | # fairscale.FullyShardedDDP 150 | reshard_after_forward: False 151 | cpu_offload: False 152 | move_grads_to_cpu: False 153 | move_params_to_cpu: False 154 | 155 | # Temporary variables 156 | n_gpu: 157 | device: 158 | train_batch_size: 159 | eval_batch_size: 160 | world_size: 161 | -------------------------------------------------------------------------------- /preprocess/process_user_embedding.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import torch 5 | from torch import nn 6 | from torchvision.models import resnet18 7 | from tqdm import tqdm 8 | import argparse 9 | 10 | device = torch.device("cuda:0") 11 | batch_size = 256 12 | 13 | resnet = nn.Sequential(*list(resnet18(pretrained=True).children())[:-1]).to(device=device) 14 | resnet.eval() 15 | 16 | # img_dir = "/home/wangchun/work3/Initialization/img/" 17 | # text_dir = "/home/wangchun/work3/Initialization/text/" 18 | img_dir = "/home/wangchun/work3/Initialization_all/img/" 19 | text_dir = "/home/wangchun/work3/Initialization_all/text/" 20 | 21 | 22 | def initializer(_ui_edges): 23 | global ui_edges 24 | ui_edges = _ui_edges 25 | 26 | 27 | def load_item_embedding(item): 28 | if not os.path.exists(os.path.join(text_dir, f"{item}_t.pt")) and not os.path.exists(os.path.join(img_dir, f"{item}_v.pt")): 29 | return None, None 30 | if not 
os.path.exists(os.path.join(text_dir, f"{item}_t.pt")): 31 | text = torch.zeros(2, 768) 32 | else: 33 | text = torch.load(os.path.join(text_dir, f"{item}_t.pt"), map_location='cpu')[:, 0].detach() 34 | 35 | if not os.path.exists(os.path.join(img_dir, f"{item}_v.pt")): 36 | image = torch.zeros(3, 224, 224) 37 | else: 38 | image = torch.load(os.path.join(img_dir, f"{item}_v.pt")) 39 | 40 | text_h = text.mean(dim=0) 41 | # image = resnet(image.unsqueeze(0).to(device)).reshape(-1).cpu() 42 | # print(text_h.size(), image.size()) 43 | return text_h, image 44 | 45 | 46 | def get_user_embedding(_user): 47 | 48 | with torch.no_grad(): 49 | text_emb_ls = [] 50 | img_ls = [] 51 | for item in ui_edges[_user]: 52 | # text_h, image_h = load_item_embedding(item) 53 | # if text_h is None and image_h is None: 54 | # continue 55 | # emb_ls.append(torch.cat([text_h, image_h], dim=-1)) 56 | text, img = load_item_embedding(item) 57 | if text is None and img is None: 58 | continue 59 | text_emb_ls.append(text) 60 | img_ls.append(img) 61 | text_emb = torch.stack(text_emb_ls, dim=0) 62 | all_img = torch.stack(img_ls, dim=0) 63 | # print(all_img.size()) 64 | idx = 0 65 | img_emb_ls = [] 66 | while True: 67 | s_idx = idx * batch_size 68 | if s_idx >= all_img.size(0): 69 | break 70 | e_idx = (idx + 1) * batch_size 71 | batch_img = all_img[s_idx: e_idx] 72 | # print(batch_img.size()) 73 | batch_img_emb = resnet(batch_img.to(device)).reshape(batch_img.size(0), -1).cpu() 74 | img_emb_ls.append(batch_img_emb) 75 | idx += 1 76 | img_emb = torch.cat(img_emb_ls, dim=0) 77 | assert img_emb.size(0) == text_emb.size(0) 78 | # return torch.stack(emb_ls, dim=0).mean(dim=0) 79 | return torch.cat([text_emb, img_emb], dim=-1).mean(dim=0) 80 | 81 | 82 | if __name__ == '__main__': 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument('--node_vocab', type=str, required=True) 85 | parser.add_argument('--ui_edge_file', type=str, required=True) 86 | parser.add_argument('--output_dir', type=str, required=True) 87 | 88 | args = parser.parse_args() 89 | 90 | # node_vocab = torch.load("/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/vocab.pt") 91 | node_vocab = torch.load(args.node_vocab) 92 | # ui = json.load(open("/home/jiaofangkai/IQON_pair_remove_edge/UI.json", 'r')) 93 | ui = json.load(open(args.ui_edge_file, 'r')) 94 | 95 | # Process user embedding 96 | initializer(ui) 97 | 98 | user_embedding = {} 99 | for u in tqdm(node_vocab['u'], total=len(node_vocab['u'])): 100 | user_embedding[u] = get_user_embedding(u) 101 | 102 | # torch.save(user_embedding, "/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/user_embedding.pt") 103 | torch.save(user_embedding, os.path.join(args.output_dir, 'user_embedding.pt')) 104 | 105 | # user_embedding = torch.load(os.path.join(args.output_dir, 'user_embedding.pt')) 106 | # for u, emb in user_embedding.items(): 107 | # user_embedding[u] = emb.mean(dim=0) 108 | # torch.save(user_embedding, os.path.join(args.output_dir, 'user_embedding.pt')) 109 | 110 | # Process user vocabulary 111 | # user_embedding = torch.load("/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/user_embedding.pt") 112 | user_emb_weight = [] 113 | user_vocab = {} 114 | for i, (user, user_emb) in enumerate(user_embedding.items()): 115 | user_emb_weight.append(user_emb) 116 | user_vocab[user] = i 117 | user_emb_weight = torch.stack(user_emb_weight, dim=0) 118 | 119 | # torch.save(user_emb_weight, "/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/user_emb_weight.pt") 120 | # json.dump(user_vocab, 
open("/home/jiaofangkai/IQON_pair_remove_edge/subgraphs/user_vocab.json", "w")) 121 | torch.save(user_emb_weight, os.path.join(args.output_dir, 'user_emb_weight.pt')) 122 | json.dump(user_vocab, open(os.path.join(args.output_dir, 'user_vocab.json'), 'w')) 123 | 124 | print("Done.") 125 | -------------------------------------------------------------------------------- /conf/gp_bpr/gat_tf_emb_max_fix_graph_v2.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: gp-bpr 6 | 7 | train_file: ${data_dir}/UII_train_quadruple.json 8 | dev_file: ${data_dir}/UII_valid_quadruple.json 9 | test_file: ${data_dir}/UII_test_quadruple.json 10 | 11 | embedding_memory: 12 | _target_: data_loader.data_utils.EmbeddingMatrix 13 | attr_text: ${data_dir}/attribute_emb_weight.pt 14 | item_image: ${data_dir}/item_img.pt 15 | item_text: ${data_dir}/item_text_emb_weight.cls.pt 16 | 17 | 18 | # Data loading 19 | dataset: 20 | _target_: data_loader.data_loader_v1.SubgraphDataset 21 | meta_path_dict: 22 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_3 23 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_3 24 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_3 25 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_3 26 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_3 27 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_3 28 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_3 29 | 30 | 31 | # Data collator 32 | collator: 33 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 34 | user_vocab: ${data_dir}/user_vocab.json 35 | attr_vocab: ${data_dir}/attribute_vocab.json 36 | item_vocab: ${data_dir}/item_vocab.json 37 | node_vocab: ${data_dir}/vocab.pt 38 | 39 | 40 | # Dataloader 41 | num_workers: 16 42 | eval_num_workers: 4 43 | prefetch_factor: 2 44 | 45 | # Model 46 | model: 47 | _target_: models.gat_tf_emb_max.GATTransformer 48 | user_embedding: ${data_dir}/user_emb_weight.pt 49 | user_vocab: ${collator.user_vocab} 50 | freeze_user_emb: False 51 | vision_model: resnet18 52 | text_hidden_size: 768 53 | img_hidden_size: 512 54 | hidden_size: 512 55 | loss_type: 1 56 | gnn: 57 | _target_: models.gat.GAT 58 | num_layers: 4 59 | input_size: ${model.hidden_size} 60 | num_heads: 8 61 | head_size: 64 62 | feat_dropout: 0.1 63 | attn_dropout: 0.1 64 | residual: True 65 | transformer: 66 | _target_: models.transformer.initialize_transformer 67 | encoder_layers: 2 68 | encoder_ffn_dim: 2048 69 | encoder_attention_heads: 8 70 | encoder_layerdrop: 0.0 71 | activation_function: "gelu" 72 | d_model: ${model.hidden_size} 73 | dropout: 0.1 74 | attention_dropout: 0.1 75 | activation_dropout: 0.1 76 | init_std: 0.02 77 | classifier_dropout: 0.0 78 | 79 | pretrain: 80 | 81 | #output_dir: experiments/gat_tf_fix_emb.v1.0 82 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 83 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 84 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 85 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 86 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 87 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 88 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.5.wd0.1 # 
max_neighbour_num 5 -> 3 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.6.wd0.1 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.wd0.1 # loss_fn=1 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.fix_graph.wd0.1 94 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v3.7.fix_graph.wd1.0 95 | 96 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v4.0.fix_graph.wd1.0 97 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v4.1.fix_graph.wd1.0.fp32 98 | output_dir: experiments/gp_bpr.gat_tf_fix_emb_max.v4.1.fix_graph.wd0.01 99 | 100 | do_train: True 101 | #do_train: False 102 | evaluate_during_training: True 103 | 104 | do_eval: True 105 | eval_sub_path: 106 | 107 | # Training hyper-parameters 108 | per_gpu_train_batch_size: 4 109 | per_gpu_eval_batch_size: 4 110 | #learning_rate: 1e-5 111 | #learning_rate: 1e-4 112 | learning_rate: 5e-5 113 | #learning_rate: 5e-5 114 | #learning_rate: 1e-3 115 | gradient_accumulation_steps: 32 116 | weight_decay: 0.01 117 | #weight_decay: 0.1 118 | #weight_decay: 1.0 119 | adam_epsilon: 1e-6 120 | adam_betas: "(0.9, 0.98)" 121 | max_grad_norm: 0.0 122 | #max_grad_norm: 1.0 123 | #num_train_epochs: 30 124 | num_train_epochs: 10 125 | max_steps: 0 126 | warmup_proportion: 0.06 127 | warmup_steps: 128 | 129 | multi_tensor: 130 | 131 | # Prediction config 132 | prediction_cfg: 133 | metric: "acc" 134 | measure: 1 135 | best_checkpoint: 136 | best_result: 137 | 138 | logging_steps: 5 139 | summary_helper: 140 | _target_: general_util.training_utils.SummaryWriterHelper 141 | 142 | save_steps: 250 143 | save_best: True 144 | eval_steps: 250 145 | no_cuda: False 146 | seed: 42 147 | local_rank: -1 148 | fp16: True 149 | fp16_opt_level: O1 150 | 151 | # fairscale.FullyShardedDDP 152 | reshard_after_forward: False 153 | cpu_offload: False 154 | move_grads_to_cpu: False 155 | move_params_to_cpu: False 156 | 157 | # Temporary variables 158 | n_gpu: 159 | device: 160 | train_batch_size: 161 | eval_batch_size: 162 | world_size: 163 | -------------------------------------------------------------------------------- /conf/ctr_pretrain/gat_tf_emb_max_ctr_pt_v1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | 
node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max_ctr_only.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | gnn: 58 | _target_: models.gat.GAT 59 | num_layers: 2 60 | input_size: ${model.hidden_size} 61 | num_heads: 8 62 | head_size: 64 63 | feat_dropout: 0.1 64 | attn_dropout: 0.1 65 | residual: True 66 | transformer: 67 | _target_: models.transformer.initialize_transformer 68 | encoder_layers: 1 69 | encoder_ffn_dim: 2048 70 | encoder_attention_heads: 8 71 | encoder_layerdrop: 0.0 72 | activation_function: "gelu" 73 | d_model: ${model.hidden_size} 74 | dropout: 0.1 75 | attention_dropout: 0.0 76 | activation_dropout: 0.0 77 | init_std: 0.02 78 | classifier_dropout: 0.0 79 | 80 | pretrain: 81 | 82 | #output_dir: experiments/gat_tf_fix_emb.v1.0 83 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 84 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 85 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 86 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 87 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 88 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 91 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 92 | #output_dir: experiments/gat_tf_fix_emb_max.wo_img_fix.v3.1.wd0.1.n5 93 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5 94 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.T4 95 | output_dir: experiments/gat_tf_fix_emb_max.ctr.pt.v3.1.wd0.01.n5.micro8.A100 96 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2tf # 2-layer transformer // add dropout to transformer 97 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.2tf.titanxp # epoch 5 -> 10 98 | 99 | do_train: True 100 | evaluate_during_training: True 101 | 102 | do_eval: True 103 | eval_sub_path: 104 | 105 | # Training hyper-parameters 106 | per_gpu_train_batch_size: 8 107 | per_gpu_eval_batch_size: 8 108 | #learning_rate: 5e-5 109 | #learning_rate: 1e-4 110 | #learning_rate: 5e-4 111 | learning_rate: 1e-3 112 | gradient_accumulation_steps: 16 113 | weight_decay: 0.01 114 | #weight_decay: 0.1 115 | adam_epsilon: 1e-6 116 | adam_betas: "(0.9, 0.98)" 117 | max_grad_norm: 0.0 118 | #max_grad_norm: 1.0 119 | num_train_epochs: 30 120 | #num_train_epochs: 5 121 | max_steps: 0 122 | warmup_proportion: 0.06 123 | warmup_steps: 124 | 125 | multi_tensor: 126 | 127 | # Prediction config 128 | prediction_cfg: 129 | metric: "acc" 130 | measure: 1 131 | best_checkpoint: 132 | best_result: 133 | 134 | logging_steps: 5 135 | summary_helper: 136 | _target_: general_util.training_utils.SummaryWriterHelper 137 | 138 | save_steps: -1 139 | save_best: True 140 | eval_steps: 500 141 | no_cuda: False 142 | seed: 42 143 | local_rank: -1 144 | fp16: True 145 | fp16_opt_level: O1 146 | 147 | # fairscale.FullyShardedDDP 148 | 
reshard_after_forward: False 149 | cpu_offload: False 150 | move_grads_to_cpu: False 151 | move_params_to_cpu: False 152 | 153 | # Temporary variables 154 | n_gpu: 155 | device: 156 | train_batch_size: 157 | eval_batch_size: 158 | world_size: 159 | -------------------------------------------------------------------------------- /conf/rgcn/rgcn_tf_emb_max_v1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_edge_v1.SubgraphEdgeDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_edge_fix_emb.SubgraphEdgeCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | gnn: 59 | _target_: models.rgcn_layer.RelationGCN 60 | in_dims: ${model.hidden_size} 61 | h_dim: ${model.hidden_size} 62 | out_dim: ${model.hidden_size} 63 | num_rels: 5 64 | num_bases: 4 65 | num_hidden_layers: 2 66 | dropout: 0.1 67 | transformer: 68 | _target_: models.transformer.initialize_transformer 69 | encoder_layers: 1 70 | encoder_ffn_dim: 2048 71 | encoder_attention_heads: 8 72 | encoder_layerdrop: 0.0 73 | activation_function: "gelu" 74 | d_model: ${model.hidden_size} 75 | dropout: 0.1 76 | attention_dropout: 0.1 77 | activation_dropout: 0.1 78 | init_std: 0.02 79 | classifier_dropout: 0.0 80 | 81 | pretrain: 82 | 83 | #output_dir: experiments/gat_tf_fix_emb.v1.0 84 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 85 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 86 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 87 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 88 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 91 | #output_dir: 
experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 92 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 93 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 94 | 95 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 96 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 97 | 98 | #output_dir: experiments/simple-gat_tf_emb_max.v3.3.wd0.1.n5.T4 # simple-gat // 1-layer transformer // lr=8e-5 // epoch=10 99 | output_dir: experiments/rgcn_tf_emb_max.v3.4.wd0.1.n5.T4 # 8 epoch 100 | 101 | do_train: True 102 | evaluate_during_training: True 103 | 104 | do_eval: True 105 | eval_sub_path: 106 | 107 | # Training hyper-parameters 108 | per_gpu_train_batch_size: 2 109 | per_gpu_eval_batch_size: 2 110 | learning_rate: 8e-5 111 | #learning_rate: 1e-4 112 | #learning_rate: 5e-4 113 | #learning_rate: 1e-3 114 | gradient_accumulation_steps: 12 115 | #weight_decay: 0.01 116 | weight_decay: 0.1 117 | adam_epsilon: 1e-6 118 | adam_betas: "(0.9, 0.98)" 119 | max_grad_norm: 0.0 120 | #max_grad_norm: 1.0 121 | #num_train_epochs: 30 122 | num_train_epochs: 8 123 | max_steps: 0 124 | warmup_proportion: 0.06 125 | warmup_steps: 126 | 127 | multi_tensor: 128 | 129 | # Prediction config 130 | prediction_cfg: 131 | metric: "acc" 132 | measure: 1 133 | best_checkpoint: 134 | best_result: 135 | 136 | logging_steps: 5 137 | summary_helper: 138 | _target_: general_util.training_utils.SummaryWriterHelper 139 | 140 | save_steps: -1 141 | save_best: True 142 | eval_steps: 500 143 | no_cuda: False 144 | seed: 42 145 | local_rank: -1 146 | fp16: True 147 | fp16_opt_level: O1 148 | 149 | # fairscale.FullyShardedDDP 150 | reshard_after_forward: False 151 | cpu_offload: False 152 | move_grads_to_cpu: False 153 | move_params_to_cpu: False 154 | 155 | # Temporary variables 156 | n_gpu: 157 | device: 158 | train_batch_size: 159 | eval_batch_size: 160 | world_size: 161 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_max_v1_3_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | #test_file: ${data_dir}/UII_test_quadruple.json 11 | test_file: ${data_dir}/UII_test_for_mrr.json 12 | 13 | 14 | embedding_memory: 15 | _target_: data_loader.data_utils.EmbeddingMatrix 16 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 17 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 18 | item_image: ${data_dir}/subgraphs/item_img.pt 19 | 20 | 21 | # Data loading 22 | dataset: 23 | _target_: data_loader.data_loader_v1.SubgraphDataset 24 | meta_path_dict: 25 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 26 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 27 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 28 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 29 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 30 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 31 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 32 | max_tuple_num: 5 33 | 34 | 35 | # Data collator 36 | collator: 37 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 38 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 39 | attr_vocab: 
${data_dir}/subgraphs/attribute_vocab.json 40 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 41 | node_vocab: ${data_dir}/subgraphs/vocab.pt 42 | 43 | 44 | # Dataloader 45 | num_workers: 8 46 | eval_num_workers: 2 47 | prefetch_factor: 2 48 | 49 | # Model 50 | model: 51 | _target_: models.gat_tf_emb_max.GATTransformer 52 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 53 | user_vocab: ${collator.user_vocab} 54 | freeze_user_emb: False 55 | vision_model: resnet18 56 | text_hidden_size: 768 57 | img_hidden_size: 512 58 | hidden_size: 512 59 | loss_type: 1 60 | gnn: 61 | _target_: models.gat.GAT 62 | num_layers: 2 63 | input_size: ${model.hidden_size} 64 | num_heads: 8 65 | head_size: 64 66 | feat_dropout: 0.1 67 | attn_dropout: 0.1 68 | residual: True 69 | transformer: 70 | _target_: models.transformer.initialize_transformer 71 | encoder_layers: 1 72 | encoder_ffn_dim: 2048 73 | encoder_attention_heads: 8 74 | encoder_layerdrop: 0.0 75 | activation_function: "gelu" 76 | d_model: ${model.hidden_size} 77 | dropout: 0.1 78 | attention_dropout: 0.0 79 | activation_dropout: 0.0 80 | init_std: 0.02 81 | classifier_dropout: 0.0 82 | 83 | pretrain: 84 | 85 | #output_dir: experiments/gat_tf_fix_emb.v1.0 86 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 87 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 88 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 89 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 90 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 94 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 95 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 96 | 97 | output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.T4 98 | #output_dir: /home/share/jiaofangkai/outfit_rec_exp_server162/gat_tf_fix_emb_max.v3.1.wd0.1.n5.T4 # """ For predicting only. 
""" 99 | 100 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 101 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 102 | 103 | do_train: True 104 | evaluate_during_training: True 105 | 106 | do_eval: True 107 | eval_sub_path: 108 | 109 | # Training hyper-parameters 110 | per_gpu_train_batch_size: 2 111 | per_gpu_eval_batch_size: 2 112 | #learning_rate: 5e-5 113 | learning_rate: 1e-4 114 | #learning_rate: 5e-4 115 | #learning_rate: 1e-3 116 | gradient_accumulation_steps: 12 117 | #weight_decay: 0.01 118 | weight_decay: 0.1 119 | adam_epsilon: 1e-6 120 | adam_betas: "(0.9, 0.98)" 121 | max_grad_norm: 0.0 122 | #max_grad_norm: 1.0 123 | #num_train_epochs: 30 124 | num_train_epochs: 5 125 | max_steps: 0 126 | warmup_proportion: 0.06 127 | warmup_steps: 128 | 129 | multi_tensor: 130 | 131 | # Prediction config 132 | prediction_cfg: 133 | metric: "acc" 134 | measure: 1 135 | best_checkpoint: 136 | best_result: 137 | 138 | logging_steps: 5 139 | summary_helper: 140 | _target_: general_util.training_utils.SummaryWriterHelper 141 | 142 | save_steps: -1 143 | save_best: True 144 | eval_steps: 500 145 | no_cuda: False 146 | seed: 42 147 | local_rank: -1 148 | fp16: True 149 | fp16_opt_level: O1 150 | 151 | # fairscale.FullyShardedDDP 152 | reshard_after_forward: False 153 | cpu_offload: False 154 | move_grads_to_cpu: False 155 | move_params_to_cpu: False 156 | 157 | # Temporary variables 158 | n_gpu: 159 | device: 160 | train_batch_size: 161 | eval_batch_size: 162 | world_size: 163 | -------------------------------------------------------------------------------- /conf/simple_gat/simple_gat_tf_emb_max_v1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_edge_v1.SubgraphEdgeDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_edge_fix_emb.SubgraphEdgeCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 
512 56 | hidden_size: 512 57 | loss_type: 1 58 | gnn: 59 | _target_: models.simple_gat.SimpleGAT 60 | edge_dim: 64 61 | num_etypes: 5 62 | num_hidden: ${model.hidden_size} 63 | head_size: 64 64 | num_layers: 2 65 | heads: [8, 8, 8] 66 | activation: "gelu" 67 | feat_dropout: 0.1 68 | attn_dropout: 0.1 69 | residual: False 70 | decode: "proj" 71 | transformer: 72 | _target_: models.transformer.initialize_transformer 73 | encoder_layers: 1 74 | encoder_ffn_dim: 2048 75 | encoder_attention_heads: 8 76 | encoder_layerdrop: 0.0 77 | activation_function: "gelu" 78 | d_model: ${model.hidden_size} 79 | dropout: 0.1 80 | attention_dropout: 0.1 81 | activation_dropout: 0.1 82 | init_std: 0.02 83 | classifier_dropout: 0.0 84 | 85 | pretrain: 86 | 87 | #output_dir: experiments/gat_tf_fix_emb.v1.0 88 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 89 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 90 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 91 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 92 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 94 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 95 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 96 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 97 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 98 | 99 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 100 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 101 | 102 | output_dir: experiments/simple-gat_tf_emb_max.v3.3.wd0.1.n5.T4 # simple-gat // 1-layer transformer // lr=8e-5 // epoch=10 103 | 104 | do_train: True 105 | evaluate_during_training: True 106 | 107 | do_eval: True 108 | eval_sub_path: 109 | 110 | # Training hyper-parameters 111 | per_gpu_train_batch_size: 4 112 | per_gpu_eval_batch_size: 4 113 | learning_rate: 8e-5 114 | #learning_rate: 1e-4 115 | #learning_rate: 5e-4 116 | #learning_rate: 1e-3 117 | gradient_accumulation_steps: 6 118 | #weight_decay: 0.01 119 | weight_decay: 0.1 120 | adam_epsilon: 1e-6 121 | adam_betas: "(0.9, 0.98)" 122 | max_grad_norm: 0.0 123 | #max_grad_norm: 1.0 124 | #num_train_epochs: 30 125 | num_train_epochs: 10 126 | max_steps: 0 127 | warmup_proportion: 0.06 128 | warmup_steps: 129 | 130 | multi_tensor: 131 | 132 | # Prediction config 133 | prediction_cfg: 134 | metric: "acc" 135 | measure: 1 136 | best_checkpoint: 137 | best_result: 138 | 139 | logging_steps: 5 140 | summary_helper: 141 | _target_: general_util.training_utils.SummaryWriterHelper 142 | 143 | save_steps: -1 144 | save_best: True 145 | eval_steps: 500 146 | no_cuda: False 147 | seed: 42 148 | local_rank: -1 149 | fp16: True 150 | fp16_opt_level: O1 151 | 152 | # fairscale.FullyShardedDDP 153 | reshard_after_forward: False 154 | cpu_offload: False 155 | move_grads_to_cpu: False 156 | move_params_to_cpu: False 157 | 158 | # Temporary variables 159 | n_gpu: 160 | device: 161 | train_batch_size: 162 | eval_batch_size: 163 | world_size: 164 | -------------------------------------------------------------------------------- /models/simple_gat.py: -------------------------------------------------------------------------------- 1 | """Most 
code is copied from https://github.com/THUDM/HGB/blob/master/LP/benchmark/methods/baseline/GNN.py.""" 2 | 3 | import torch 4 | from torch import nn 5 | 6 | from models.simple_gat_conv import SimpleGATConv 7 | from models.modeling_utils import get_activation_func 8 | from general_util.logger import get_child_logger 9 | 10 | logger = get_child_logger("SimpleGAT") 11 | 12 | 13 | class DistMult(nn.Module): 14 | def __init__(self, num_rel, dim): 15 | super(DistMult, self).__init__() 16 | self.W = nn.Parameter(torch.FloatTensor(size=(num_rel, dim, dim)))  # one (dim, dim) relation matrix per edge type 17 | nn.init.xavier_normal_(self.W, gain=1.414) 18 | 19 | def forward(self, left_emb, right_emb, r_id): 20 | thW = self.W[r_id] 21 | left_emb = torch.unsqueeze(left_emb, 1)  # (B, 1, dim) 22 | right_emb = torch.unsqueeze(right_emb, 2)  # (B, dim, 1) 23 | return torch.bmm(torch.bmm(left_emb, thW), right_emb).squeeze()  # bilinear score: left^T W_r right 24 | 25 | 26 | class Dot(nn.Module): 27 | def __init__(self): 28 | super(Dot, self).__init__() 29 | 30 | def forward(self, left_emb, right_emb, r_id):  # r_id is unused; kept for interface parity with DistMult 31 | left_emb = torch.unsqueeze(left_emb, 1) 32 | right_emb = torch.unsqueeze(right_emb, 2) 33 | return torch.bmm(left_emb, right_emb).squeeze() 34 | 35 | 36 | class SimpleGAT(nn.Module): 37 | def __init__(self, 38 | edge_dim, 39 | num_etypes, 40 | num_hidden, 41 | head_size, 42 | num_layers, 43 | heads, 44 | activation, 45 | feat_dropout, 46 | attn_dropout, 47 | negative_slope=0.01, 48 | residual=False, 49 | alpha=0., 50 | decode='distmult'): 51 | super(SimpleGAT, self).__init__() 52 | 53 | # self.g = g 54 | self.num_layers = num_layers 55 | self.gat_layers = nn.ModuleList() 56 | self.activation = get_activation_func(activation) if isinstance(activation, str) else activation 57 | 58 | logger.info(f'Simple-GAT parameters:\theads: {heads}') 59 | 60 | # self.fc_list = nn.ModuleList([nn.Linear(in_dim, num_hidden, bias=True) for in_dim in in_dims]) 61 | # for fc in self.fc_list: 62 | # nn.init.xavier_normal_(fc.weight, gain=1.414) 63 | 64 | # input projection (no residual); out_dim tracks the width of the concatenated per-layer embeddings built in forward() 65 | out_dim = num_hidden 66 | self.gat_layers.append(SimpleGATConv(edge_dim, num_etypes, 67 | num_hidden, head_size, heads[0], 68 | feat_dropout, attn_dropout, negative_slope, False, self.activation, alpha=alpha)) 69 | out_dim += head_size 70 | # hidden layers 71 | for l in range(1, num_layers): 72 | # due to multi-head, the in_dim = head_size * num_heads 73 | self.gat_layers.append(SimpleGATConv(edge_dim, num_etypes, 74 | head_size * heads[l - 1], head_size, heads[l], 75 | feat_dropout, attn_dropout, negative_slope, residual, self.activation, alpha=alpha)) 76 | out_dim += head_size 77 | # output projection 78 | self.gat_layers.append(SimpleGATConv(edge_dim, num_etypes, 79 | head_size * heads[-2], num_hidden, heads[-1], 80 | feat_dropout, attn_dropout, negative_slope, residual, None, alpha=alpha)) 81 | out_dim += num_hidden 82 | 83 | self.register_buffer("epsilon", torch.tensor([1e-12]), persistent=False)  # buffer instead of a hard-coded .cuda() so the module follows .to(device) and still runs on CPU 84 | if decode == 'distmult': 85 | self.decoder = DistMult(num_etypes, num_hidden * (num_layers + 2))  # equals out_dim only when head_size == num_hidden, as in the HGB original 86 | elif decode == 'dot': 87 | self.decoder = Dot() 88 | elif decode == 'proj':  # the configs in this repo set decode: "proj"; the forward() below only calls the decoder with a single argument 89 | self.decoder = nn.Linear(out_dim, num_hidden) 90 | else: 91 | raise RuntimeError(f"Unknown decode type: {decode}") 92 | 93 | def l2_norm(self, x): 94 | # This is an equivalent replacement for tf.l2_normalize, 95 | # see https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/math/l2_normalize for more information. 
96 | return x / (torch.max(torch.norm(x, dim=1, keepdim=True), self.epsilon)) 97 | 98 | def forward(self, graph, h, e_feat): 99 | # h = [] 100 | # for fc, feature in zip(self.fc_list, features_list): 101 | # h.append(fc(feature)) 102 | # h = torch.cat(h, 0) 103 | 104 | emb = [self.l2_norm(h)] 105 | res_attn = None 106 | for l in range(self.num_layers): 107 | h, res_attn = self.gat_layers[l](graph, h, e_feat, res_attn=res_attn) 108 | emb.append(self.l2_norm(h.mean(1))) 109 | h = h.flatten(1) 110 | # output projection 111 | logits, _ = self.gat_layers[-1](graph, h, e_feat, res_attn=res_attn) # None) 112 | logits = logits.mean(1) 113 | logits = self.l2_norm(logits) 114 | emb.append(logits) 115 | logits = torch.cat(emb, 1) 116 | 117 | # left_emb = logits[left] 118 | # right_emb = logits[right] 119 | # return self.decoder(left_emb, right_emb, mid) 120 | return self.decoder(logits) 121 | -------------------------------------------------------------------------------- /conf/item_ab/gat_tf_emb_max_ctr_v1_3_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | gpu: T4 6 | 7 | data_dir: IQON_pair_remove_edge 8 | feat_dir: iqon_pair_feat 9 | 10 | train_file: ${data_dir}/UII_train_quadruple.json 11 | dev_file: ${data_dir}/UII_valid_quadruple.json 12 | #test_file: ${data_dir}/UII_test_quadruple.json 13 | test_file: ${data_dir}/UII_test_for_mrr.json 14 | 15 | 16 | embedding_memory: 17 | _target_: data_loader.data_utils.EmbeddingMatrix 18 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 19 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 20 | item_image: ${data_dir}/subgraphs/item_img.pt 21 | 22 | 23 | # Data loading 24 | dataset: 25 | _target_: data_loader.data_loader_v1.SubgraphDataset 26 | meta_path_dict: 27 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 28 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 29 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 30 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 31 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 32 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 33 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 34 | max_tuple_num: 5 35 | 36 | 37 | # Data collator 38 | collator: 39 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 40 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 41 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 42 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 43 | node_vocab: ${data_dir}/subgraphs/vocab.pt 44 | 45 | 46 | # Dataloader 47 | num_workers: 4 48 | eval_num_workers: 0 49 | prefetch_factor: 2 50 | 51 | # Model 52 | model: 53 | _target_: models.gat_tf_emb_max_item_ab.GATTransformer 54 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 55 | user_vocab: ${collator.user_vocab} 56 | freeze_user_emb: False 57 | vision_model: resnet18 58 | text_hidden_size: 768 59 | img_hidden_size: 512 60 | hidden_size: 512 61 | loss_type: 1 62 | add_ctr_loss: True 63 | item_use_img: True 64 | item_use_text: True 65 | gnn: 66 | _target_: models.gat.GAT 67 | num_layers: 2 68 | input_size: ${model.hidden_size} 69 | num_heads: 8 70 | head_size: 64 71 | feat_dropout: 0.1 72 | attn_dropout: 0.1 73 | residual: True 74 | transformer: 75 | _target_: models.transformer.initialize_transformer 76 | encoder_layers: 1 77 | encoder_ffn_dim: 2048 78 | encoder_attention_heads: 8 79 | encoder_layerdrop: 0.0 80 | activation_function: "gelu" 81 | d_model: ${model.hidden_size} 82 | dropout: 0.1 83 | attention_dropout: 0.0 
84 | activation_dropout: 0.0 85 | init_std: 0.02 86 | classifier_dropout: 0.0 87 | 88 | pretrain: 89 | 90 | #output_dir: experiments/gat_tf_fix_emb.v1.0 91 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 92 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 93 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 94 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 95 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 96 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 97 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 98 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 99 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 100 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 101 | 102 | output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.${gpu}.i${model.item_use_img}${model.item_use_text} 103 | 104 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 105 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 106 | 107 | do_train: True 108 | evaluate_during_training: True 109 | 110 | do_eval: True 111 | eval_sub_path: 112 | 113 | # Training hyper-parameters 114 | per_gpu_train_batch_size: 2 115 | per_gpu_eval_batch_size: 2 116 | #learning_rate: 5e-5 117 | learning_rate: 1e-4 118 | #learning_rate: 5e-4 119 | #learning_rate: 1e-3 120 | gradient_accumulation_steps: 12 121 | #weight_decay: 0.01 122 | weight_decay: 0.1 123 | adam_epsilon: 1e-6 124 | adam_betas: "(0.9, 0.98)" 125 | max_grad_norm: 0.0 126 | #max_grad_norm: 1.0 127 | #num_train_epochs: 30 128 | num_train_epochs: 5 129 | max_steps: 0 130 | warmup_proportion: 0.06 131 | warmup_steps: 132 | 133 | multi_tensor: 134 | 135 | # Prediction config 136 | prediction_cfg: 137 | metric: "acc" 138 | measure: 1 139 | best_checkpoint: 140 | best_result: 141 | 142 | logging_steps: 5 143 | summary_helper: 144 | _target_: general_util.training_utils.SummaryWriterHelper 145 | 146 | save_steps: -1 147 | save_best: True 148 | eval_steps: 500 149 | no_cuda: False 150 | seed: 42 151 | local_rank: -1 152 | fp16: True 153 | fp16_opt_level: O1 154 | 155 | # fairscale.FullyShardedDDP 156 | reshard_after_forward: False 157 | cpu_offload: False 158 | move_grads_to_cpu: False 159 | move_params_to_cpu: False 160 | 161 | # Temporary variables 162 | n_gpu: 163 | device: 164 | train_batch_size: 165 | eval_batch_size: 166 | world_size: 167 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_max_ctr_v1_3_1_wo_ii_uia.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | #test_file: ${data_dir}/UII_test_quadruple.json 11 | test_file: ${data_dir}/UII_test_for_mrr.json 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: 
data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | # ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | # uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | max_tuple_num: 32 | 33 | 34 | # Data collator 35 | collator: 36 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 37 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 38 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 39 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 40 | node_vocab: ${data_dir}/subgraphs/vocab.pt 41 | 42 | 43 | # Dataloader 44 | num_workers: 8 45 | eval_num_workers: 2 46 | prefetch_factor: 2 47 | 48 | # Model 49 | model: 50 | _target_: models.gat_tf_emb_max.GATTransformer 51 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 52 | user_vocab: ${collator.user_vocab} 53 | freeze_user_emb: False 54 | vision_model: resnet18 55 | text_hidden_size: 768 56 | img_hidden_size: 512 57 | hidden_size: 512 58 | loss_type: 1 59 | add_ctr_loss: True 60 | gnn: 61 | _target_: models.gat.GAT 62 | num_layers: 2 63 | input_size: ${model.hidden_size} 64 | num_heads: 8 65 | head_size: 64 66 | feat_dropout: 0.1 67 | attn_dropout: 0.1 68 | residual: True 69 | transformer: 70 | _target_: models.transformer.initialize_transformer 71 | encoder_layers: 1 72 | encoder_ffn_dim: 2048 73 | encoder_attention_heads: 8 74 | encoder_layerdrop: 0.0 75 | activation_function: "gelu" 76 | d_model: ${model.hidden_size} 77 | dropout: 0.1 78 | attention_dropout: 0.0 79 | activation_dropout: 0.0 80 | init_std: 0.02 81 | classifier_dropout: 0.0 82 | 83 | pretrain: 84 | 85 | #output_dir: experiments/gat_tf_fix_emb.v1.0 86 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 87 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 88 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 89 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 90 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 94 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 95 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 96 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.T4 97 | 98 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.remove_ii_uia.wd0.1.n5.T4 99 | output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.remove_ii_uia.wd0.1.n5.A100 100 | 101 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 102 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 103 | 104 | do_train: True 105 | evaluate_during_training: True 106 | 107 | do_eval: True 108 | eval_sub_path: 109 | 110 | # Training hyper-parameters 111 | per_gpu_train_batch_size: 8 112 | per_gpu_eval_batch_size: 2 113 | #learning_rate: 5e-5 114 | learning_rate: 1e-4 115 | #learning_rate: 5e-4 116 | #learning_rate: 1e-3 117 | gradient_accumulation_steps: 3 118 | 
#weight_decay: 0.01 119 | weight_decay: 0.1 120 | adam_epsilon: 1e-6 121 | adam_betas: "(0.9, 0.98)" 122 | max_grad_norm: 0.0 123 | #max_grad_norm: 1.0 124 | #num_train_epochs: 30 125 | num_train_epochs: 5 126 | max_steps: 0 127 | warmup_proportion: 0.06 128 | warmup_steps: 129 | 130 | multi_tensor: 131 | 132 | # Prediction config 133 | prediction_cfg: 134 | metric: "acc" 135 | measure: 1 136 | best_checkpoint: 137 | best_result: 138 | 139 | logging_steps: 5 140 | summary_helper: 141 | _target_: general_util.training_utils.SummaryWriterHelper 142 | 143 | save_steps: -1 144 | save_best: True 145 | eval_steps: 500 146 | no_cuda: False 147 | seed: 42 148 | local_rank: -1 149 | fp16: True 150 | fp16_opt_level: O1 151 | 152 | # fairscale.FullyShardedDDP 153 | reshard_after_forward: False 154 | cpu_offload: False 155 | move_grads_to_cpu: False 156 | move_params_to_cpu: False 157 | 158 | # Temporary variables 159 | n_gpu: 160 | device: 161 | train_batch_size: 162 | eval_batch_size: 163 | world_size: 164 | -------------------------------------------------------------------------------- /conf/ctr_pretrain/gat_tf_emb_max_ctr_pt_v2.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max_ctr_only.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | gnn: 58 | _target_: models.gat.GAT 59 | num_layers: 4 60 | input_size: ${model.hidden_size} 61 | num_heads: 8 62 | head_size: 64 63 | feat_dropout: 0.1 64 | attn_dropout: 0.1 65 | residual: True 66 | transformer: 67 | _target_: models.transformer.initialize_transformer 68 | encoder_layers: 2 69 | encoder_ffn_dim: 2048 70 | encoder_attention_heads: 8 71 | encoder_layerdrop: 0.0 72 | activation_function: "gelu" 73 | d_model: ${model.hidden_size} 74 | dropout: 0.1 75 | attention_dropout: 0.0 76 | activation_dropout: 0.0 77 | init_std: 0.02 78 | 
classifier_dropout: 0.0 79 | 80 | pretrain: 81 | 82 | #output_dir: experiments/gat_tf_fix_emb.v1.0 83 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 84 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 85 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 86 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 87 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 88 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 89 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 90 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 91 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 92 | #output_dir: experiments/gat_tf_fix_emb_max.wo_img_fix.v3.1.wd0.1.n5 93 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5 94 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.T4 95 | 96 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.pt.v3.1.wd0.01.n5.micro8.A100 97 | output_dir: experiments/gat_tf_fix_emb_max.ctr.pt.v2.0.wd0.01.n5.micro8.A100 # Add more layer. 98 | 99 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2tf # 2-layer transformer // add dropout to transformer 100 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.2tf.titanxp # epoch 5 -> 10 101 | 102 | do_train: True 103 | evaluate_during_training: True 104 | 105 | do_eval: True 106 | eval_sub_path: 107 | 108 | # Training hyper-parameters 109 | per_gpu_train_batch_size: 8 110 | per_gpu_eval_batch_size: 8 111 | #learning_rate: 5e-5 112 | #learning_rate: 1e-4 113 | learning_rate: 5e-4 114 | #learning_rate: 1e-3 115 | gradient_accumulation_steps: 16 116 | weight_decay: 0.01 117 | #weight_decay: 0.1 118 | adam_epsilon: 1e-6 119 | adam_betas: "(0.9, 0.98)" 120 | max_grad_norm: 0.0 121 | #max_grad_norm: 1.0 122 | num_train_epochs: 30 123 | #num_train_epochs: 5 124 | max_steps: 0 125 | warmup_proportion: 0.1 126 | warmup_steps: 127 | 128 | multi_tensor: 129 | 130 | # Prediction config 131 | prediction_cfg: 132 | metric: "acc" 133 | measure: 1 134 | best_checkpoint: 135 | best_result: 136 | 137 | logging_steps: 5 138 | summary_helper: 139 | _target_: general_util.training_utils.SummaryWriterHelper 140 | 141 | save_steps: -1 142 | save_best: True 143 | eval_steps: 250 144 | no_cuda: False 145 | seed: 42 146 | local_rank: -1 147 | fp16: True 148 | fp16_opt_level: O1 149 | 150 | # fairscale.FullyShardedDDP 151 | reshard_after_forward: False 152 | cpu_offload: False 153 | move_grads_to_cpu: False 154 | move_params_to_cpu: False 155 | 156 | # Temporary variables 157 | n_gpu: 158 | device: 159 | train_batch_size: 160 | eval_batch_size: 161 | world_size: 162 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_max_ctr_v1_3_1_wo_iia_iai_uia_uiaiu.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | #test_file: ${data_dir}/UII_test_quadruple.json 11 | test_file: ${data_dir}/UII_test_for_mrr.json 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: 
${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | # iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | # iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | # uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | # uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | max_tuple_num: 32 | 33 | 34 | # Data collator 35 | collator: 36 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 37 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 38 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 39 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 40 | node_vocab: ${data_dir}/subgraphs/vocab.pt 41 | 42 | 43 | # Dataloader 44 | num_workers: 0 45 | eval_num_workers: 0 46 | prefetch_factor: 2 47 | 48 | # Model 49 | model: 50 | _target_: models.gat_tf_emb_max.GATTransformer 51 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 52 | user_vocab: ${collator.user_vocab} 53 | freeze_user_emb: False 54 | vision_model: resnet18 55 | text_hidden_size: 768 56 | img_hidden_size: 512 57 | hidden_size: 512 58 | loss_type: 1 59 | add_ctr_loss: True 60 | gnn: 61 | _target_: models.gat.GAT 62 | num_layers: 2 63 | input_size: ${model.hidden_size} 64 | num_heads: 8 65 | head_size: 64 66 | feat_dropout: 0.1 67 | attn_dropout: 0.1 68 | residual: True 69 | transformer: 70 | _target_: models.transformer.initialize_transformer 71 | encoder_layers: 1 72 | encoder_ffn_dim: 2048 73 | encoder_attention_heads: 8 74 | encoder_layerdrop: 0.0 75 | activation_function: "gelu" 76 | d_model: ${model.hidden_size} 77 | dropout: 0.1 78 | attention_dropout: 0.0 79 | activation_dropout: 0.0 80 | init_std: 0.02 81 | classifier_dropout: 0.0 82 | 83 | pretrain: 84 | 85 | #output_dir: experiments/gat_tf_fix_emb.v1.0 86 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 87 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 88 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 89 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 90 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 91 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 94 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 95 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 96 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5.T4 97 | 98 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.remove_iia_iai_uia_uiaiu.wd0.1.n5.T4 99 | output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.remove_iia_iai_uia_uiaiu.wd0.1.n5.2080Ti 100 | 101 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 102 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 103 | 104 | do_train: False 105 | evaluate_during_training: True 106 | 107 | do_eval: True 108 | eval_sub_path: 109 | 110 | # 
Training hyper-parameters 111 | per_gpu_train_batch_size: 2 112 | per_gpu_eval_batch_size: 2 113 | #learning_rate: 5e-5 114 | learning_rate: 1e-4 115 | #learning_rate: 5e-4 116 | #learning_rate: 1e-3 117 | gradient_accumulation_steps: 12 118 | #weight_decay: 0.01 119 | weight_decay: 0.1 120 | adam_epsilon: 1e-6 121 | adam_betas: "(0.9, 0.98)" 122 | max_grad_norm: 0.0 123 | #max_grad_norm: 1.0 124 | #num_train_epochs: 30 125 | num_train_epochs: 5 126 | max_steps: 0 127 | warmup_proportion: 0.06 128 | warmup_steps: 129 | 130 | multi_tensor: 131 | 132 | # Prediction config 133 | prediction_cfg: 134 | metric: "acc" 135 | measure: 1 136 | best_checkpoint: 137 | best_result: 138 | 139 | logging_steps: 5 140 | summary_helper: 141 | _target_: general_util.training_utils.SummaryWriterHelper 142 | 143 | save_steps: -1 144 | save_best: True 145 | eval_steps: 500 146 | no_cuda: False 147 | seed: 42 148 | local_rank: -1 149 | fp16: True 150 | fp16_opt_level: O1 151 | 152 | # fairscale.FullyShardedDDP 153 | reshard_after_forward: False 154 | cpu_offload: False 155 | move_grads_to_cpu: False 156 | move_params_to_cpu: False 157 | 158 | # Temporary variables 159 | n_gpu: 160 | device: 161 | train_batch_size: 162 | eval_batch_size: 163 | world_size: 164 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_max_ctr_v1_3_1_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | #test_file: ${data_dir}/UII_test_for_mrr.json 12 | 13 | 14 | embedding_memory: 15 | _target_: data_loader.data_utils.EmbeddingMatrix 16 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 17 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 18 | item_image: ${data_dir}/subgraphs/item_img.pt 19 | 20 | 21 | # Data loading 22 | dataset: 23 | _target_: data_loader.data_loader_v1.SubgraphDataset 24 | meta_path_dict: 25 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 26 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 27 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 28 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 29 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 30 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 31 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 32 | 33 | 34 | # Data collator 35 | collator: 36 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 37 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 38 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 39 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 40 | node_vocab: ${data_dir}/subgraphs/vocab.pt 41 | 42 | 43 | # Dataloader 44 | num_workers: 8 45 | eval_num_workers: 2 46 | prefetch_factor: 2 47 | 48 | # Model 49 | model: 50 | _target_: models.gat_tf_emb_max.GATTransformer 51 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 52 | user_vocab: ${collator.user_vocab} 53 | freeze_user_emb: False 54 | vision_model: resnet18 55 | text_hidden_size: 768 56 | img_hidden_size: 512 57 | hidden_size: 512 58 | loss_type: 1 59 | add_ctr_loss: True 60 | gnn: 61 | _target_: models.gat.GAT 62 | num_layers: 2 63 | input_size: ${model.hidden_size} 64 | num_heads: 8 65 | head_size: 64 66 | feat_dropout: 0.1 67 | attn_dropout: 0.1 68 | residual: True 69 | transformer: 70 | _target_: 
models.transformer.initialize_transformer 71 | encoder_layers: 1 72 | encoder_ffn_dim: 2048 73 | encoder_attention_heads: 8 74 | encoder_layerdrop: 0.0 75 | activation_function: "gelu" 76 | d_model: ${model.hidden_size} 77 | dropout: 0.1 78 | attention_dropout: 0.0 79 | activation_dropout: 0.0 80 | # attention_dropout: 0.1 81 | # activation_dropout: 0.1 82 | init_std: 0.02 83 | classifier_dropout: 0.0 84 | 85 | pretrain: 86 | 87 | #output_dir: experiments/gat_tf_fix_emb.v1.0 88 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 89 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 90 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 91 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 92 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 94 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 95 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 96 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 97 | #output_dir: experiments/gat_tf_fix_emb_max.wo_img_fix.v3.1.wd0.1.n5 98 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5 99 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.T4 100 | output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.micro8.A100 101 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2tf # 2-layer transformer // add dropout to transformer 102 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.2tf.titanxp # epoch 5 -> 10 103 | 104 | do_train: True 105 | evaluate_during_training: True 106 | 107 | do_eval: True 108 | eval_sub_path: 109 | 110 | # Training hyper-parameters 111 | per_gpu_train_batch_size: 8 112 | per_gpu_eval_batch_size: 4 113 | #learning_rate: 5e-5 114 | learning_rate: 1e-4 115 | #learning_rate: 5e-4 116 | #learning_rate: 1e-3 117 | gradient_accumulation_steps: 3 118 | #weight_decay: 0.01 119 | weight_decay: 0.1 120 | adam_epsilon: 1e-6 121 | adam_betas: "(0.9, 0.98)" 122 | max_grad_norm: 0.0 123 | #max_grad_norm: 1.0 124 | #num_train_epochs: 30 125 | num_train_epochs: 5 126 | max_steps: 0 127 | warmup_proportion: 0.06 128 | warmup_steps: 129 | 130 | multi_tensor: 131 | 132 | # Prediction config 133 | prediction_cfg: 134 | metric: "acc" 135 | measure: 1 136 | best_checkpoint: 137 | best_result: 138 | 139 | logging_steps: 5 140 | summary_helper: 141 | _target_: general_util.training_utils.SummaryWriterHelper 142 | 143 | save_steps: -1 144 | save_best: True 145 | eval_steps: 500 146 | no_cuda: False 147 | seed: 42 148 | local_rank: -1 149 | fp16: True 150 | fp16_opt_level: O1 151 | 152 | # fairscale.FullyShardedDDP 153 | reshard_after_forward: False 154 | cpu_offload: False 155 | move_grads_to_cpu: False 156 | move_params_to_cpu: False 157 | 158 | # Temporary variables 159 | n_gpu: 160 | device: 161 | train_batch_size: 162 | eval_batch_size: 163 | world_size: 164 | -------------------------------------------------------------------------------- /conf/simple_gat/gpbpr_simple_gat_tf_emb_max_v1_3_1.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: gp-bpr 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: 
${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | #test_file: ${data_dir}/UII_test_for_mrr.json 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/attribute_emb_weight.pt 16 | item_image: ${data_dir}/item_img.pt 17 | item_text: ${data_dir}/item_text_emb_weight.cls.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_edge_v1.SubgraphEdgeDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_3 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_3 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_3 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_3 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_3 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_3 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_3 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_edge_fix_emb.SubgraphEdgeCollatorVocab 36 | user_vocab: ${data_dir}/user_vocab.json 37 | attr_vocab: ${data_dir}/attribute_vocab.json 38 | item_vocab: ${data_dir}/item_vocab.json 39 | node_vocab: ${data_dir}/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 4 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max.GATTransformer 50 | user_embedding: ${data_dir}/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | gnn: 59 | _target_: models.simple_gat.SimpleGAT 60 | edge_dim: 64 61 | num_etypes: 5 62 | num_hidden: ${model.hidden_size} 63 | head_size: 64 64 | num_layers: 1 65 | heads: [8, 8] 66 | activation: "elu" 67 | feat_dropout: 0.1 68 | attn_dropout: 0.1 69 | residual: False 70 | decode: "proj" 71 | transformer: 72 | _target_: models.transformer.initialize_transformer 73 | encoder_layers: 1 74 | encoder_ffn_dim: 2048 75 | encoder_attention_heads: 8 76 | encoder_layerdrop: 0.0 77 | activation_function: "gelu" 78 | d_model: ${model.hidden_size} 79 | dropout: 0.1 80 | attention_dropout: 0.0 81 | activation_dropout: 0.0 82 | init_std: 0.02 83 | classifier_dropout: 0.0 84 | 85 | pretrain: 86 | 87 | #output_dir: experiments/gat_tf_fix_emb.v1.0 88 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 89 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 90 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 91 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 92 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 94 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 95 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 96 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 97 | #output_dir: experiments/gat_tf_fix_emb_max.v3.1.wd0.1.n5 98 | 99 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.TitanXP # 2-layer tf // add dropout in tf // lr 5e-4 100 | #output_dir: experiments/gat_tf_fix_emb_max.v3.2.wd0.1.n5.T4.lr1 # lr 1e-4 101 | 102 | #output_dir: experiments/simple-gat_tf_emb_max.v3.3.wd0.1.n5.T4 # simple-gat // 1-layer transformer // lr=8e-5 // epoch=10 103 | #output_dir: 
experiments/simple-gat_tf_emb_max.v3.1.wd0.1.n5.TitanXP 104 | 105 | output_dir: experiments/gp_bpr.simple-gat_tf_emb_max.v3.1.wd1.0.n3.e25.2080Ti 106 | 107 | do_train: True 108 | evaluate_during_training: True 109 | 110 | do_eval: True 111 | eval_sub_path: 112 | 113 | # Training hyper-parameters 114 | per_gpu_train_batch_size: 4 115 | per_gpu_eval_batch_size: 4 116 | #learning_rate: 8e-5 117 | #learning_rate: 1e-4 118 | #learning_rate: 5e-4 119 | learning_rate: 1e-3 120 | gradient_accumulation_steps: 32 121 | #weight_decay: 0.01 122 | #weight_decay: 0.1 123 | weight_decay: 1.0 124 | adam_epsilon: 1e-6 125 | adam_betas: "(0.9, 0.98)" 126 | max_grad_norm: 0.0 127 | #max_grad_norm: 1.0 128 | num_train_epochs: 25 129 | #num_train_epochs: 5 130 | max_steps: 0 131 | warmup_proportion: 0.06 132 | warmup_steps: 133 | 134 | multi_tensor: 135 | 136 | # Prediction config 137 | prediction_cfg: 138 | metric: "acc" 139 | measure: 1 140 | best_checkpoint: 141 | best_result: 142 | 143 | logging_steps: 5 144 | summary_helper: 145 | _target_: general_util.training_utils.SummaryWriterHelper 146 | 147 | save_steps: -1 148 | save_best: True 149 | eval_steps: 500 150 | no_cuda: False 151 | seed: 42 152 | local_rank: -1 153 | fp16: True 154 | fp16_opt_level: O1 155 | 156 | # fairscale.FullyShardedDDP 157 | reshard_after_forward: False 158 | cpu_offload: False 159 | move_grads_to_cpu: False 160 | move_params_to_cpu: False 161 | 162 | # Temporary variables 163 | n_gpu: 164 | device: 165 | train_batch_size: 166 | eval_batch_size: 167 | world_size: 168 | -------------------------------------------------------------------------------- /conf/gat_tf_emb_max_ctr_v1_3_3.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | run: 3 | dir: ./ 4 | 5 | data_dir: IQON_pair_remove_edge 6 | feat_dir: iqon_pair_feat 7 | 8 | train_file: ${data_dir}/UII_train_quadruple.json 9 | dev_file: ${data_dir}/UII_valid_quadruple.json 10 | test_file: ${data_dir}/UII_test_quadruple.json 11 | 12 | 13 | embedding_memory: 14 | _target_: data_loader.data_utils.EmbeddingMatrix 15 | attr_text: ${data_dir}/subgraphs/attribute_emb_weight.pt 16 | item_text: ${data_dir}/subgraphs/item_text_emb_weight.cls.pt 17 | item_image: ${data_dir}/subgraphs/item_img.pt 18 | 19 | 20 | # Data loading 21 | dataset: 22 | _target_: data_loader.data_loader_v1.SubgraphDataset 23 | meta_path_dict: 24 | ii: ${data_dir}/subgraph.ii.v1.0.sparse_42_5 25 | iia: ${data_dir}/subgraph.iia.v1.0.sparse_42_5 26 | iai: ${data_dir}/subgraph.iai.v1.0.sparse_42_5 27 | iui: ${data_dir}/subgraph.iui.v1.0.sparse_42_5 28 | uia: ${data_dir}/subgraph.uia.v1.0.sparse_42_5 29 | uiu: ${data_dir}/subgraph.uiu.v1.0.sparse_42_5 30 | uiaiu: ${data_dir}/subgraph.uiaiu.v1.0.sparse_42_5 31 | 32 | 33 | # Data collator 34 | collator: 35 | _target_: data_loader.data_collator_fix_emb.SubgraphCollatorVocab 36 | user_vocab: ${data_dir}/subgraphs/user_vocab.json 37 | attr_vocab: ${data_dir}/subgraphs/attribute_vocab.json 38 | item_vocab: ${data_dir}/subgraphs/item_vocab.json 39 | node_vocab: ${data_dir}/subgraphs/vocab.pt 40 | 41 | 42 | # Dataloader 43 | num_workers: 8 44 | eval_num_workers: 2 45 | prefetch_factor: 2 46 | 47 | # Model 48 | model: 49 | _target_: models.gat_tf_emb_max.GATTransformer 50 | user_embedding: ${data_dir}/subgraphs/user_emb_weight.pt 51 | user_vocab: ${collator.user_vocab} 52 | freeze_user_emb: False 53 | vision_model: resnet18 54 | text_hidden_size: 768 55 | img_hidden_size: 512 56 | hidden_size: 512 57 | loss_type: 1 58 | add_ctr_loss: 
True 59 | gnn: 60 | _target_: models.gat.GAT 61 | num_layers: 2 62 | input_size: ${model.hidden_size} 63 | num_heads: 8 64 | head_size: 64 65 | feat_dropout: 0.1 66 | attn_dropout: 0.1 67 | residual: True 68 | transformer: 69 | _target_: models.transformer.initialize_transformer 70 | encoder_layers: 1 71 | encoder_ffn_dim: 2048 72 | encoder_attention_heads: 8 73 | encoder_layerdrop: 0.0 74 | activation_function: "gelu" 75 | d_model: ${model.hidden_size} 76 | dropout: 0.1 77 | attention_dropout: 0.0 78 | activation_dropout: 0.0 79 | # attention_dropout: 0.1 80 | # activation_dropout: 0.1 81 | init_std: 0.02 82 | classifier_dropout: 0.0 83 | 84 | pretrain: 85 | 86 | #output_dir: experiments/gat_tf_fix_emb.v1.0 87 | #output_dir: experiments/gat_tf_fix_emb.v1.1 # lr 5e-5 -> 1e-4 88 | #output_dir: experiments/gat_tf_fix_emb.v1.1.max_gnorm_1 89 | #output_dir: experiments/gat_tf_fix_emb.v2.0.max_gnorm_1 # gnn dropout 0.1 -> 0.4 90 | #output_dir: experiments/gat_tf_fix_emb.v3.0 # 768 -> 512 // 3-layer GAT -> 2-layer GAT 91 | #output_dir: experiments/gat_tf_fix_emb.v3.0.wd0.1 92 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.1.wd0.1 # 1-layer transformer bs: 32 -> 24 93 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.3.wd0.1 # 2-layer transformer // epoch 10 -> 5 94 | #output_dir: experiments/gp_bpr.gat_tf_fix_emb.v3.4.wd0.1 # 1-layer transformer // bs 24 -> 128 // epoch 5 -> 10 95 | #output_dir: experiments/gat_tf_fix_emb_wo-img-fix.v3.5.wd0.1 # max_neighbour_num 5 -> 3 96 | #output_dir: experiments/gat_tf_fix_emb_max.wo_img_fix.v3.1.wd0.1.n5 97 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5 98 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.T4 99 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.2.wd0.1.n5.T4 # low learning rate 100 | output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.T4 # epoch 5 -> 10 101 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.1.wd0.1.n5.2tf # 2-layer transformer // add dropout to transformer 102 | #output_dir: experiments/gat_tf_fix_emb_max.ctr.v3.3.wd0.1.n5.2tf.titanxp # epoch 5 -> 10 103 | 104 | do_train: True 105 | evaluate_during_training: True 106 | 107 | do_eval: True 108 | eval_sub_path: 109 | 110 | # Training hyper-parameters 111 | per_gpu_train_batch_size: 2 112 | per_gpu_eval_batch_size: 2 113 | learning_rate: 5e-5 114 | #learning_rate: 1e-4 115 | #learning_rate: 5e-4 116 | #learning_rate: 1e-3 117 | gradient_accumulation_steps: 12 118 | #weight_decay: 0.01 119 | weight_decay: 0.1 120 | adam_epsilon: 1e-6 121 | adam_betas: "(0.9, 0.98)" 122 | max_grad_norm: 0.0 123 | #max_grad_norm: 1.0 124 | #num_train_epochs: 30 125 | num_train_epochs: 10 126 | max_steps: 0 127 | warmup_proportion: 0.06 128 | warmup_steps: 129 | 130 | multi_tensor: 131 | 132 | # Prediction config 133 | prediction_cfg: 134 | metric: "acc" 135 | measure: 1 136 | best_checkpoint: 137 | best_result: 138 | 139 | logging_steps: 5 140 | summary_helper: 141 | _target_: general_util.training_utils.SummaryWriterHelper 142 | 143 | save_steps: -1 144 | save_best: True 145 | eval_steps: 500 146 | no_cuda: False 147 | seed: 42 148 | local_rank: -1 149 | fp16: True 150 | fp16_opt_level: O1 151 | 152 | # fairscale.FullyShardedDDP 153 | reshard_after_forward: False 154 | cpu_offload: False 155 | move_grads_to_cpu: False 156 | move_params_to_cpu: False 157 | 158 | # Temporary variables 159 | n_gpu: 160 | device: 161 | train_batch_size: 162 | eval_batch_size: 163 | world_size: 164 | 
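# The empty keys above ("Temporary variables") parse as null; presumably trainer.py fills
# them in at runtime: device and n_gpu from the environment, train_batch_size and
# eval_batch_size derived from the per_gpu_* values, and world_size under distributed runs.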
--------------------------------------------------------------------------------
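A minimal sketch of how configs like the ones above are consumed, assuming the usual hydra-core 1.1 entry-point pattern (the bodies of trainer.py and predict.py are not included in this dump); the function name `main` and the chosen config name below are illustrative only:

import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig


@hydra.main(config_path="conf", config_name="gat_tf_emb_max_v1_3_1")
def main(cfg: DictConfig):
    # Interpolations such as ${model.hidden_size} and ${collator.user_vocab} resolve on access.
    # With hydra 1.1, instantiate() is recursive by default, so nested _target_ blocks
    # (model.gnn, model.transformer) are constructed along with their parent.
    embedding_memory = instantiate(cfg.embedding_memory)  # data_loader.data_utils.EmbeddingMatrix
    collator = instantiate(cfg.collator)                  # SubgraphCollatorVocab
    model = instantiate(cfg.model)                        # GATTransformer with its GAT and encoder
    print(type(model).__name__, "->", cfg.output_dir)


if __name__ == "__main__":
    main()

Each `_target_` block instantiates the named class with its sibling keys as keyword arguments; whether a class also expects extra runtime arguments (e.g. the dataset presumably receives the train_file/dev_file/test_file paths from the trainer) depends on signatures not shown here. Command-line overrides of any key (key=value) compose with these configs in the standard Hydra way.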