├── log └── best │ └── EMPTY ├── conda ├── build.sh ├── conda_release.sh └── meta.yaml ├── deepcarskit ├── properties │ ├── model │ │ ├── Pop.yaml │ │ ├── BPR.yaml │ │ ├── EASE.yaml │ │ ├── FM.yaml │ │ ├── LR.yaml │ │ ├── FPMC.yaml │ │ ├── ItemKNN.yaml │ │ ├── TransRec.yaml │ │ ├── STAMP.yaml │ │ ├── SRGNN.yaml │ │ ├── LightGCN.yaml │ │ ├── NPE.yaml │ │ ├── LINE.yaml │ │ ├── SpectralCF.yaml │ │ ├── CFKG.yaml │ │ ├── CKE.yaml │ │ ├── MultiDAE.yaml │ │ ├── AFM.yaml │ │ ├── FISM.yaml │ │ ├── FNN.yaml │ │ ├── NFM.yaml │ │ ├── WideDeep.yaml │ │ ├── DeepFM.yaml │ │ ├── ENMF.yaml │ │ ├── SLIMElastic.yaml │ │ ├── FOSSIL.yaml │ │ ├── HGN.yaml │ │ ├── RippleNet.yaml │ │ ├── SHAN.yaml │ │ ├── DSSM.yaml │ │ ├── Caser.yaml │ │ ├── KGCN.yaml │ │ ├── NARM.yaml │ │ ├── DIN.yaml │ │ ├── GRU4Rec.yaml │ │ ├── DGCF.yaml │ │ ├── RepeatNet.yaml │ │ ├── DCN.yaml │ │ ├── NGCF.yaml │ │ ├── FFM.yaml │ │ ├── GRU4RecKG.yaml │ │ ├── KGAT.yaml │ │ ├── KGNNLS.yaml │ │ ├── MultiVAE.yaml │ │ ├── KSR.yaml │ │ ├── NextItNet.yaml │ │ ├── PNN.yaml │ │ ├── DIEN.yaml │ │ ├── AutoInt.yaml │ │ ├── FwFM.yaml │ │ ├── HRM.yaml │ │ ├── GCMC.yaml │ │ ├── xDeepFM.yaml │ │ ├── GRU4RecF.yaml │ │ ├── NAIS.yaml │ │ ├── CDAE.yaml │ │ ├── KTUP.yaml │ │ ├── ConvNCF.yaml │ │ ├── MKR.yaml │ │ ├── RecVAE.yaml │ │ ├── MacridVAE.yaml │ │ ├── NeuMF.yaml │ │ ├── SASRec.yaml │ │ ├── BERT4Rec.yaml │ │ ├── GCSAN.yaml │ │ ├── RaCT.yaml │ │ ├── FDSA.yaml │ │ ├── SASRecF.yaml │ │ ├── DMF.yaml │ │ ├── NNCF.yaml │ │ ├── S3Rec.yaml │ │ ├── lightgbm.yaml │ │ └── xgboost.yaml │ ├── quick_start_config │ │ ├── sequential.yaml │ │ ├── knowledge_base.yaml │ │ ├── context-aware.yaml │ │ └── sequential_embedding_model.yaml │ └── overall.yaml ├── model │ ├── ae │ │ └── __init__.py │ ├── fms │ │ ├── __init__.py │ │ ├── fm.py │ │ └── deepfm.py │ ├── neucf │ │ ├── __init__.py │ │ ├── neucmf0w.py │ │ ├── neucmfw0.py │ │ ├── neucmf0i.py │ │ ├── neucmfww.py │ │ ├── neucmfi0.py │ │ └── neucmfii.py │ ├── __init__.py │ ├── layers.py │ └── context_recommender.py ├── data │ ├── dataset │ │ └── __init__.py │ ├── dataloader │ │ ├── __init__.py │ │ └── general_dataloader.py │ ├── __init__.py │ └── utils.py ├── config │ ├── __init__.py │ └── configurator.py ├── quick_start │ ├── __init__.py │ └── quick_start.py ├── __init__.py ├── trainer │ ├── __init__.py │ └── trainer.py ├── evaluator │ ├── __init__.py │ ├── evaluator.py │ ├── collector.py │ └── base_metric.py └── utils │ ├── __init__.py │ ├── utils.py │ └── logger.py ├── images ├── NeuCMF.png └── intro-img1.jpg ├── requirements.txt ├── MANIFEST.in ├── .gitignore ├── check_gpu.py ├── check_torch.py ├── .github └── FUNDING.yml ├── run.py ├── LICENSE ├── dataset ├── tripadvisor │ └── ReadMe.html └── depaulmovie │ └── ReadMe.html ├── setup.py ├── config.yaml ├── README.md └── style.cfg /log/best/EMPTY: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /conda/build.sh: -------------------------------------------------------------------------------- 1 | $PYTHON setup.py install -------------------------------------------------------------------------------- /deepcarskit/properties/model/Pop.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/BPR.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 
-------------------------------------------------------------------------------- /deepcarskit/properties/model/EASE.yaml: -------------------------------------------------------------------------------- 1 | reg_weight: 250.0 -------------------------------------------------------------------------------- /deepcarskit/properties/model/FM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 -------------------------------------------------------------------------------- /deepcarskit/properties/model/LR.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 -------------------------------------------------------------------------------- /deepcarskit/properties/model/FPMC.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/ItemKNN.yaml: -------------------------------------------------------------------------------- 1 | k: 100 2 | shrink: 0.0 -------------------------------------------------------------------------------- /deepcarskit/properties/model/TransRec.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 -------------------------------------------------------------------------------- /deepcarskit/model/ae/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.model.ae import * -------------------------------------------------------------------------------- /deepcarskit/model/fms/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.model.fms import * -------------------------------------------------------------------------------- /deepcarskit/model/neucf/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.model.neucf import * -------------------------------------------------------------------------------- /deepcarskit/properties/model/STAMP.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: 'CE' 3 | -------------------------------------------------------------------------------- /deepcarskit/data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.data.dataset.dataset import Dataset -------------------------------------------------------------------------------- /deepcarskit/config/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.config.configurator import CARSConfig 2 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/SRGNN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | step: 1 3 | loss_type: 'CE' -------------------------------------------------------------------------------- /images/NeuCMF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irecsys/DeepCARSKit/HEAD/images/NeuCMF.png -------------------------------------------------------------------------------- /deepcarskit/properties/model/LightGCN.yaml: 
-------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_layers: 2 3 | reg_weight: 1e-05 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NPE.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: "CE" 3 | dropout_prob: 0.3 -------------------------------------------------------------------------------- /images/intro-img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irecsys/DeepCARSKit/HEAD/images/intro-img1.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | recbole==1.0.1 2 | scipy==1.6.0 3 | numpy==1.20.0 4 | xgboost 5 | torch_geometric -------------------------------------------------------------------------------- /deepcarskit/data/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.data.dataloader.general_dataloader import * -------------------------------------------------------------------------------- /deepcarskit/properties/model/LINE.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | order: 2 3 | second_order_loss_weight: 1 -------------------------------------------------------------------------------- /deepcarskit/properties/model/SpectralCF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_layers: 4 3 | reg_weight: 1e-03 -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include deepcarskit/properties * 2 | recursive-include deepcarskit/dataset_example * 3 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/CFKG.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_function: 'inner_product' 3 | margin: 1.0 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/CKE.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | kg_embedding_size: 64 3 | reg_weights: [1e-2,1e-2] -------------------------------------------------------------------------------- /deepcarskit/properties/model/MultiDAE.yaml: -------------------------------------------------------------------------------- 1 | mlp_hidden_size: [600] 2 | latent_dimension: 64 3 | dropout_prob: 0.5 -------------------------------------------------------------------------------- /deepcarskit/properties/model/AFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | attention_size: 25 3 | dropout_prob: 0.3 4 | reg_weight: 2 -------------------------------------------------------------------------------- /deepcarskit/properties/model/FISM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | split_to: 0 3 | reg_weights: [1e-2, 1e-2] 4 | alpha: 0 -------------------------------------------------------------------------------- 
/deepcarskit/properties/model/FNN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256, 256, 256] 3 | dropout_prob: 0.2 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [64, 64, 64] 3 | dropout_prob: 0.0 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/WideDeep.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [32, 16, 8] 3 | dropout_prob: 0.1 -------------------------------------------------------------------------------- /deepcarskit/properties/model/DeepFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [128, 128, 128] 3 | dropout_prob: 0.2 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/ENMF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | dropout_prob: 0.7 3 | reg_weight: 0.0 4 | negative_weight: 0.5 -------------------------------------------------------------------------------- /deepcarskit/properties/model/SLIMElastic.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | l1_ratio: 0.02 3 | positive_only: True 4 | hide_item: True 5 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/FOSSIL.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: "CE" 3 | reg_weight: 0.00 4 | order_len: 3 5 | alpha: 0.6 -------------------------------------------------------------------------------- /deepcarskit/properties/model/HGN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: 'BPR' 3 | pooling_type: "average" 4 | reg_weight: [0.00,0.00] -------------------------------------------------------------------------------- /deepcarskit/properties/model/RippleNet.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_hop: 2 3 | n_memory: 16 4 | kg_weight: 0.01 5 | reg_weight: 1e-7 -------------------------------------------------------------------------------- /deepcarskit/properties/model/SHAN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | short_item_length: 2 3 | loss_type: "CE" 4 | reg_weight: [0.01,0.0001] -------------------------------------------------------------------------------- /deepcarskit/properties/model/DSSM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256, 256, 256] 3 | dropout_prob: 0.3 4 | double_tower: True -------------------------------------------------------------------------------- /deepcarskit/quick_start/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.quick_start.quick_start import run, objective_function, load_data_and_model 2 | 3 | -------------------------------------------------------------------------------- 
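Note: the quick_start package above re-exports run, objective_function and load_data_and_model. As a minimal, hypothetical usage sketch (mirroring run.py further down in this listing, and assuming the repository's sample config.yaml sits in the working directory):

from deepcarskit.quick_start import run

# All model, data and evaluation settings are read from the YAML file,
# exactly as run.py does; nothing here is specific to a particular model.
run(config_file_list=['config.yaml'])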
/deepcarskit/properties/model/Caser.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | dropout_prob: 0.4 3 | reg_weight: 1e-4 4 | nv: 8 5 | nh: 16 6 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/KGCN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_iter: 1 3 | aggregator: "sum" 4 | reg_weight: 1e-7 5 | neighbor_sample_size: 4 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NARM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | n_layers: 1 4 | dropout_probs: [0.25,0.5] 5 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/DIN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256,256,256] 3 | dropout_prob: 0 4 | pooling_mode: 'mean' 5 | 6 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/GRU4Rec.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | num_layers: 1 4 | dropout_prob: 0.3 5 | loss_type: 'CE' 6 | -------------------------------------------------------------------------------- /deepcarskit/model/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/DGCF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_factors: 4 3 | n_iterations: 2 4 | n_layers: 1 5 | reg_weight: 1e-3 6 | cor_weight: 0.01 -------------------------------------------------------------------------------- /deepcarskit/properties/model/RepeatNet.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: "CE" 3 | hidden_size: 64 4 | joint_train: False 5 | dropout_prob: 0.5 6 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/DCN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256, 256, 256] 3 | cross_layer_num: 6 4 | reg_weight: 2 5 | dropout_prob: 0.2 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NGCF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size_list: [64,64,64] 3 | node_dropout: 0.0 4 | message_dropout: 0.1 5 | reg_weight: 1e-5 -------------------------------------------------------------------------------- /deepcarskit/properties/model/FFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | # define fields: key: field's id, value: features in this field. can be ignored. 
3 | fields: ~ 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/GRU4RecKG.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | num_layers: 1 4 | dropout_prob: 0.1 5 | freeze_kg: True 6 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/KGAT.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | kg_embedding_size: 64 3 | layers: [64] 4 | mess_dropout: 0.1 5 | reg_weight: 1e-5 6 | aggregator_type: 'bi' -------------------------------------------------------------------------------- /deepcarskit/properties/model/KGNNLS.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_iter: 1 3 | aggregator: "sum" 4 | reg_weight: 1e-7 5 | neighbor_sample_size: 4 6 | 7 | ls_weight: 0.5 -------------------------------------------------------------------------------- /deepcarskit/properties/model/MultiVAE.yaml: -------------------------------------------------------------------------------- 1 | mlp_hidden_size: [600] 2 | latent_dimension: 128 3 | dropout_prob: 0.5 4 | anneal_cap: 0.2 5 | total_anneal_steps: 200000 6 | -------------------------------------------------------------------------------- /deepcarskit/properties/quick_start_config/sequential.yaml: -------------------------------------------------------------------------------- 1 | eval_args: 2 | split: {'LS': 'valid_and_test'} 3 | order: TO 4 | mode: full 5 | repeatable: True 6 | -------------------------------------------------------------------------------- /deepcarskit/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | __version__ = '1.0.1' -------------------------------------------------------------------------------- /deepcarskit/properties/model/KSR.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | num_layers: 1 4 | dropout_prob: 0.1 5 | loss_type: 'CE' 6 | freeze_kg: False 7 | gamma: 10 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NextItNet.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | kernel_size: 3 3 | block_num: 5 4 | dilations: [1,4] 5 | reg_weight: 1e-5 6 | loss_type: 'CE' 7 | 8 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/PNN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [128, 256, 128] 3 | dropout_prob: 0.0 4 | reg_weight: 0 5 | use_inner: True 6 | use_outer: False -------------------------------------------------------------------------------- /deepcarskit/properties/model/DIEN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256,256,256] 3 | dropout_prob: 0 4 | pooling_mode: 'mean' 5 | gru_type: 'AUGRU' 6 | alpha: 1 7 | -------------------------------------------------------------------------------- /deepcarskit/data/__init__.py: 
-------------------------------------------------------------------------------- 1 | from deepcarskit.data.utils import * 2 | 3 | __all__ = ['create_dataset', 'data_preparation', 'save_split_dataloaders', 'load_split_dataloaders'] 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/AutoInt.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | attention_size: 16 3 | n_layers: 3 4 | num_heads: 2 5 | dropout_probs: [0.2,0.2,0.2] 6 | mlp_hidden_size: [128,128] -------------------------------------------------------------------------------- /deepcarskit/properties/model/FwFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | dropout_prob: 0.0 3 | # define fields: key: field's id, value: features in this field. can be ignored. 4 | fields: ~ 5 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/HRM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | high_order: 2 3 | loss_type: "CE" 4 | dropout_prob: 0.2 5 | pooling_type_layer_1: "max" 6 | pooling_type_layer_2: "max" -------------------------------------------------------------------------------- /deepcarskit/properties/model/GCMC.yaml: -------------------------------------------------------------------------------- 1 | accum: "stack" 2 | gcn_output_dim: 500 3 | embedding_size: 64 4 | dropout_prob: 0.3 5 | sparse_feature: True 6 | class_num: 2 7 | num_basis_functions: 2 -------------------------------------------------------------------------------- /deepcarskit/properties/model/xDeepFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [128,128,128] 3 | reg_weight: 5e-4 4 | dropout_prob: 0.2 5 | direct: False 6 | cin_layer_size: [100,100,100] -------------------------------------------------------------------------------- /deepcarskit/properties/model/GRU4RecF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | num_layers: 1 4 | dropout_prob: 0.3 5 | selected_features: ['class'] 6 | pooling_mode: 'sum' 7 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/NAIS.yaml: -------------------------------------------------------------------------------- 1 | algorithm: prod 2 | embedding_size: 64 3 | weight_size: 64 4 | split_to: 0 5 | reg_weights: [1e-7, 1e-7, 1e-5] 6 | alpha: 0 7 | beta: 0.5 8 | pretrain_path: ~ -------------------------------------------------------------------------------- /deepcarskit/properties/model/CDAE.yaml: -------------------------------------------------------------------------------- 1 | loss_type: BCE 2 | hid_activation: relu 3 | out_activation: sigmoid 4 | corruption_ratio: 0.5 5 | embedding_size: 64 6 | reg_weight_1: 0. 
7 | reg_weight_2: 0.01 8 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/KTUP.yaml: -------------------------------------------------------------------------------- 1 | train_rec_step: 5 2 | train_kg_step: 5 3 | embedding_size: 64 4 | use_st_gumbel: True 5 | L1_flag: False 6 | margin: 1.0 7 | kg_weight: 1.0 8 | align_weight: 1.0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.bak 2 | *.log 3 | *.pth 4 | *.pyc 5 | *.zip 6 | *.ttf 7 | *.xml 8 | *.iml 9 | events.out.* 10 | saved/ 11 | log/ 12 | log_tensorboard/ 13 | doc/ 14 | **/__pycache__/ 15 | .idea/ 16 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/ConvNCF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | cnn_channels: [1, 32, 32, 32, 32] 3 | cnn_kernels: [4, 4, 2, 2] 4 | cnn_strides: [4, 4, 2, 2] 5 | dropout_prob: 0.2 6 | reg_weights: [0.1, 0.1] -------------------------------------------------------------------------------- /deepcarskit/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.trainer.trainer import CARSTrainer 2 | from recbole.trainer import * 3 | 4 | __all__ = ['Trainer', 'KGTrainer', 'KGATTrainer', 'S3RecTrainer', 'CARSTrainer'] 5 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/MKR.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | kg_embedding_size: 64 3 | low_layers_num: 1 4 | high_layers_num: 1 5 | reg_weight: 1e-6 6 | use_inner_product: True 7 | kge_interval: 3 8 | dropout_prob: 0.0 -------------------------------------------------------------------------------- /deepcarskit/properties/quick_start_config/knowledge_base.yaml: -------------------------------------------------------------------------------- 1 | load_col: 2 | inter: ['user_id', 'item_id', 'rating', 'timestamp'] 3 | kg: ['head_id', 'relation_id', 'tail_id'] 4 | link: ['item_id', 'entity_id'] -------------------------------------------------------------------------------- /deepcarskit/properties/model/RecVAE.yaml: -------------------------------------------------------------------------------- 1 | hidden_dimension: 600 2 | latent_dimension: 200 3 | dropout_prob: 0.5 4 | beta: 0.2 5 | mixture_weights: [0.15, 0.75, 0.1] 6 | gamma: 0.005 7 | n_enc_epochs: 3 8 | n_dec_epochs: 1 9 | -------------------------------------------------------------------------------- /deepcarskit/properties/quick_start_config/context-aware.yaml: -------------------------------------------------------------------------------- 1 | eval_args: 2 | split: {'RS':[0.8,0.1,0.1]} 3 | order: RO 4 | group_by: ~ 5 | mode: labeled 6 | neg_sampling: ~ 7 | metrics: ['AUC', 'LogLoss'] 8 | valid_metric: AUC -------------------------------------------------------------------------------- /deepcarskit/properties/quick_start_config/sequential_embedding_model.yaml: -------------------------------------------------------------------------------- 1 | load_col: 2 | inter: ['user_id', 'item_id', 'rating', 'timestamp'] 3 | ent: ['ent_id', 'ent_emb'] 4 | additional_feat_suffix: ent 5 | repeatable: True 6 | -------------------------------------------------------------------------------- 
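Note: the trainer package above exposes CARSTrainer next to RecBole's built-in trainers. Below is a small, hypothetical sketch of how deepcarskit.utils.utils.get_model and get_trainer (defined later in this listing) resolve those classes by name; ModelType comes from RecBole (CONTEXT is its context-aware model type), and NeuCMFii is the model name used in the sample config.yaml:

from recbole.utils.enum_type import ModelType
from deepcarskit.utils.utils import get_model, get_trainer

# 'NeuCMFii' is searched in the deepcarskit.model.(ae|fms|neucf) submodules first,
# which maps to deepcarskit/model/neucf/neucmfii.py.
model_class = get_model('NeuCMFii')

# No dedicated 'NeuCMFiiTrainer' exists, so get_trainer falls back to CARSTrainer.
trainer_class = get_trainer(ModelType.CONTEXT, 'NeuCMFii')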
/deepcarskit/properties/model/MacridVAE.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | drop_out: 0.5 3 | kfac: 10 4 | nogb: False 5 | std: 0.01 6 | encoder_hidden_size: [600] 7 | tau: 0.1 8 | anneal_cap: 0.2 9 | total_anneal_steps: 200000 10 | reg_weights: [0, 0] -------------------------------------------------------------------------------- /deepcarskit/properties/model/NeuMF.yaml: -------------------------------------------------------------------------------- 1 | mf_embedding_size: 64 2 | mlp_embedding_size: 64 3 | mlp_hidden_size: [128,64] 4 | dropout_prob: 0.1 5 | mf_train: True 6 | mlp_train: True 7 | 8 | use_pretrain: False 9 | mf_pretrain_path: ~ 10 | mlp_pretrain_path: ~ -------------------------------------------------------------------------------- /deepcarskit/properties/model/SASRec.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | loss_type: 'CE' -------------------------------------------------------------------------------- /conda/conda_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | conda-build --python 3.7 . 4 | printf "python 3.7 version is released \n" 5 | conda-build --python 3.8 . 6 | printf "python 3.8 version is released \n" 7 | conda-build --python 3.9 . 8 | printf "python 3.9 version is released \n" 9 | -------------------------------------------------------------------------------- /deepcarskit/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.evaluator.base_metric import * 2 | from recbole.evaluator.metrics import * 3 | from deepcarskit.evaluator.evaluator import * 4 | from recbole.evaluator.register import * 5 | from deepcarskit.evaluator.collector import * 6 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/BERT4Rec.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | mask_ratio: 0.2 11 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/GCSAN.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 1 2 | n_heads: 1 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.2 6 | attn_dropout_prob: 0.2 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | step: 1 11 | weight: 0.6 12 | reg_weight: 5e-5 13 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/RaCT.yaml: -------------------------------------------------------------------------------- 1 | mlp_hidden_size: [600] 2 | latent_dimension: 256 3 | dropout_prob: 0.5 4 | anneal_cap: 0.2 5 | total_anneal_steps: 200000 6 | critic_layers: [100,100,10] 7 | metrics_k: 100 8 | train_stage: 'actor_pretrain' 9 | pretrain_epochs: 150 10 | save_step: 10 11 | pre_model_path: '' 
-------------------------------------------------------------------------------- /deepcarskit/properties/model/FDSA.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | selected_features: ['class'] 11 | pooling_mode: 'mean' 12 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/SASRecF.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | selected_features: ['class'] 11 | pooling_mode: 'sum' 12 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/DMF.yaml: -------------------------------------------------------------------------------- 1 | # WARNING: 2 | # 1. if you set inter_matrix_type='rating', you must set `unused_col: ~` in your data config files. 3 | # 2. The dimensions of the last layer of users and items must be the same 4 | 5 | inter_matrix_type: '01' 6 | user_embedding_size: 64 7 | item_embedding_size: 64 8 | user_hidden_size_list: [64, 64] 9 | item_hidden_size_list: [64, 64] -------------------------------------------------------------------------------- /check_gpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # Check if CUDA (GPU support) is available 4 | gpu_available = torch.cuda.is_available() 5 | print(f"CUDA available: {gpu_available}") 6 | 7 | # If CUDA is available, get the name of the GPU 8 | if gpu_available: 9 | gpu_name = torch.cuda.get_device_name(0) 10 | print(f"GPU detected: {gpu_name}") 11 | else: 12 | print("No GPU detected.") 13 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/NNCF.yaml: -------------------------------------------------------------------------------- 1 | ui_embedding_size: 64 2 | neigh_embedding_size: 32 3 | num_conv_kernel: 128 4 | conv_kernel_size: 5 5 | pool_kernel_size: 5 6 | mlp_hidden_size: [128,64,32,16] 7 | neigh_num: 10 8 | dropout: 0.5 9 | 10 | # The method to use neighborhood information, you can choose random, knn or louvain algorithom 11 | # e.g. 
neigh_info_method: "knn" or neigh_info_method: "louvain" 12 | neigh_info_method: "knn" 13 | 14 | resolution: 1 15 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/S3Rec.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | item_attribute: 'class' 11 | mask_ratio: 0.2 12 | aap_weight: 1.0 13 | mip_weight: 0.2 14 | map_weight: 1.0 15 | sp_weight: 0.5 16 | train_stage: 'pretrain' 17 | pretrain_epochs: 500 18 | save_step: 10 19 | pre_model_path: '' 20 | loss_type: 'CE' -------------------------------------------------------------------------------- /check_torch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # Get the PyTorch version 4 | torch_version = torch.__version__ 5 | print(f"PyTorch version: {torch_version}") 6 | 7 | # Check if CUDA is available (indicating GPU support) 8 | is_cuda_available = torch.cuda.is_available() 9 | print(f"CUDA available: {is_cuda_available}") 10 | 11 | # Determine the type of PyTorch version 12 | if is_cuda_available: 13 | print("This is the GPU version of PyTorch.") 14 | else: 15 | print("This is the CPU version of PyTorch.") 16 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/lightgbm.yaml: -------------------------------------------------------------------------------- 1 | convert_token_to_onehot: False 2 | token_num_threshold: 10000 3 | 4 | # Dataset 5 | lgb_silent: False 6 | 7 | # Train 8 | lgb_model: ~ 9 | lgb_params: 10 | boosting: gbdt 11 | num_leaves: 90 12 | min_data_in_leaf: 30 13 | max_depth: -1 14 | learning_rate: 0.1 15 | objective: binary 16 | lambda_l1: 0.1 17 | metric: ['auc', 'binary_logloss'] 18 | force_row_wise: True 19 | lgb_learning_rates: ~ 20 | lgb_num_boost_round: 300 21 | lgb_early_stopping_rounds: ~ 22 | lgb_verbose_eval: 100 23 | 24 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/xgboost.yaml: -------------------------------------------------------------------------------- 1 | convert_token_to_onehot: False 2 | token_num_threshold: 10000 3 | 4 | # DMatrix 5 | xgb_silent: ~ 6 | xgb_nthread: ~ 7 | 8 | xgb_model: ~ 9 | xgb_params: 10 | booster: gbtree 11 | objective: binary:logistic 12 | eval_metric: ['auc','logloss'] 13 | # gamma: 0.1 14 | max_depth: 3 15 | # lambda: 1 16 | # subsample: 0.7 17 | # colsample_bytree: 0.7 18 | # min_child_weight: 3 19 | eta: 1 20 | seed: 2020 21 | # nthread: -1 22 | xgb_num_boost_round: 100 23 | xgb_early_stopping_rounds: ~ 24 | xgb_verbose_eval: 50 25 | 26 | -------------------------------------------------------------------------------- /deepcarskit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.utils.logger import init_logger, set_color 2 | from recbole.utils.utils import get_local_time, ensure_dir, get_model, get_trainer, \ 3 | early_stopping, calculate_valid_score, dict2str, init_seed, get_tensorboard, get_gpu_usage 4 | from recbole.utils.enum_type import * 5 | from recbole.utils.argument_list import * 6 | 7 | __all__ = [ 8 | 'init_logger', 'get_local_time', 'ensure_dir', 'get_model', 'get_trainer', 'early_stopping', 9 | 
'calculate_valid_score', 'dict2str', 'Enum', 'ModelType', 'KGDataLoaderState', 'EvaluatorType', 'InputType', 10 | 'FeatureType', 'FeatureSource', 'init_seed', 'general_arguments', 'training_arguments', 'evaluation_arguments', 11 | 'dataset_arguments', 'get_tensorboard', 'set_color', 'get_gpu_usage' 12 | ] 13 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: deepcarskit 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /deepcarskit/properties/overall.yaml: -------------------------------------------------------------------------------- 1 | # general 2 | gpu_id: 0 3 | use_gpu: True 4 | seed: 2020 5 | state: INFO 6 | reproducibility: True 7 | data_path: 'dataset/' 8 | checkpoint_dir: 'saved' 9 | show_progress: True 10 | save_dataset: False 11 | save_dataloaders: False 12 | 13 | # training settings 14 | epochs: 300 15 | train_batch_size: 2048 16 | learner: adam 17 | learning_rate: 0.001 18 | neg_sampling: 19 | uniform: 1 20 | eval_step: 1 21 | stopping_step: 10 22 | clip_grad_norm: ~ 23 | # clip_grad_norm: {'max_norm': 5, 'norm_type': 2} 24 | weight_decay: 0.0 25 | 26 | # evaluation settings 27 | eval_args: 28 | split: {'RS':[0.8,0.1,0.1]} 29 | group_by: user 30 | order: RO 31 | mode: full 32 | repeatable: False 33 | metrics: ["Recall","MRR","NDCG","Hit","Precision"] 34 | topk: [10] 35 | valid_metric: MRR@10 36 | valid_metric_bigger: True 37 | eval_batch_size: 4096 38 | loss_decimal_place: 4 39 | metric_decimal_place: 4 40 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: deepcarskit 3 | version: 1.0.1 4 | 5 | source: 6 | path: ../ 7 | 8 | requirements: 9 | build: 10 | - python 11 | host: 12 | - python 13 | - recbole ==1.0.1 14 | - numpy >=1.17.2 15 | - scipy ==1.6.0 16 | - pandas >=1.0.5 17 | - tqdm >=4.48.2 18 | - pyyaml >=5.1.0 19 | - scikit-learn >=0.23.2 20 | - pytorch >=1.7.0 21 | - colorlog==4.7.2 22 | - colorama==0.4.4 23 | - tensorboard >=2.5.0 24 | run: 25 | - python 26 | - recbole ==1.0.1 27 | - numpy >=1.17.2 28 | - scipy ==1.6.0 29 | - pandas >=1.0.5 30 | - tqdm >=4.48.2 31 | - pyyaml >=5.1.0 32 | - scikit-learn >=0.23.2 33 | - pytorch >=1.7.0 34 | - colorlog==4.7.2 35 | - colorama==0.4.4 36 | - tensorboard >=2.5.0 37 | test: 38 | imports: 39 | - deepcarskit 40 | 41 | about: 42 | home: https://github.com/irecsys/DeepCARSKit 43 | license: MIT 44 | summary: "A Deep Learning Based Context-Aware Recommendation Library" 45 | 46 | 
-------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | 2 | # @Author : Yong Zheng 3 | 4 | 5 | import argparse 6 | import time 7 | import torch 8 | import multiprocessing as mcpu 9 | from deepcarskit.quick_start import run 10 | from logging import getLogger 11 | 12 | 13 | 14 | if __name__ == '__main__': 15 | print('GPU availability: ', torch.cuda.is_available()) 16 | 17 | n_gpu = torch.cuda.device_count() 18 | print('Num of GPU: ', n_gpu) 19 | 20 | if n_gpu>0: 21 | print(torch.cuda.get_device_name(0)) 22 | print('Current GPU index: ', torch.cuda.current_device()) 23 | 24 | logger = getLogger() 25 | t0 = time.time() 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--config_files', type=str, default='config.yaml', help='config files') 28 | 29 | args, _ = parser.parse_known_args() 30 | 31 | config_list = args.config_files.strip().split(' ') if args.config_files else None 32 | run(config_file_list=config_list) 33 | t1 = time.time() 34 | total = t1 - t0 35 | logger.info('time cost: '+ f': {total}s') -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 RUCAIBox 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dataset/tripadvisor/ReadMe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Data Name: TripAdvisor v2
5 | 6 | Data Descriptions:
7 | This data was scraped from online reviews on tripadvisor.com. There is only one context: trip type (Family, Couples, Business, Solo travel, Friends). Other features describing users and hotels are also available. The data set is sparse in both ratings and contexts: 14,175 ratings from 2,731 users on 2,269 hotels. 8 |
9 | 10 | Citation Information: 11 | 12 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /dataset/depaulmovie/ReadMe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Data Name: DePaulMovie
5 | 6 | Data Descriptions:
7 | This data was collected from surveys: students were asked to rate movies at different times, in different locations, and with different companions.
8 | 9 | Citation Information: 10 | 11 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import os 6 | 7 | from setuptools import setup, find_packages 8 | 9 | install_requires = ['numpy>=1.17.2', 'torch>=1.7.0', 'scipy==1.6.0', 'pandas>=1.0.5', 'tqdm>=4.48.2', 10 | 'colorlog==4.7.2','colorama==0.4.4', 'numpy==1.20.0', 11 | 'scikit_learn>=0.23.2', 'pyyaml>=5.1.0', 'tensorboard>=2.5.0', 'recbole==1.0.1'] 12 | 13 | setup_requires = [] 14 | 15 | extras_require = { 16 | 'hyperopt': ['hyperopt>=0.2.4'] 17 | } 18 | 19 | classifiers = ["License :: OSI Approved :: MIT License"] 20 | 21 | # Readthedocs requires Sphinx extensions to be specified as part of 22 | # install_requires in order to build properly. 23 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 24 | if on_rtd: 25 | install_requires.extend(setup_requires) 26 | 27 | setup( 28 | name='deepcarskit', 29 | version= 30 | '1.0.1', # please remember to edit deepcarskit/__init__.py in response, once updating the version 31 | description='A Deep Learning Based Context-Aware Recommendation Library', 32 | long_description_content_type="text/markdown", 33 | url='https://github.com/irecsys/DeepCARSKit', 34 | author='Yong Zheng', 35 | author_email='DeepCARSKit@Gmail.com', 36 | packages=[ 37 | package for package in find_packages() 38 | if package.startswith('deepcarskit') 39 | ], 40 | include_package_data=True, 41 | install_requires=install_requires, 42 | setup_requires=setup_requires, 43 | extras_require=extras_require, 44 | zip_safe=False, 45 | classifiers=classifiers, 46 | ) 47 | -------------------------------------------------------------------------------- /deepcarskit/model/fms/fm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/7/8 10:09 3 | # @Author : Shanlei Mu 4 | # @Email : slmu@ruc.edu.cn 5 | # @File : fms.py 6 | 7 | # UPDATE: 8 | # @Time : 2020/8/13, 9 | # @Author : Zihan Lin 10 | # @Email : linzihan.super@foxmain.com 11 | 12 | # UPDATE: 13 | # @Time : 2021/12 14 | # @Author : Yong Zheng 15 | # @Notes : made changes to adapt it for CARS 16 | 17 | r""" 18 | FM 19 | ################################################ 20 | References 21 | ----- 22 | Steffen Rendle et al. "Factorization Machines." in ICDM 2010. 23 | 24 | Notes 25 | ----- 26 | context variables are treated as individual dimensions 27 | """ 28 | 29 | import torch.nn as nn 30 | from torch.nn.init import xavier_normal_ 31 | 32 | from deepcarskit.model.context_recommender import ContextRecommender 33 | from recbole.model.layers import BaseFactorizationMachine 34 | from recbole.utils import EvaluatorType 35 | 36 | 37 | class FM(ContextRecommender): 38 | """Factorization Machine considers the second-order interaction with features to predict the final score. 
39 | 40 | """ 41 | 42 | def __init__(self, config, dataset): 43 | 44 | super(FM, self).__init__(config, dataset) 45 | 46 | # define layers and loss 47 | self.fm = BaseFactorizationMachine(reduce_sum=True) 48 | self.config = config 49 | 50 | if self.config['eval_type'] == EvaluatorType.RANKING: 51 | self.actfun = nn.Sigmoid() 52 | self.loss = nn.BCELoss() 53 | self.LABEL = self.config['LABEL_FIELD'] 54 | else: 55 | self.actfun = nn.LeakyReLU() 56 | self.loss = nn.MSELoss() 57 | self.LABEL = self.config['RATING_FIELD'] 58 | 59 | # parameters initialization 60 | self.apply(self._init_weights) 61 | 62 | def _init_weights(self, module): 63 | if isinstance(module, nn.Embedding): 64 | xavier_normal_(module.weight.data) 65 | 66 | def forward(self, interaction): 67 | fm_all_embeddings = self.concat_embed_input_fields(interaction) # [batch_size, num_field, embed_dim] 68 | y = self.actfun(self.first_order_linear(interaction) + self.fm(fm_all_embeddings)) 69 | return y.squeeze(-1) 70 | 71 | def calculate_loss(self, interaction): 72 | label = interaction[self.LABEL] 73 | 74 | output = self.forward(interaction) 75 | return self.loss(output, label) 76 | 77 | def predict(self, interaction): 78 | return self.forward(interaction) 79 | -------------------------------------------------------------------------------- /deepcarskit/utils/utils.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | 4 | """ 5 | deepcarskit.utils.utils 6 | ################################ 7 | """ 8 | 9 | 10 | import importlib 11 | 12 | from recbole.utils.enum_type import ModelType 13 | 14 | def get_model(model_name): 15 | r"""Automatically select model class based on model name 16 | 17 | Args: 18 | model_name (str): model name 19 | 20 | Returns: 21 | Recommender: model class 22 | """ 23 | 24 | model_submodule_recbole = [ 25 | 'general_recommender', 'sequential_recommender', 'knowledge_aware_recommender', 26 | 'exlib_recommender' 27 | ] 28 | 29 | model_submodule_deepcarskit = [ 30 | 'ae', 'fms', 'neucf' 31 | ] 32 | 33 | model_file_name = model_name.lower() 34 | model_module = None 35 | for submodule in model_submodule_deepcarskit: 36 | module_path = '.'.join(['deepcarskit.model', submodule, model_file_name]) 37 | if importlib.util.find_spec(module_path, __name__): 38 | model_module = importlib.import_module(module_path, __name__) 39 | break 40 | 41 | if model_module is None: 42 | for submodule in model_submodule_recbole: 43 | module_path = '.'.join(['recbole.model', submodule, model_file_name]) 44 | if importlib.util.find_spec(module_path, __name__): 45 | model_module = importlib.import_module(module_path, __name__) 46 | break 47 | 48 | if model_module is None: 49 | raise ValueError('`model_name` [{}] is not the name of an existing model.'.format(model_name)) 50 | model_class = getattr(model_module, model_name) 51 | return model_class 52 | 53 | 54 | def get_trainer(model_type, model_name): 55 | r"""Automatically select trainer class based on model type and model name 56 | 57 | Args: 58 | model_type (ModelType): model type 59 | model_name (str): model name 60 | 61 | Returns: 62 | Trainer: trainer class 63 | """ 64 | try: 65 | return getattr(importlib.import_module('deepcarskit.trainer'), model_name + 'Trainer') 66 | except AttributeError: 67 | if model_type == ModelType.KNOWLEDGE: 68 | return getattr(importlib.import_module('recbole.trainer'), 'KGTrainer') 69 | elif model_type == ModelType.TRADITIONAL: 70 | return 
getattr(importlib.import_module('recbole.trainer'), 'TraditionalTrainer') 71 | else: 72 | return getattr(importlib.import_module('deepcarskit.trainer'), 'CARSTrainer') 73 | -------------------------------------------------------------------------------- /deepcarskit/config/configurator.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | # @Notes : Inherit from recbole.config 4 | 5 | """ 6 | deepcarskit.config.configurator 7 | ################################ 8 | """ 9 | 10 | from deepcarskit.utils.utils import get_model 11 | from recbole.config import Config 12 | from recbole.utils import init_seed 13 | 14 | 15 | class CARSConfig(Config): 16 | 17 | def __init__(self, model=None, dataset=None, config_file_list=None, config_dict=None): 18 | super(CARSConfig, self).__init__(model, dataset, config_file_list, config_dict) 19 | 20 | def _get_model_and_dataset(self, model, dataset): 21 | 22 | if model is None: 23 | try: 24 | model = self.external_config_dict['model'] 25 | except KeyError: 26 | raise KeyError( 27 | 'model need to be specified in at least one of the these ways: ' 28 | '[model variable, config file, config dict, command line] ' 29 | ) 30 | if not isinstance(model, str): 31 | # if model is a class object 32 | final_model_class = model 33 | final_model = model.__name__ 34 | else: 35 | # if model is a name in string format 36 | final_model = model 37 | final_model_class = get_model(final_model) # need to get class object 38 | 39 | if dataset is None: 40 | try: 41 | final_dataset = self.external_config_dict['dataset'] 42 | except KeyError: 43 | raise KeyError( 44 | 'dataset need to be specified in at least one of the these ways: ' 45 | '[dataset variable, config file, config dict, command line] ' 46 | ) 47 | else: 48 | final_dataset = dataset 49 | 50 | return final_model, final_model_class, final_dataset 51 | 52 | def _get_final_config_dict(self): 53 | final_config_dict = dict() 54 | final_config_dict.update(self.internal_config_dict) 55 | final_config_dict.update(self.external_config_dict) 56 | # turn on corresponding metrics according to the recommendation task 57 | if final_config_dict['ranking']: 58 | final_config_dict['metrics'] = final_config_dict['ranking_metrics'] 59 | final_config_dict['valid_metric'] = final_config_dict['ranking_valid_metric'] 60 | else: 61 | final_config_dict['metrics'] = final_config_dict['err_metrics'] 62 | final_config_dict['valid_metric'] = final_config_dict['err_valid_metric'] 63 | return final_config_dict 64 | -------------------------------------------------------------------------------- /deepcarskit/evaluator/evaluator.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | # @Notes : added F1 metrics, if precision and recall defined in user requests 4 | 5 | """ 6 | deepcarskit.evaluator.evaluator 7 | ##################################### 8 | """ 9 | import numpy as np 10 | from recbole.evaluator.register import metrics_dict 11 | from recbole.evaluator.collector import DataStruct 12 | 13 | 14 | class Evaluator(object): 15 | """Evaluator is used to check parameter correctness, and summarize the results of all metrics. 
16 | """ 17 | 18 | def __init__(self, config): 19 | self.config = config 20 | self.metrics = [metric.lower() for metric in self.config['metrics']] 21 | self.metric_class = {} 22 | 23 | for metric in self.metrics: 24 | self.metric_class[metric] = metrics_dict[metric](self.config) 25 | 26 | def evaluate(self, dataobject: DataStruct): 27 | """calculate all the metrics. It is called at the end of each epoch 28 | 29 | Args: 30 | dataobject (DataStruct): It contains all the information needed for metrics. 31 | 32 | Returns: 33 | dict: such as ``{'hit@20': 0.3824, 'recall@20': 0.0527, 'hit@10': 0.3153, 'recall@10': 0.0329, 'gauc': 0.9236}`` 34 | 35 | """ 36 | result_dict = {} 37 | topk = [] 38 | metric_f1 = False 39 | if self.config['ranking']: 40 | topk = self.config['topk'] 41 | if 'precision' in self.metrics and 'recall' in self.metrics: 42 | metric_f1 = True 43 | 44 | for metric in self.metrics: 45 | # dataobject has two keys: rec.score, data.label 46 | metric_val = self.metric_class[metric].calculate_metric(dataobject) 47 | result_dict.update(metric_val) 48 | 49 | # adding F1 metric, if precision and recall were calculated 50 | if metric_f1: 51 | k = topk[0] 52 | keys = result_dict.keys() 53 | key1 = 'precision@'+str(k) 54 | key2 = 'recall@'+str(k) 55 | key = 'f1@'+str(k) 56 | if key1 in keys and key2 in keys and key not in keys: 57 | metric = {} 58 | for k in topk: 59 | key1 = 'precision@'+str(k) 60 | key2 = 'recall@'+str(k) 61 | key = 'f1@'+str(k) 62 | precision = result_dict[key1] 63 | recall = result_dict[key2] 64 | if (precision + recall) == 0: 65 | f1 = 0 66 | else: 67 | f1 = round(2*precision*recall/(precision + recall), self.config['metric_decimal_place']) 68 | metric[key] = f1 69 | result_dict.update(metric) 70 | return result_dict 71 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | field_separator: "," 2 | seq_separator: " " 3 | 4 | gpu_id: 0 5 | use_gpu: True 6 | show_progress: False 7 | save_dataset: False 8 | save_dataloaders: False 9 | 10 | ############### data setting ############### 11 | seed: 2022 12 | dataset: depaulmovie 13 | # define data_path as the parent directory of your data folder 14 | # data_path: d:\dataset\ 15 | 16 | USER_ID_FIELD: user_id 17 | ITEM_ID_FIELD: item_id 18 | RATING_FIELD: rating 19 | CONTEXT_SITUATION_FIELD: contexts 20 | USER_CONTEXT_FIELD: uc_id 21 | 22 | # note: you can use either load or unload, cannot use them both 23 | # load_col is used to load specific columns; unload_col is used to ignore selected columns 24 | # set "load_col: ~", if you want to load all cols 25 | # load_col: {'inter': ['user_id','item_id','rating','contexts','uc_id']} 26 | # unload_col: {'inter': ['contexts']} 27 | # by default, we load all cols, unless there are some special requirements 28 | load_col: ~ 29 | 30 | # used for topN ranking only 31 | LABEL_FIELD: label 32 | threshold: 33 | rating: 0 34 | # the current library does not support negative sampling 35 | neg_sampling: ~ 36 | 37 | ############### model setting ############### 38 | model: NeuCMFii 39 | 40 | # General model 41 | epochs: 50 42 | train_batch_size: 500 43 | eval_batch_size: 409600 44 | learner: adam 45 | # learner: adam, RMSprop 46 | 47 | stopping_step: 10 48 | clip_grad_norm: ~ 49 | # clip_grad_norm: {'max_norm': 5, 'norm_type': 2} 50 | weight_decay: 0.0 51 | 52 | # NeuCF models 53 | mf_embedding_size: 64 54 | mlp_embedding_size: 64 55 | mlp_hidden_size: [128,64,32] 56 | 
learning_rate: 0.01 57 | dropout_prob: 0.1 58 | 59 | #tf_train: True 60 | mf_train: True 61 | mlp_train: True 62 | 63 | # FM models 64 | embedding_size: 64 65 | #mlp_hidden_size: [128,64,32] 66 | #learning_rate: 0.01 67 | #dropout_prob: 0.3 68 | 69 | ############### Evaluation setting ############### 70 | eval_args: 71 |   # split: {'RS': [0.8, 0.2]} # hold-out evaluation 72 |   split: {'CV': 5, 'num_processes': 4} # N-fold cross validation by multiprocessing 73 |   group_by: user 74 |   mode: labeled # do not change it; DeepCARSKit only supports this mode 75 |   order: RO 76 | 77 | # indicate whether the task is ranking or rating prediction 78 | # evaluation metrics are automatically selected based on the True/False setting here 79 | ranking: False 80 | # indicate the activation function for the ranking task 81 | # LeakyReLU is the default activation function for both ranking and rating prediction 82 | sigmoid: False 83 | 84 | # define metrics for ranking and rating prediction tasks 85 | ranking_valid_metric: Recall@10 86 | ranking_metrics: ['Precision','Recall','NDCG','MRR','MAP'] 87 | topk: [10,20,30] 88 | 89 | err_valid_metric: MAE 90 | err_metrics: ['MAE','RMSE','AUC'] 91 | 92 | ############### Output setting ############### 93 | loss_decimal_place: 4 94 | metric_decimal_place: 4 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /deepcarskit/model/fms/deepfm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time   : 2020/7/8 3 | # @Author : Shanlei Mu 4 | # @Email  : slmu@ruc.edu.cn 5 | # @File   : deepfm.py 6 | 7 | # UPDATE: 8 | # @Time   : 2020/8/14 9 | # @Author : Zihan Lin 10 | # @Email  : linzihan.super@foxmain.com 11 | 12 | # UPDATE: 13 | # @Time   : 2021/12 14 | # @Author : Yong Zheng 15 | # @Notes  : made changes to adapt it for CARS 16 | 17 | r""" 18 | DeepFM 19 | ################################################ 20 | References 21 | ----- 22 | Huifeng Guo et al. "DeepFM: A Factorization-Machine based Neural Network for CTR Prediction." in IJCAI 2017. 23 | 24 | Notes 25 | ----- 26 | context variables are treated as individual dimensions 27 | """ 28 | 29 | import torch.nn as nn 30 | from torch.nn.init import xavier_normal_, constant_ 31 | 32 | from deepcarskit.model.context_recommender import ContextRecommender 33 | from recbole.model.layers import BaseFactorizationMachine, MLPLayers 34 | from recbole.utils import EvaluatorType 35 | 36 | 37 | class DeepFM(ContextRecommender): 38 |     """DeepFM is a DNN-enhanced FM which uses both a DNN and an FM to calculate feature interactions. 39 |     DeepFM can also be seen as a combination of FNN and FM.
40 | 41 | """ 42 | 43 | def __init__(self, config, dataset): 44 | super(DeepFM, self).__init__(config, dataset) 45 | 46 | # load parameters info 47 | self.config = config 48 | self.mlp_hidden_size = config['mlp_hidden_size'] 49 | self.dropout_prob = config['dropout_prob'] 50 | 51 | # define layers and loss 52 | self.fm = BaseFactorizationMachine(reduce_sum=True) 53 | size_list = [self.embedding_size * self.num_feature_field] + self.mlp_hidden_size 54 | self.mlp_layers = MLPLayers(size_list, self.dropout_prob) 55 | self.deep_predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) # Linear product to the final score 56 | 57 | # parameters initialization 58 | self.apply(self._init_weights) 59 | 60 | def _init_weights(self, module): 61 | if isinstance(module, nn.Embedding): 62 | xavier_normal_(module.weight.data) 63 | elif isinstance(module, nn.Linear): 64 | xavier_normal_(module.weight.data) 65 | if module.bias is not None: 66 | constant_(module.bias.data, 0) 67 | 68 | def forward(self, interaction): 69 | deepfm_all_embeddings = self.concat_embed_input_fields(interaction) # [batch_size, num_field, embed_dim] 70 | batch_size = deepfm_all_embeddings.shape[0] 71 | y_fm = self.first_order_linear(interaction) + self.fm(deepfm_all_embeddings) 72 | 73 | y_deep = self.deep_predict_layer(self.mlp_layers(deepfm_all_embeddings.view(batch_size, -1))) 74 | y = self.actfun(y_fm + y_deep) 75 | return y.squeeze(-1) 76 | 77 | def calculate_loss(self, interaction): 78 | label = interaction[self.LABEL] 79 | output = self.forward(interaction) 80 | return self.loss(output, label) 81 | 82 | def predict(self, interaction): 83 | return self.forward(interaction) 84 | -------------------------------------------------------------------------------- /deepcarskit/model/layers.py: -------------------------------------------------------------------------------- 1 | 2 | # @Time : 2021/12 3 | # @Author : Yong Zheng 4 | # @Notes : Inherit from recbole.model.layers.FMFirstOrderLinear 5 | 6 | 7 | 8 | """ 9 | deepcarskit.model.layers 10 | ############################# 11 | Common Layers in recommender system 12 | """ 13 | 14 | from recbole.model.layers import FMFirstOrderLinear 15 | from recbole.model.layers import FMEmbedding 16 | 17 | import numpy as np 18 | import torch 19 | import torch.nn as nn 20 | 21 | from recbole.utils import FeatureType, FeatureSource 22 | 23 | 24 | 25 | class FMFirstOrderLinear(FMFirstOrderLinear): 26 | """Calculate the first order score of the input features. 27 | This class is a member of ContextRecommender, you can call it easily when inherit ContextRecommender. 
28 | 29 | """ 30 | 31 | def __init__(self, config, dataset, output_dim=1): 32 | 33 | super(FMFirstOrderLinear, self).__init__(config, dataset, output_dim) 34 | self.field_names = dataset.fields( 35 | source=[ 36 | FeatureSource.INTERACTION, 37 | FeatureSource.USER, 38 | FeatureSource.USER_ID, 39 | FeatureSource.ITEM, 40 | FeatureSource.ITEM_ID, 41 | ] 42 | ) 43 | if config['ranking']: 44 | self.LABEL = config['LABEL_FIELD'] 45 | else: 46 | self.LABEL = config['RATING_FIELD'] 47 | self.device = config['device'] 48 | self.token_field_names = [] 49 | self.token_field_dims = [] 50 | self.float_field_names = [] 51 | self.float_field_dims = [] 52 | self.token_seq_field_names = [] 53 | self.token_seq_field_dims = [] 54 | for field_name in self.field_names: 55 | if field_name == config['RATING_FIELD'] or field_name == config['LABEL_FIELD']: 56 | continue 57 | if dataset.field2type[field_name] == FeatureType.TOKEN: 58 | self.token_field_names.append(field_name) 59 | self.token_field_dims.append(dataset.num(field_name)) 60 | elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ: 61 | self.token_seq_field_names.append(field_name) 62 | self.token_seq_field_dims.append(dataset.num(field_name)) 63 | else: 64 | self.float_field_names.append(field_name) 65 | self.float_field_dims.append(dataset.num(field_name)) 66 | if len(self.token_field_dims) > 0: 67 | self.token_field_offsets = np.array((0, *np.cumsum(self.token_field_dims)[:-1]), dtype=np.long) 68 | self.token_embedding_table = FMEmbedding(self.token_field_dims, self.token_field_offsets, output_dim) 69 | if len(self.float_field_dims) > 0: 70 | self.float_embedding_table = nn.Embedding(np.sum(self.float_field_dims, dtype=np.int32), output_dim) 71 | if len(self.token_seq_field_dims) > 0: 72 | self.token_seq_embedding_table = nn.ModuleList() 73 | for token_seq_field_dim in self.token_seq_field_dims: 74 | self.token_seq_embedding_table.append(nn.Embedding(token_seq_field_dim, output_dim)) 75 | 76 | self.bias = nn.Parameter(torch.zeros((output_dim,)), requires_grad=True) -------------------------------------------------------------------------------- /deepcarskit/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/8/7 3 | # @Author : Zihan Lin 4 | # @Email : linzihan.super@foxmail.com 5 | 6 | # UPDATE 7 | # @Time : 2021/3/7 8 | # @Author : Jiawei Guan 9 | # @Email : guanjw@ruc.edu.cn 10 | 11 | # UPDATE: 12 | # @Time : 2021/12 13 | # @Author : Yong Zheng 14 | # @Notes : made light changes to adapt it for CARS 15 | 16 | """ 17 | deepcarskit.utils.logger 18 | ############################### 19 | """ 20 | 21 | import logging 22 | import os 23 | import colorlog 24 | import re 25 | 26 | from recbole.utils.utils import get_local_time, ensure_dir 27 | from colorama import init 28 | 29 | log_colors_config = { 30 | 'DEBUG': 'cyan', 31 | 'WARNING': 'yellow', 32 | 'ERROR': 'red', 33 | 'CRITICAL': 'red', 34 | } 35 | 36 | 37 | class RemoveColorFilter(logging.Filter): 38 | 39 | def filter(self, record): 40 | if record: 41 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 42 | record.msg = ansi_escape.sub('', str(record.msg)) 43 | return True 44 | 45 | 46 | def set_color(log, color, highlight=True): 47 | color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white'] 48 | try: 49 | index = color_set.index(color) 50 | except: 51 | index = len(color_set) - 1 52 | prev_log = '\033[' 53 | if highlight: 54 | prev_log += '1;3' 55 | else: 56 | 
prev_log += '0;3' 57 | prev_log += str(index) + 'm' 58 | return prev_log + log + '\033[0m' 59 | 60 | 61 | def init_logger(config): 62 | """ 63 | A logger that can show a message on standard output and write it into the 64 | file named `filename` simultaneously. 65 | All the message that you want to log MUST be str. 66 | 67 | Args: 68 | config (Config): An instance object of Config, used to record parameter information. 69 | 70 | Example: 71 | >>> logger = logging.getLogger(config) 72 | >>> logger.debug(train_state) 73 | >>> logger.info(train_result) 74 | """ 75 | init(autoreset=True) 76 | LOGROOT = './log/' 77 | dir_name = os.path.dirname(LOGROOT) 78 | ensure_dir(dir_name) 79 | 80 | logfilename = '{}-{}-{}.log'.format(config['dataset'], config['model'], get_local_time()) 81 | 82 | logfilepath = os.path.join(LOGROOT, logfilename) 83 | 84 | filefmt = "%(asctime)-15s %(levelname)s %(message)s" 85 | filedatefmt = "%a %d %b %Y %H:%M:%S" 86 | fileformatter = logging.Formatter(filefmt, filedatefmt) 87 | 88 | sfmt = "%(log_color)s%(asctime)-15s %(levelname)s %(message)s" 89 | sdatefmt = "%d %b %H:%M" 90 | sformatter = colorlog.ColoredFormatter(sfmt, sdatefmt, log_colors=log_colors_config) 91 | if config['state'] is None or config['state'].lower() == 'info': 92 | level = logging.INFO 93 | elif config['state'].lower() == 'debug': 94 | level = logging.DEBUG 95 | elif config['state'].lower() == 'error': 96 | level = logging.ERROR 97 | elif config['state'].lower() == 'warning': 98 | level = logging.WARNING 99 | elif config['state'].lower() == 'critical': 100 | level = logging.CRITICAL 101 | else: 102 | level = logging.INFO 103 | 104 | fh = logging.FileHandler(logfilepath) 105 | fh.setLevel(level) 106 | fh.setFormatter(fileformatter) 107 | remove_color_filter = RemoveColorFilter() 108 | fh.addFilter(remove_color_filter) 109 | 110 | sh = logging.StreamHandler() 111 | sh.setLevel(level) 112 | sh.setFormatter(sformatter) 113 | 114 | logging.basicConfig(level=level, handlers=[sh, fh]) 115 | return fh, logfilepath 116 | -------------------------------------------------------------------------------- /deepcarskit/evaluator/collector.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | # @Notes : Inherit from recbole.evaluator.Collector 4 | 5 | """ 6 | recbole.evaluator.collector 7 | ################################################ 8 | """ 9 | 10 | from recbole.evaluator.register import Register 11 | from recbole.evaluator import Collector, DataStruct 12 | import torch 13 | 14 | 15 | class CARSCollector(Collector): 16 | """The collector is used to collect the resource for evaluator. 17 | As the evaluation metrics are various, the needed resource not only contain the recommended result 18 | but also other resource from data and model. They all can be collected by the collector during the training 19 | and evaluation process. 20 | 21 | This class is only used in Trainer. 22 | 23 | """ 24 | 25 | def __init__(self, config): 26 | self.config = config 27 | self.data_struct = DataStruct() 28 | self.register = Register(config) 29 | self.full = ('full' in config['eval_args']['mode']) 30 | self.topk = self.config['topk'] 31 | self.device = self.config['device'] 32 | 33 | def eval_batch_collect( 34 | self, scores_tensor: torch.Tensor, interaction, positive_u: torch.Tensor, positive_i: torch.Tensor 35 | ): 36 | """ Collect the evaluation resource from batched eval data and batched model output. 
37 | Args: 38 | scores_tensor (Torch.Tensor): the output tensor of model with the shape of `(N, )` 39 | interaction(Interaction): batched eval data. 40 | positive_u(Torch.Tensor): the row index of positive items for each user. 41 | positive_i(Torch.Tensor): the positive item id for each user. 42 | """ 43 | if self.register.need('rec.items'): 44 | 45 | # get topk 46 | _, topk_idx = torch.topk(scores_tensor, max(self.topk), dim=-1) # n_users x k 47 | self.data_struct.update_tensor('rec.items', topk_idx) 48 | 49 | if self.register.need('rec.topk'): 50 | 51 | _, topk_idx = torch.topk(scores_tensor, max(self.topk), dim=-1) # n_users x k 52 | pos_matrix = torch.zeros_like(scores_tensor, dtype=torch.int) 53 | pos_matrix[positive_u, positive_i] = 1 54 | pos_len_list = pos_matrix.sum(dim=1, keepdim=True) 55 | pos_idx = torch.gather(pos_matrix, dim=1, index=topk_idx) 56 | result = torch.cat((pos_idx, pos_len_list), dim=1) 57 | self.data_struct.update_tensor('rec.topk', result) 58 | 59 | if self.register.need('rec.meanrank'): 60 | 61 | desc_scores, desc_index = torch.sort(scores_tensor, dim=-1, descending=True) 62 | 63 | # get the index of positive items in the ranking list 64 | pos_matrix = torch.zeros_like(scores_tensor) 65 | pos_matrix[positive_u, positive_i] = 1 66 | pos_index = torch.gather(pos_matrix, dim=1, index=desc_index) 67 | 68 | avg_rank = self._average_rank(desc_scores) 69 | pos_rank_sum = torch.where(pos_index == 1, avg_rank, torch.zeros_like(avg_rank)).sum(dim=-1, keepdim=True) 70 | 71 | pos_len_list = pos_matrix.sum(dim=1, keepdim=True) 72 | user_len_list = desc_scores.argmin(dim=1, keepdim=True) 73 | result = torch.cat((pos_rank_sum, user_len_list, pos_len_list), dim=1) 74 | self.data_struct.update_tensor('rec.meanrank', result) 75 | 76 | if self.register.need('rec.score'): 77 | 78 | self.data_struct.update_tensor('rec.score', scores_tensor) 79 | 80 | if self.register.need('data.label'): 81 | self.label_field = self.config['LABEL_FIELD'] 82 | if self.config['ranking']: 83 | self.data_struct.update_tensor('data.label', interaction[self.label_field].to(self.device)) 84 | else: 85 | self.data_struct.update_tensor('data.label', interaction[self.config['RATING_FIELD']].to(self.device)) 86 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmf0w.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | 4 | 5 | r""" 6 | NeuCMF0w 7 | ################################################ 8 | References 9 | ----- 10 | Unger, M., Tuzhilin, A., & Livne, A. (2020). Context-aware recommendations based on deep learning frameworks. ACM Transactions on Management Information Systems (TMIS), 11(2), 1-15. 11 | 12 | Notes 13 | ----- 14 | 1). NeuCMF0w has 2 towers (MLP and MF), and it fuses contexts into MLP tower only. 15 | 16 | 2). 
NeuCMF0w utilizes context situation as a whole/a single dimension to be embedded 17 | """ 18 | 19 | import torch 20 | import torch.nn as nn 21 | from torch.nn.init import normal_ 22 | 23 | from deepcarskit.model.context_recommender import ContextRecommender 24 | from recbole.model.layers import MLPLayers 25 | from recbole.utils import InputType, EvaluatorType 26 | 27 | 28 | class NeuCMF0w(ContextRecommender): 29 | 30 | input_type = InputType.POINTWISE 31 | 32 | def __init__(self, config, dataset): 33 | super(NeuCMF0w, self).__init__(config, dataset) 34 | 35 | # load parameters info 36 | self.mf_embedding_size = config['mf_embedding_size'] 37 | self.mlp_embedding_size = config['mlp_embedding_size'] 38 | self.mlp_hidden_size = config['mlp_hidden_size'] 39 | self.dropout_prob = config['dropout_prob'] 40 | self.mf_train = config['mf_train'] 41 | self.mlp_train = config['mlp_train'] 42 | self.use_pretrain = config['use_pretrain'] 43 | self.mf_pretrain_path = config['mf_pretrain_path'] 44 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 45 | 46 | # define layers and loss 47 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size) 48 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size) 49 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 50 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 51 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size) 52 | 53 | # mlp layers = user, item, context_situation 54 | self.mlp_layers = MLPLayers([3 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 55 | self.mlp_layers.logger = None # remove logger to use torch.save() 56 | if self.mf_train and self.mlp_train: 57 | self.predict_layer = nn.Linear(self.mf_embedding_size + self.mlp_hidden_size[-1], 1) 58 | elif self.mf_train: 59 | self.predict_layer = nn.Linear(self.mf_embedding_size, 1) 60 | elif self.mlp_train: 61 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 62 | 63 | # parameters initialization 64 | if self.use_pretrain: 65 | self.load_pretrain() 66 | else: 67 | self.apply(self._init_weights) 68 | 69 | def _init_weights(self, module): 70 | if isinstance(module, nn.Embedding): 71 | normal_(module.weight.data, mean=0.0, std=0.01) 72 | 73 | def forward(self, user, item, context_situation): 74 | user_mf_e = self.user_mf_embedding(user) 75 | item_mf_e = self.item_mf_embedding(item) 76 | user_mlp_e = self.user_mlp_embedding(user) 77 | item_mlp_e = self.item_mlp_embedding(item) 78 | context_situation_e = self.context_situation_mlp_embedding(context_situation) 79 | if self.mf_train: 80 | mf_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 81 | if self.mlp_train: 82 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]] 83 | 84 | if self.mf_train and self.mlp_train: 85 | output = self.actfun(self.predict_layer(torch.cat((mf_output, mlp_output), -1))) 86 | elif self.mf_train: 87 | output = self.actfun(self.predict_layer(mf_output)) 88 | elif self.mlp_train: 89 | output = self.actfun(self.predict_layer(mlp_output)) 90 | else: 91 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 92 | return output.squeeze(-1) 93 | 94 | def calculate_loss(self, interaction): 95 | user = interaction[self.USER_ID] 96 | item = interaction[self.ITEM_ID] 97 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 98 | label = 
interaction[self.LABEL] 99 | 100 | output = self.forward(user, item, context_situation) 101 | return self.loss(output, label) 102 | 103 | def predict(self, interaction): 104 | user = interaction[self.USER_ID] 105 | item = interaction[self.ITEM_ID] 106 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 107 | return self.forward(user, item, context_situation) 108 | 109 | def dump_parameters(self): 110 | r"""A simple implementation of dumping model parameters for pretrain. 111 | 112 | """ 113 | if self.mf_train and not self.mlp_train: 114 | save_path = self.mf_pretrain_path 115 | torch.save(self, save_path) 116 | elif self.mlp_train and not self.mf_train: 117 | save_path = self.mlp_pretrain_path 118 | torch.save(self, save_path) 119 | -------------------------------------------------------------------------------- /deepcarskit/evaluator/base_metric.py: -------------------------------------------------------------------------------- 1 | # @Time : 2020/10/21 2 | # @Author : Kaiyuan Li 3 | # @email : tsotfsk@outlook.com 4 | 5 | # UPDATE 6 | # @Time : 2020/10/21, 2021/8/29 7 | # @Author : Kaiyuan Li, Zhichao Feng 8 | # @email : tsotfsk@outlook.com, fzcbupt@gmail.com 9 | 10 | # UPDATE: 11 | # @Time : 2021/12 12 | # @Author : Yong Zheng 13 | # @Notes : made light changes to adapt it for CARS 14 | 15 | """ 16 | deepcarskit.evaluator.abstract_metric 17 | ##################################### 18 | """ 19 | import numpy 20 | import torch 21 | from recbole.utils import EvaluatorType 22 | 23 | 24 | class AbstractMetric(object): 25 | """:class:`AbstractMetric` is the base object of all metrics. If you want to 26 | implement a metric, you should inherit this class. 27 | 28 | Args: 29 | config (Config): the config of evaluator. 30 | """ 31 | smaller = False 32 | 33 | def __init__(self, config): 34 | self.decimal_place = config['metric_decimal_place'] 35 | 36 | def calculate_metric(self, dataobject): 37 | """Get the dictionary of a metric. 38 | 39 | Args: 40 | dataobject(DataStruct): it contains all the information needed to calculate metrics. 41 | 42 | Returns: 43 | dict: such as ``{'metric@10': 3153, 'metric@20': 0.3824}`` 44 | """ 45 | raise NotImplementedError('Method [calculate_metric] should be implemented.') 46 | 47 | 48 | class TopkMetric(AbstractMetric): 49 | """:class:`TopkMetric` is a base object of top-k metrics. If you want to 50 | implement an top-k metric, you can inherit this class. 51 | 52 | Args: 53 | config (Config): The config of evaluator. 54 | """ 55 | metric_type = EvaluatorType.RANKING 56 | metric_need = ['uc', 'rec.topk'] 57 | 58 | def __init__(self, config): 59 | super().__init__(config) 60 | self.topk = config['topk'] 61 | 62 | def used_info(self, dataobject): 63 | """Get the bool matrix indicating whether the corresponding item is positive 64 | and number of positive items for each user. 65 | """ 66 | rec_mat = dataobject.get('rec.topk') 67 | topk_idx, pos_len_list = torch.split(rec_mat, [max(self.topk), 1], dim=1) 68 | return topk_idx.to(torch.bool).numpy(), pos_len_list.squeeze(-1).numpy() 69 | 70 | def topk_result(self, metric, value): 71 | """Match the metric value to the `k` and put them in `dictionary` form. 72 | 73 | Args: 74 | metric(str): the name of calculated metric. 75 | value(numpy.ndarray): metrics for each user, including values from `metric@1` to `metric@max(self.topk)`. 76 | 77 | Returns: 78 | dict: metric values required in the configuration. 
79 | """ 80 | 81 | metric_dict = {} 82 | avg_result = value.mean(axis=0) 83 | for k in self.topk: 84 | key = '{}@{}'.format(metric, k) 85 | metric_dict[key] = round(avg_result[k - 1], self.decimal_place) 86 | return metric_dict 87 | 88 | def metric_info(self, pos_index, pos_len=None): 89 | """Calculate the value of the metric. 90 | 91 | Args: 92 | pos_index(numpy.ndarray): a bool matrix, shape of ``n_users * max(topk)``. The item with the (j+1)-th \ 93 | highest score of i-th user is positive if ``pos_index[i][j] == True`` and negative otherwise. 94 | pos_len(numpy.ndarray): a vector representing the number of positive items per user, shape of ``(n_users,)``. 95 | 96 | Returns: 97 | numpy.ndarray: metrics for each user, including values from `metric@1` to `metric@max(self.topk)`. 98 | """ 99 | raise NotImplementedError('Method [metric_info] of top-k metric should be implemented.') 100 | 101 | 102 | class LossMetric(AbstractMetric): 103 | """:class:`LossMetric` is a base object of loss based metrics and AUC. If you want to 104 | implement an loss based metric, you can inherit this class. 105 | 106 | Args: 107 | config (Config): The config of evaluator. 108 | """ 109 | metric_type = EvaluatorType.VALUE 110 | metric_need = ['rec.score', 'data.label'] 111 | 112 | def __init__(self, config): 113 | super().__init__(config) 114 | self.config = config 115 | 116 | def used_info(self, dataobject): 117 | """Get scores that model predicted and the ground truth.""" 118 | preds = dataobject.get('rec.score') 119 | trues = dataobject.get('data.label') 120 | 121 | return preds.squeeze(-1).numpy(), trues.squeeze(-1).numpy() 122 | 123 | def output_metric(self, metric, dataobject): 124 | preds, trues = self.used_info(dataobject) 125 | result = self.metric_info(preds, trues) 126 | result=round(result, self.decimal_place) 127 | return {metric: result} 128 | 129 | def metric_info(self, preds, trues): 130 | """Calculate the value of the metric. 131 | 132 | Args: 133 | preds (numpy.ndarray): the scores predicted by model, a one-dimensional vector. 134 | trues (numpy.ndarray): the label of items, which has the same shape as ``preds``. 135 | 136 | Returns: 137 | float: The value of the metric. 138 | """ 139 | raise NotImplementedError('Method [metric_info] of loss-based metric should be implemented.') 140 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmfw0.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022 3 | # @Author : Yong Zheng 4 | 5 | 6 | 7 | r""" 8 | NeuCMFw0 9 | ################################################ 10 | References 11 | ----- 12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 13 | 14 | Notes 15 | ----- 16 | 1). NeuCMFw0 has 4 towers: MLP tower without contexts, MF tower with UI, MF with UC, MF with IC 17 | 18 | 2). 
w => we consider context situation as a whole/single dimension and create embedding for it, when we fuse them into the MF towers 19 | """ 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.nn.init import normal_ 24 | 25 | from deepcarskit.model.context_recommender import ContextRecommender 26 | from recbole.model.layers import MLPLayers 27 | from recbole.utils import InputType, EvaluatorType 28 | 29 | 30 | class NeuCMFw0(ContextRecommender): 31 | 32 | input_type = InputType.POINTWISE 33 | 34 | def __init__(self, config, dataset): 35 | super(NeuCMFw0, self).__init__(config, dataset) 36 | 37 | # load parameters info 38 | self.mf_embedding_size = config['mf_embedding_size'] 39 | self.mlp_embedding_size = config['mlp_embedding_size'] 40 | self.mlp_hidden_size = config['mlp_hidden_size'] 41 | self.dropout_prob = config['dropout_prob'] 42 | self.mf_train = config['mf_train'] 43 | self.mlp_train = config['mlp_train'] 44 | self.use_pretrain = config['use_pretrain'] 45 | self.mf_pretrain_path = config['mf_pretrain_path'] 46 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 47 | 48 | # define layers and loss 49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size) 50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size) 51 | self.context_situation_mf_embedding = nn.Embedding(self.n_context_situation, self.mf_embedding_size) 52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 54 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size) 55 | 56 | # mlp layers = user, item 57 | self.mlp_layers = MLPLayers([2 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 58 | self.mlp_layers.logger = None # remove logger to use torch.save() 59 | if self.mf_train and self.mlp_train: 60 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size + self.mlp_hidden_size[-1], 1) 61 | elif self.mf_train: 62 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size, 1) 63 | elif self.mlp_train: 64 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 65 | 66 | # parameters initialization 67 | if self.use_pretrain: 68 | self.load_pretrain() 69 | else: 70 | self.apply(self._init_weights) 71 | 72 | def _init_weights(self, module): 73 | if isinstance(module, nn.Embedding): 74 | normal_(module.weight.data, mean=0.0, std=0.01) 75 | 76 | def forward(self, user, item, context_situation): 77 | user_mf_e = self.user_mf_embedding(user) 78 | item_mf_e = self.item_mf_embedding(item) 79 | context_situation_mf_e = self.context_situation_mf_embedding(context_situation) 80 | user_mlp_e = self.user_mlp_embedding(user) 81 | item_mlp_e = self.item_mlp_embedding(item) 82 | if self.mf_train: 83 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 84 | mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 85 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 86 | if self.mlp_train: 87 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e), -1)) # [batch_size, layers[-1]] 88 | 89 | if self.mf_train and self.mlp_train: 90 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1))) 91 | elif self.mf_train: 92 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1))) 93 | elif 
self.mlp_train: 94 | output = self.actfun(self.predict_layer(mlp_output)) 95 | else: 96 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 97 | return output.squeeze(-1) 98 | 99 | def calculate_loss(self, interaction): 100 | user = interaction[self.USER_ID] 101 | item = interaction[self.ITEM_ID] 102 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 103 | label = interaction[self.LABEL] 104 | 105 | output = self.forward(user, item, context_situation) 106 | return self.loss(output, label) 107 | 108 | def predict(self, interaction): 109 | user = interaction[self.USER_ID] 110 | item = interaction[self.ITEM_ID] 111 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 112 | return self.forward(user, item, context_situation) 113 | 114 | def dump_parameters(self): 115 | r"""A simple implementation of dumping model parameters for pretrain. 116 | 117 | """ 118 | if self.mf_train and not self.mlp_train: 119 | save_path = self.mf_pretrain_path 120 | torch.save(self, save_path) 121 | elif self.mlp_train and not self.mf_train: 122 | save_path = self.mlp_pretrain_path 123 | torch.save(self, save_path) 124 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmf0i.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | 4 | r""" 5 | NeuCMF0i 6 | ################################################ 7 | References 8 | ----- 9 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 10 | 11 | Notes 12 | ----- 13 | 1). NeuCMF0i has 2 towers (MLP and MF), and it fuses contexts into MLP tower only. 14 | 15 | 2). NeuCMF0i creates embedding for each individual context conditions. 
16 | """ 17 | 18 | import torch 19 | import torch.nn as nn 20 | from torch.nn.init import normal_ 21 | 22 | from deepcarskit.model.context_recommender import ContextRecommender 23 | from recbole.model.layers import MLPLayers 24 | from recbole.utils import InputType, EvaluatorType 25 | 26 | 27 | class NeuCMF0i(ContextRecommender): 28 | 29 | input_type = InputType.POINTWISE 30 | 31 | def __init__(self, config, dataset): 32 | super(NeuCMF0i, self).__init__(config, dataset) 33 | 34 | # load parameters info 35 | self.mf_embedding_size = config['mf_embedding_size'] 36 | self.mlp_embedding_size = config['mlp_embedding_size'] 37 | self.mlp_hidden_size = config['mlp_hidden_size'] 38 | self.dropout_prob = config['dropout_prob'] 39 | self.mf_train = config['mf_train'] 40 | self.mlp_train = config['mlp_train'] 41 | self.use_pretrain = config['use_pretrain'] 42 | self.mf_pretrain_path = config['mf_pretrain_path'] 43 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 44 | 45 | # define layers and loss 46 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size) 47 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size) 48 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 49 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 50 | self.context_dimensions_mlp_embedding = [] 51 | for i in range(0, self.n_contexts_dim): 52 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device)) 53 | 54 | # mlp layers = user, item, context_situation 55 | self.mlp_layers = MLPLayers([(2 + self.n_contexts_dim) * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 56 | self.mlp_layers.logger = None # remove logger to use torch.save() 57 | if self.mf_train and self.mlp_train: 58 | self.predict_layer = nn.Linear(self.mf_embedding_size + self.mlp_hidden_size[-1], 1) 59 | elif self.mf_train: 60 | self.predict_layer = nn.Linear(self.mf_embedding_size, 1) 61 | elif self.mlp_train: 62 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 63 | 64 | # parameters initialization 65 | if self.use_pretrain: 66 | self.load_pretrain() 67 | else: 68 | self.apply(self._init_weights) 69 | 70 | def _init_weights(self, module): 71 | if isinstance(module, nn.Embedding): 72 | normal_(module.weight.data, mean=0.0, std=0.01) 73 | 74 | def forward(self, user, item, context_situation_list): 75 | user_mf_e = self.user_mf_embedding(user) 76 | item_mf_e = self.item_mf_embedding(item) 77 | user_mlp_e = self.user_mlp_embedding(user) 78 | item_mlp_e = self.item_mlp_embedding(item) 79 | context_situation_e = None 80 | for i in range(0, self.n_contexts_dim): 81 | condition = context_situation_list[i] 82 | embd = self.context_dimensions_mlp_embedding[i](condition) 83 | if context_situation_e is None: 84 | context_situation_e = embd 85 | else: 86 | context_situation_e = torch.cat((context_situation_e, embd), -1) 87 | if self.mf_train: 88 | mf_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 89 | if self.mlp_train: 90 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]] 91 | if self.mf_train and self.mlp_train: 92 | output = self.actfun(self.predict_layer(torch.cat((mf_output, mlp_output), -1))) 93 | elif self.mf_train: 94 | output = self.actfun(self.predict_layer(mf_output)) 95 | elif self.mlp_train: 96 | output = self.actfun(self.predict_layer(mlp_output)) 97 | else: 98 | 
raise RuntimeError('mf_train and mlp_train can not be False at the same time') 99 | return output.squeeze(-1) 100 | 101 | def calculate_loss(self, interaction): 102 | user = interaction[self.USER_ID] 103 | item = interaction[self.ITEM_ID] 104 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 105 | label = interaction[self.LABEL] 106 | 107 | output = self.forward(user, item, context_situation_list) 108 | return self.loss(output, label) 109 | 110 | def predict(self, interaction): 111 | user = interaction[self.USER_ID] 112 | item = interaction[self.ITEM_ID] 113 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 114 | return self.forward(user, item, context_situation_list) 115 | 116 | def dump_parameters(self): 117 | r"""A simple implementation of dumping model parameters for pretrain. 118 | 119 | """ 120 | if self.mf_train and not self.mlp_train: 121 | save_path = self.mf_pretrain_path 122 | torch.save(self, save_path) 123 | elif self.mlp_train and not self.mf_train: 124 | save_path = self.mlp_pretrain_path 125 | torch.save(self, save_path) -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmfww.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022 3 | # @Author : Yong Zheng 4 | 5 | 6 | 7 | r""" 8 | NeuCMFww 9 | ################################################ 10 | References 11 | ----- 12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 13 | 14 | Notes 15 | ----- 16 | 1). NeuCMFww has 4 towers: MLP tower with contexts, MF tower with UI, MF with UC, MF with IC 17 | 18 | 2). 
ww => we consider context situation as a whole/single dimension and create embedding for it, when we fuse contexts into the MLP and MF towers 19 | """ 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.nn.init import normal_ 24 | 25 | from deepcarskit.model.context_recommender import ContextRecommender 26 | from recbole.model.layers import MLPLayers 27 | from recbole.utils import InputType, EvaluatorType 28 | 29 | 30 | class NeuCMFww(ContextRecommender): 31 | 32 | input_type = InputType.POINTWISE 33 | 34 | def __init__(self, config, dataset): 35 | super(NeuCMFww, self).__init__(config, dataset) 36 | 37 | # load parameters info 38 | self.mf_embedding_size = config['mf_embedding_size'] 39 | self.mlp_embedding_size = config['mlp_embedding_size'] 40 | self.mlp_hidden_size = config['mlp_hidden_size'] 41 | self.dropout_prob = config['dropout_prob'] 42 | self.mf_train = config['mf_train'] 43 | self.mlp_train = config['mlp_train'] 44 | self.use_pretrain = config['use_pretrain'] 45 | self.mf_pretrain_path = config['mf_pretrain_path'] 46 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 47 | 48 | # define layers and loss 49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size) 50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size) 51 | self.context_situation_mf_embedding = nn.Embedding(self.n_context_situation, self.mf_embedding_size) 52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 54 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size) 55 | 56 | # mlp layers = user, item, context_situation 57 | self.mlp_layers = MLPLayers([3 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 58 | self.mlp_layers.logger = None # remove logger to use torch.save() 59 | if self.mf_train and self.mlp_train: 60 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size + self.mlp_hidden_size[-1], 1) 61 | elif self.mf_train: 62 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size, 1) 63 | elif self.mlp_train: 64 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 65 | 66 | # parameters initialization 67 | if self.use_pretrain: 68 | self.load_pretrain() 69 | else: 70 | self.apply(self._init_weights) 71 | 72 | def _init_weights(self, module): 73 | if isinstance(module, nn.Embedding): 74 | normal_(module.weight.data, mean=0.0, std=0.01) 75 | 76 | def forward(self, user, item, context_situation): 77 | user_mf_e = self.user_mf_embedding(user) 78 | item_mf_e = self.item_mf_embedding(item) 79 | context_situation_mf_e = self.context_situation_mf_embedding(context_situation) 80 | user_mlp_e = self.user_mlp_embedding(user) 81 | item_mlp_e = self.item_mlp_embedding(item) 82 | context_situation_mlp_e = self.context_situation_mlp_embedding(context_situation) 83 | if self.mf_train: 84 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 85 | mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 86 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 87 | if self.mlp_train: 88 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_mlp_e), -1)) # [batch_size, layers[-1]] 89 | 90 | if self.mf_train and self.mlp_train: 91 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1))) 92 
| elif self.mf_train: 93 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1))) 94 | elif self.mlp_train: 95 | output = self.actfun(self.predict_layer(mlp_output)) 96 | else: 97 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 98 | return output.squeeze(-1) 99 | 100 | def calculate_loss(self, interaction): 101 | user = interaction[self.USER_ID] 102 | item = interaction[self.ITEM_ID] 103 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 104 | label = interaction[self.LABEL] 105 | 106 | output = self.forward(user, item, context_situation) 107 | return self.loss(output, label) 108 | 109 | def predict(self, interaction): 110 | user = interaction[self.USER_ID] 111 | item = interaction[self.ITEM_ID] 112 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 113 | return self.forward(user, item, context_situation) 114 | 115 | def dump_parameters(self): 116 | r"""A simple implementation of dumping model parameters for pretrain. 117 | 118 | """ 119 | if self.mf_train and not self.mlp_train: 120 | save_path = self.mf_pretrain_path 121 | torch.save(self, save_path) 122 | elif self.mlp_train and not self.mf_train: 123 | save_path = self.mlp_pretrain_path 124 | torch.save(self, save_path) 125 | -------------------------------------------------------------------------------- /deepcarskit/trainer/trainer.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | # @Notes : Inherit from recbole.trainer.Trainer 4 | 5 | r""" 6 | recbole.trainer.trainer 7 | ################################ 8 | """ 9 | 10 | 11 | import numpy as np 12 | import torch 13 | 14 | from tqdm import tqdm 15 | 16 | from deepcarskit.data import LabledDataSortEvalDataLoader 17 | from deepcarskit.evaluator import CARSCollector 18 | from recbole.trainer import Trainer 19 | from recbole.data import FullSortEvalDataLoader 20 | from recbole.utils import EvaluatorType, set_color, get_gpu_usage 21 | from deepcarskit.evaluator import Evaluator 22 | 23 | class CARSTrainer(Trainer): 24 | r"""The basic Trainer for basic training and evaluation strategies in recommender systems. This class defines common 25 | functions for training and evaluation processes of most recommender system models, including fit(), evaluate(), 26 | resume_checkpoint() and some other features helpful for model training and evaluation. 27 | 28 | Generally speaking, this class can serve most recommender system models, If the training process of the model is to 29 | simply optimize a single loss without involving any complex training strategies, such as adversarial learning, 30 | pre-training and so on. 31 | 32 | Initializing the Trainer needs two parameters: `config` and `model`. `config` records the parameters information 33 | for controlling training and evaluation, such as `learning_rate`, `epochs`, `eval_step` and so on. 34 | `model` is the instantiated object of a Model Class. 
35 | 36 |     """ 37 | 38 |     def __init__(self, config, model): 39 |         super(CARSTrainer, self).__init__(config, model) 40 |         self.eval_collector = CARSCollector(config) 41 |         self.evaluator = Evaluator(config) 42 | 43 |     def _labled_data_sort_batch_eval(self, batched_data): 44 |         interaction, history_index, positive_u, positive_i = batched_data 45 |         try: 46 |             # Note: interaction without item ids 47 |             scores = self.model.full_sort_predict(interaction.to(self.device)) 48 |         except NotImplementedError: 49 |             inter_len = len(interaction) 50 |             new_inter = interaction.to(self.device).repeat_interleave(self.tot_item_num) 51 |             batch_size = len(new_inter) 52 |             new_inter.update(self.item_tensor.repeat(inter_len)) 53 |             if batch_size <= self.test_batch_size: 54 |                 scores = self.model.predict(new_inter) 55 |             else: 56 |                 scores = self._spilt_predict(new_inter, batch_size) 57 | 58 |         scores = scores.view(-1, self.tot_item_num) 59 |         scores[:, 0] = -np.inf 60 |         if history_index is not None: 61 |             scores[history_index] = -np.inf 62 |         return interaction, scores, positive_u, positive_i 63 | 64 | 65 |     @torch.no_grad() 66 |     def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progress=False): 67 |         r"""Evaluate the model based on the eval data. 68 | 69 |         Args: 70 |             eval_data (DataLoader): the eval data 71 |             load_best_model (bool, optional): whether to load the best model in the training process, default: True. 72 |                                               It should be set to True if users want to test the model after training. 73 |             model_file (str, optional): the saved model file, default: None. If users want to test the previously 74 |                                         trained model file, they can set this parameter. 75 |             show_progress (bool): Show the progress of the evaluate epoch. Defaults to ``False``. 76 | 77 |         Returns: 78 |             dict: eval result; key is the eval metric and value is the corresponding metric value.
79 | """ 80 | if not eval_data: 81 | return 82 | 83 | if load_best_model: 84 | if model_file: 85 | checkpoint_file = model_file 86 | else: 87 | checkpoint_file = self.saved_model_file 88 | checkpoint = torch.load(checkpoint_file) 89 | self.model.load_state_dict(checkpoint['state_dict']) 90 | self.model.load_other_parameter(checkpoint.get('other_parameter')) 91 | message_output = 'Loading model structure and parameters from {}'.format(checkpoint_file) 92 | self.logger.info(message_output) 93 | 94 | self.model.eval() 95 | 96 | if isinstance(eval_data, FullSortEvalDataLoader): 97 | eval_func = self._full_sort_batch_eval 98 | if self.item_tensor is None: 99 | self.item_tensor = eval_data.dataset.get_item_feature().to(self.device) 100 | elif isinstance(eval_data, LabledDataSortEvalDataLoader): 101 | eval_func = self._labled_data_sort_batch_eval 102 | if self.item_tensor is None: 103 | self.item_tensor = eval_data.dataset.get_item_feature().to(self.device) 104 | else: 105 | eval_func = self._neg_sample_batch_eval 106 | if self.config['eval_type'] == EvaluatorType.RANKING: 107 | self.tot_item_num = eval_data.dataset.item_num 108 | 109 | iter_data = ( 110 | tqdm( 111 | eval_data, 112 | total=len(eval_data), 113 | ncols=100, 114 | desc=set_color(f"Evaluate ", 'pink'), 115 | ) if show_progress else eval_data 116 | ) 117 | for batch_idx, batched_data in enumerate(iter_data): 118 | interaction, scores, positive_u, positive_i = eval_func(batched_data) 119 | if self.gpu_available and show_progress: 120 | iter_data.set_postfix_str(set_color('GPU RAM: ' + get_gpu_usage(self.device), 'yellow')) 121 | self.eval_collector.eval_batch_collect(scores, interaction, positive_u, positive_i) 122 | self.eval_collector.model_collect(self.model) 123 | struct = self.eval_collector.get_data_struct() 124 | result = self.evaluator.evaluate(struct) 125 | 126 | return result 127 | 128 | 129 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmfi0.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022 3 | # @Author : Yong Zheng 4 | 5 | 6 | 7 | r""" 8 | NeuCMFi0 9 | ################################################ 10 | References 11 | ----- 12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 13 | 14 | Notes 15 | ----- 16 | 1). NeuCMFi0 has 4 towers: MLP tower without contexts, MF tower with UI, MF with UC, MF with IC 17 | 18 | 2). 
i => we create embeddings for each individual context conditions when we fuse them into the MF towers 19 | """ 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.nn.init import normal_ 24 | 25 | from deepcarskit.model.context_recommender import ContextRecommender 26 | from recbole.model.layers import MLPLayers 27 | from recbole.utils import InputType, EvaluatorType 28 | 29 | 30 | class NeuCMFi0(ContextRecommender): 31 | 32 | input_type = InputType.POINTWISE 33 | 34 | def __init__(self, config, dataset): 35 | super(NeuCMFi0, self).__init__(config, dataset) 36 | 37 | # load parameters info 38 | self.mf_embedding_size = config['mf_embedding_size'] 39 | self.mlp_embedding_size = config['mlp_embedding_size'] 40 | self.mlp_hidden_size = config['mlp_hidden_size'] 41 | self.dropout_prob = config['dropout_prob'] 42 | self.mf_train = config['mf_train'] 43 | self.mlp_train = config['mlp_train'] 44 | self.use_pretrain = config['use_pretrain'] 45 | self.mf_pretrain_path = config['mf_pretrain_path'] 46 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 47 | 48 | # define layers and loss 49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size*self.n_contexts_dim) 50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size*self.n_contexts_dim) 51 | self.context_situation_mf_embedding = [] 52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 54 | self.context_dimensions_mlp_embedding = [] 55 | for i in range(0, self.n_contexts_dim): 56 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device)) 57 | self.context_situation_mf_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mf_embedding_size).to(self.device)) 58 | num_mf_towers = 3 59 | 60 | # mlp layers = user, item 61 | self.mlp_layers = MLPLayers([2 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 62 | self.mlp_layers.logger = None # remove logger to use torch.save() 63 | if self.mf_train and self.mlp_train: 64 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim + self.mlp_hidden_size[-1], 1) 65 | elif self.mf_train: 66 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim, 1) 67 | elif self.mlp_train: 68 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 69 | 70 | # parameters initialization 71 | if self.use_pretrain: 72 | self.load_pretrain() 73 | else: 74 | self.apply(self._init_weights) 75 | 76 | def _init_weights(self, module): 77 | if isinstance(module, nn.Embedding): 78 | normal_(module.weight.data, mean=0.0, std=0.01) 79 | 80 | def forward(self, user, item, context_situation_list): 81 | user_mf_e = self.user_mf_embedding(user) 82 | item_mf_e = self.item_mf_embedding(item) 83 | 84 | context_situation_mf_e = None 85 | for i in range(0, self.n_contexts_dim): 86 | condition = context_situation_list[i] 87 | embd = self.context_dimensions_mlp_embedding[i](condition) 88 | if context_situation_mf_e is None: 89 | context_situation_mf_e = embd 90 | else: 91 | context_situation_mf_e = torch.cat((context_situation_mf_e, embd), -1) 92 | 93 | user_mlp_e = self.user_mlp_embedding(user) 94 | item_mlp_e = self.item_mlp_embedding(item) 95 | if self.mf_train: 96 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 97 | mf_uc_output = torch.mul(user_mf_e, 
context_situation_mf_e) # [batch_size, embedding_size] 98 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 99 | if self.mlp_train: 100 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e), -1)) # [batch_size, layers[-1]] 101 | 102 | if self.mf_train and self.mlp_train: 103 | output = self.actfun( 104 | self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1))) 105 | elif self.mf_train: 106 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1))) 107 | elif self.mlp_train: 108 | output = self.actfun(self.predict_layer(mlp_output)) 109 | else: 110 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 111 | return output.squeeze(-1) 112 | 113 | def calculate_loss(self, interaction): 114 | user = interaction[self.USER_ID] 115 | item = interaction[self.ITEM_ID] 116 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 117 | label = interaction[self.LABEL] 118 | 119 | output = self.forward(user, item, context_situation_list) 120 | return self.loss(output, label) 121 | 122 | def predict(self, interaction): 123 | user = interaction[self.USER_ID] 124 | item = interaction[self.ITEM_ID] 125 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 126 | return self.forward(user, item, context_situation_list) 127 | 128 | def dump_parameters(self): 129 | r"""A simple implementation of dumping model parameters for pretrain. 130 | 131 | """ 132 | if self.mf_train and not self.mlp_train: 133 | save_path = self.mf_pretrain_path 134 | torch.save(self, save_path) 135 | elif self.mlp_train and not self.mf_train: 136 | save_path = self.mlp_pretrain_path 137 | torch.save(self, save_path) 138 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmfii.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022 3 | # @Author : Yong Zheng 4 | 5 | 6 | 7 | r""" 8 | NeuCMFii 9 | ################################################ 10 | References 11 | ----- 12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 13 | 14 | Notes 15 | ----- 16 | 1). NeuCMFii has 4 towers: MLP tower with contexts, MF tower with UI, MF with UC, MF with IC 17 | 18 | 2). 
ii => we create embeddings for each individual context conditions when we fuse contexts into the MLP and MF towers 19 | """ 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.nn.init import normal_ 24 | 25 | from deepcarskit.model.context_recommender import ContextRecommender 26 | from recbole.model.layers import MLPLayers 27 | from recbole.utils import InputType, EvaluatorType 28 | 29 | 30 | class NeuCMFii(ContextRecommender): 31 | 32 | input_type = InputType.POINTWISE 33 | 34 | def __init__(self, config, dataset): 35 | super(NeuCMFii, self).__init__(config, dataset) 36 | 37 | # load parameters info 38 | self.mf_embedding_size = config['mf_embedding_size'] 39 | self.mlp_embedding_size = config['mlp_embedding_size'] 40 | self.mlp_hidden_size = config['mlp_hidden_size'] 41 | self.dropout_prob = config['dropout_prob'] 42 | self.mf_train = config['mf_train'] 43 | self.mlp_train = config['mlp_train'] 44 | self.use_pretrain = config['use_pretrain'] 45 | self.mf_pretrain_path = config['mf_pretrain_path'] 46 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 47 | 48 | # define layers and loss 49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size*self.n_contexts_dim) 50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size*self.n_contexts_dim) 51 | self.context_situation_mf_embedding = [] 52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 54 | self.context_dimensions_mlp_embedding = [] 55 | for i in range(0, self.n_contexts_dim): 56 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device)) 57 | self.context_situation_mf_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mf_embedding_size).to(self.device)) 58 | num_mf_towers = 3 59 | 60 | # mlp layers = user, item, context_situation 61 | self.mlp_layers = MLPLayers([(2 + self.n_contexts_dim) * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 62 | self.mlp_layers.logger = None # remove logger to use torch.save() 63 | if self.mf_train and self.mlp_train: 64 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim + self.mlp_hidden_size[-1], 1) 65 | elif self.mf_train: 66 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim, 1) 67 | elif self.mlp_train: 68 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 69 | 70 | # parameters initialization 71 | if self.use_pretrain: 72 | self.load_pretrain() 73 | else: 74 | self.apply(self._init_weights) 75 | 76 | def _init_weights(self, module): 77 | if isinstance(module, nn.Embedding): 78 | normal_(module.weight.data, mean=0.0, std=0.01) 79 | 80 | def forward(self, user, item, context_situation_list): 81 | user_mf_e = self.user_mf_embedding(user) 82 | item_mf_e = self.item_mf_embedding(item) 83 | 84 | context_situation_mf_e = None 85 | for i in range(0, self.n_contexts_dim): 86 | condition = context_situation_list[i] 87 | embd = self.context_dimensions_mlp_embedding[i](condition) 88 | if context_situation_mf_e is None: 89 | context_situation_mf_e = embd 90 | else: 91 | context_situation_mf_e = torch.cat((context_situation_mf_e, embd), -1) 92 | 93 | user_mlp_e = self.user_mlp_embedding(user) 94 | item_mlp_e = self.item_mlp_embedding(item) 95 | context_situation_e = None 96 | for i in range(0, self.n_contexts_dim): 97 | condition = 
context_situation_list[i] 98 | embd = self.context_dimensions_mlp_embedding[i](condition) 99 | if context_situation_e is None: 100 | context_situation_e = embd 101 | else: 102 | context_situation_e = torch.cat((context_situation_e, embd), -1) 103 | if self.mf_train: 104 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 105 | mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 106 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 107 | if self.mlp_train: 108 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]] 109 | 110 | if self.mf_train and self.mlp_train: 111 | output = self.actfun( 112 | self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1))) 113 | elif self.mf_train: 114 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1))) 115 | elif self.mlp_train: 116 | output = self.actfun(self.predict_layer(mlp_output)) 117 | else: 118 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 119 | return output.squeeze(-1) 120 | 121 | def calculate_loss(self, interaction): 122 | user = interaction[self.USER_ID] 123 | item = interaction[self.ITEM_ID] 124 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 125 | label = interaction[self.LABEL] 126 | 127 | output = self.forward(user, item, context_situation_list) 128 | return self.loss(output, label) 129 | 130 | def predict(self, interaction): 131 | user = interaction[self.USER_ID] 132 | item = interaction[self.ITEM_ID] 133 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 134 | return self.forward(user, item, context_situation_list) 135 | 136 | def dump_parameters(self): 137 | r"""A simple implementation of dumping model parameters for pretrain. 138 | 139 | """ 140 | if self.mf_train and not self.mlp_train: 141 | save_path = self.mf_pretrain_path 142 | torch.save(self, save_path) 143 | elif self.mlp_train and not self.mf_train: 144 | save_path = self.mlp_pretrain_path 145 | torch.save(self, save_path) 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # DeepCARSKit 3 | 4 | *A Deep Learning Based Context-Aware Recommendation Library* 5 | 6 | [![License](https://img.shields.io/badge/License-MIT-orange.svg)](./LICENSE) 7 | [![Website carskit.github.io](https://img.shields.io/website-up-down-green-red/http/monip.org.svg)](https://carskit.github.io/) 8 | [![python](https://badges.aleen42.com/src/python.svg)](https://badges.aleen42.com/src/python.svg) 9 | [![Citation Badge](https://api.juleskreuer.eu/citation-badge.php?doi=10.1016/j.simpa.2022.100292)](https://scholar.google.com/citations?view_op=view_citation&hl=en&citation_for_view=0FENWMcAAAAJ:Bg7qf7VwUHIC) 10 | [![DOI:10.1007/978-3-319-76207-4_15](https://zenodo.org/badge/DOI/10.1016/j.simpa.2022.100292.svg)](https://doi.org/10.1016/j.simpa.2022.100292) 11 | 12 | [![CARSKit Website](images/intro-img1.jpg)](https://carskit.github.io/) 13 | 14 | 15 | ## History 16 | + **[CARSKit](https://github.com/irecsys/CARSKit)** was released in 2015, and it was the first open-source library for 17 | context-aware recommendations. There were no more significant updates in CARSKit since 2019. 
It was built in Java on top of [Librec](https://github.com/guoguibing/librec) v1.3.
18 | There is also a Python wrapper of CARSKit, [CARSKit-API](https://github.com/WagnoLeaoSergio/CARSKit_API).
19 | + Recommender systems based on deep learning have matured in recent years, and context-aware
20 | recommendation models based on traditional collaborative filtering (e.g., KNN-based CF, matrix factorization) have become
21 | outdated. Therefore, we developed and released [DeepCARSKit](https://github.com/irecsys/DeepCARSKit), which is built upon the [RecBole](https://recbole.io/) v1.0.0 recommendation library.
22 | DeepCARSKit is *a Deep Learning Based Context-Aware Recommendation Library* that runs on Python and [PyTorch](https://pytorch.org/).
23 | 
24 | 
25 | ## Feature
26 | + **Implemented Deep Context-Aware Recommendation Models.** Currently, we support CARS models built on factorization machines (FM) and
27 | Neural Collaborative Filtering (NeuCF and NeuMF). More algorithms will be added.
28 | 
29 | + **Multiple Data Splits & Evaluation Options.** We provide evaluations based on both hold-out and N-fold cross validation.
30 | 
31 | + **Extensive and Standard Evaluation Protocols.** We rewrote parts of RecBole's code to adapt its evaluations to context-aware recommendations.
32 | In particular, item recommendations can be produced for each unique combination of user and context situation. Relevance and ranking metrics,
33 | such as precision, recall, NDCG and MRR, can be calculated by taking context information into consideration.
34 | 
35 | + **Autosave Best Logs.** DeepCARSKit automatically saves the best log/configuration of the models you run in the 'log/best/' folder.
36 | 
37 | + **Other Features.** Other characteristics of DeepCARSKit, such as GPU acceleration, are inherited from RecBole.
38 | 
39 | 
40 | ## News & Updates
41 | **11/13/2024**: We release DeepCARSKit v1.0.1
42 | + Update requirements.txt
43 | + Address the randomness issue in N-fold cross validation by utilizing multiprocessing
44 | 
45 | **03/19/2022**: We release DeepCARSKit v1.0.0
46 | 
47 | ## Documents
48 | + [DeepCARSKit API](https://carskit.github.io/doc/DeepCARSKit/index.html)
49 | + [RecBole API](https://recbole.io/docs/)
50 | + Yong Zheng. "[DeepCARSKit: A Deep Learning Based Context-Aware Recommendation Library](https://doi.org/10.1016/j.simpa.2022.100292)", Software Impacts, Vol. 13, Elsevier, 2022
51 | + Yong Zheng. "[DeepCARSKit: A Demo and User Guide](https://doi.org/10.1145/3511047.3536417)", Adjunct Proceedings of the 30th ACM Conference on User Modeling, Adaptation and Personalization (ACM UMAP), Spain, July, 2022
52 | + Yong Zheng, Gonzalo Florez Arias. "[A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations](https://doi.org/10.1145/3511047.3536404)", Adjunct Proceedings of the 30th ACM Conference on User Modeling, Adaptation and Personalization (ACM UMAP), Spain, July, 2022
53 | 
54 | 
55 | 
56 | 
57 | ## Installation
58 | DeepCARSKit works with the following operating systems:
59 | 
60 | * Linux
61 | * Windows 10
62 | * macOS X
63 | 
64 | DeepCARSKit requires Python 3.7 or later, torch 1.7.0 or later, and RecBole 1.0.1.
65 | For more details, you can refer to the list of [requirements](https://github.com/irecsys/DeepCARSKit/blob/main/requirements.txt).
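As a quick sanity check before running anything, a minimal version-check sketch along the following lines can help. It is illustrative only: the repository also ships `check_torch.py` and `check_gpu.py` helpers whose names suggest a similar purpose, and the snippet below is not their actual contents.

```python
# Illustrative environment check for the stated requirements (not the bundled check scripts).
import sys

import torch
import recbole

print('python :', sys.version.split()[0])     # 3.7 or later is required
print('torch  :', torch.__version__)          # 1.7.0 or later is required
print('recbole:', recbole.__version__)        # 1.0.1 is expected
print('cuda   :', torch.cuda.is_available())  # True only with a CUDA-enabled torch build and a visible GPU
```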
66 | If you want to use DeepCARSKit with GPU,
67 | please ensure that your CUDA or cudatoolkit version is 9.2 or later.
68 | This requires NVIDIA driver version >= 396.26 (for Linux) or >= 397.44 (for Windows 10).
69 | 
70 | The DeepCARSKit library was successfully tested with the following environment:
71 | - `python==3.9.20`
72 | - `recbole==1.0.1`
73 | - `numpy==1.20.0`
74 | - `scipy==1.6.0`
75 | - `lightgbm==4.5.0`
76 | - `xgboost==2.1.1`
77 | 
78 | More info about installation from conda and pip will be released later.
79 | Currently, you can clone the source code from GitHub. We will publish the package to PyPI and conda in the next release.
80 | 
81 | ## Quick-Start
82 | With the source code, you can use the provided script to try the library:
83 | 
84 | ```bash
85 | python run.py
86 | ```
87 | 
88 | This script will run the NeuCMFi model on the DePaulMovie dataset (see the programmatic sketch near the end of this README).
89 | 
90 | ### Data Sets & Preparation
91 | A list of available data sets for research on context-aware recommender systems can be found [here](https://github.com/irecsys/CARSKit/tree/master/context-aware_data_sets).
92 | We provide two data sets (i.e., DePaulMovie and TripAdvisor) in the library. You can refer to their data format, e.g., [depaulmovie.inter](https://github.com/irecsys/DeepCARSKit/blob/main/dataset/depaulmovie/depaulmovie.inter).
93 | 
94 | More specifically, you need to prepare a data set that looks like this (use 'float' and 'token' to indicate numerical and nominal variables):
95 | 
96 | + user_id:token
97 | + item_id:token
98 | + rating:float
99 | + context variable 1:token
100 | + context variable 2:token
101 | + context variable N:token
102 | + contexts:token => a concatenation of context conditions
103 | + uc_id:token => a concatenation of user_id and contexts
104 | 
105 | ### Algorithms in NeuCMF Framework
106 | An extensive NeuCMF framework is included in the DeepCARSKit library. There are multiple variants of the NeuCMF models in this framework.
107 | 
108 | [![alt text](images/NeuCMF.png)](https://carskit.github.io/)
109 | 
110 | 
111 | ### Hyperparameter tuning
112 | You can tune the hyperparameters in the configuration file, config.yaml.
113 | 
114 | A more detailed user guide is on the way...
115 | 
116 | 
117 | ## Major Releases
118 | | Releases | Date       |
119 | |----------|------------|
120 | | v1.0.1   | 11/13/2024 |
121 | | v1.0.0   | 03/19/2022 |
122 | 
123 | 
124 | 
125 | ## Cite
126 | If you find DeepCARSKit useful for your research or development, please cite the following paper:
127 | 
128 | ```
129 | @article{deepcarskit,
130 |   title={DeepCARSKit: A Deep Learning Based Context-Aware Recommendation Library},
131 |   author={Zheng, Yong},
132 |   journal={Software Impacts},
133 |   volume={13},
134 |   pages={100292},
135 |   year={2022},
136 |   publisher={Elsevier}
137 | }
138 | ```
139 | ## Contributing
140 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
141 | Please make sure to update tests as appropriate.
142 | 
143 | We welcome collaborations and contributions to DeepCARSKit. Contributors' names will be listed here.
144 | 
145 | ## Sponsors
146 | The current project was supported by Google Cloud Platform. We are looking for more sponsors to support the development and distribution of this library.
147 | If you are interested in sponsorship, please let us know. Our official email is DeepCARSKit [at] gmail [dot] com.
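## Programmatic Quick-Start (Sketch)
For those who prefer to start the library from their own Python code rather than `python run.py`, here is a minimal sketch. It is illustrative only: it assumes that config.yaml already specifies the dataset, model and evaluation settings you want, and it simply calls the `run()` function defined in `deepcarskit/quick_start/quick_start.py`.

```python
# Illustrative sketch of a programmatic quick start, assuming config.yaml holds the desired settings.
from deepcarskit.quick_start.quick_start import run

if __name__ == '__main__':
    # The __main__ guard matters because run() may spawn worker processes for N-fold cross validation.
    result = run(config_file_list=['config.yaml'])
    # In hold-out mode, run() returns a dict with 'best_valid_score' and 'best_valid_result';
    # with N-fold cross validation, the averaged results are written to the log and the best
    # log file is copied into the 'log/best/' folder.
    print(result)
```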
148 | 
149 | ## License
150 | [MIT License](./LICENSE)
151 | 
-------------------------------------------------------------------------------- /deepcarskit/data/dataloader/general_dataloader.py: --------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 | # @Notes : added LabledDataSortEvalDataLoader for context-aware ranking evaluations
4 | 
5 | """
6 | deepcarskit.data.dataloader.general_dataloader
7 | ################################################
8 | """
9 | 
10 | import numpy as np
11 | import torch
12 | 
13 | from recbole.data.dataloader.general_dataloader import FullSortEvalDataLoader
14 | from recbole.data.interaction import Interaction, cat_interactions
15 | from recbole.utils import InputType, ModelType
16 | from collections import defaultdict
17 | from logging import getLogger
18 | 
19 | class FullSortEvalDataLoader(FullSortEvalDataLoader):  # thin wrapper that accepts (and ignores) an extra used_ids argument
20 |     def __init__(self, config, dataset, sampler, shuffle=False, used_ids=None):
21 |         super().__init__(config, dataset, sampler, shuffle=shuffle)
22 | 
23 | 
24 | class LabledDataSortEvalDataLoader(FullSortEvalDataLoader):
25 |     """:class:`LabledDataSortEvalDataLoader` is a dataloader for context-aware ranking evaluation over labeled data. In order to speed up calculation,
26 |     this dataloader only returns the user/context part of interactions, positive items and used items.
27 |     It does not return negative items.
28 | 
29 |     Args:
30 |         config (Config): The config of dataloader.
31 |         dataset (Dataset): The dataset of dataloader.
32 |         sampler (Sampler): The sampler of dataloader.
33 |         shuffle (bool, optional): Whether the dataloader will be shuffled after a round. Defaults to ``False``.
34 | 
35 |     used_item = all items that users have interacted with in the training and evaluation sets.
36 |     positive_item = all items that users have interacted with in the evaluation set.
37 |     history_item = all items that users have interacted with in the training set.
38 |     """
39 | 
40 |     def __init__(self, config, dataset, sampler, shuffle=False, used_ids=None):
41 |         self.uid_field = dataset.uid_field
42 |         self.iid_field = dataset.iid_field
43 |         self.is_sequential = config['MODEL_TYPE'] == ModelType.SEQUENTIAL
44 | 
45 |         self.user_id = config['USER_ID_FIELD']
46 |         self.item_id = config['ITEM_ID_FIELD']
47 |         self.uc_id = config['USER_CONTEXT_FIELD']
48 |         self.LABEL = config['LABEL_FIELD']
49 | 
50 |         if not self.is_sequential:
51 |             multidict_uc_items = self._get_multidict(dataset)  # uc (user-context pair) and its rated items
52 | 
53 |             '''
54 |             uc_positive_item = all items that the uc has rated, and which must be positive, in the evaluation set.
55 |             uc_history_item = all items that the uc has rated in the training set.
56 | ''' 57 | self.ucid_list = multidict_uc_items.keys() 58 | self.uc_num = max(self.ucid_list)+1 59 | self.ucid2items_num = np.zeros(self.uc_num, dtype=np.int64) 60 | self.ucid2positive_item = np.array([None] * self.uc_num) 61 | self.ucid2history_item = np.array([None] * self.uc_num) 62 | self.ucid_condidates={} 63 | 64 | # rated items (positive AND negative) for each uc in the training set 65 | ucid2used_item = used_ids 66 | 67 | for ucid in self.ucid_list: 68 | 69 | uc_positve_itemlist = set(multidict_uc_items[ucid]) 70 | self.ucid2positive_item[ucid] = torch.tensor(list(uc_positve_itemlist), dtype=torch.int64) 71 | 72 | self.ucid2items_num[ucid] = len(uc_positve_itemlist) 73 | 74 | uc_history_itemlist = ucid2used_item[ucid] 75 | 76 | self.ucid2history_item[ucid] = torch.tensor(list(uc_history_itemlist), dtype=torch.int64) 77 | 78 | # get uid and context information from uc innerid 79 | context_fields = dataset._get_context_fields() 80 | uid_list = [] 81 | dict_context = {} 82 | for context in context_fields: 83 | dict_context[context]=[] 84 | 85 | for ucid in self.ucid_list: 86 | uid = dataset._get_uid_from_usercontexts(ucid) 87 | uid_list.append(uid) 88 | tuple_context = dataset._get_context_tuple_from_usercontexts(ucid) 89 | for i in range(0,len(context_fields)): 90 | context = context_fields[i] 91 | dict_context[context].append(tuple_context[i]) 92 | 93 | self.ucid_list = torch.tensor(list(self.ucid_list), dtype=torch.int64) 94 | uid_list = torch.tensor(list(uid_list), dtype=torch.int64) 95 | # add uc data into data for predictions 96 | self.uc_df = dataset.join(Interaction({self.uid_field: uid_list, self.uc_id: self.ucid_list})) 97 | for context in dict_context.keys(): 98 | new_inter = dataset.join(Interaction({context: torch.tensor(list(dict_context[context]), dtype=torch.int64)})) 99 | self.uc_df.update(new_inter) 100 | 101 | 102 | self.config = config 103 | self.logger = getLogger() 104 | self.dataset = dataset 105 | self.sampler = sampler 106 | self.batch_size = self.step = None 107 | self.shuffle = shuffle 108 | self.pr = 0 109 | self._init_batch_size_and_step() 110 | 111 | def _get_multidict(self, dataset): 112 | matrix_uc_item = dataset._create_sparse_matrix(dataset.inter_feat, self.uc_id, self.item_id, 'coo', 113 | self.LABEL) 114 | # multidict_u_uc = defaultdict(list) 115 | # key = userid, value = Dict (key = uc, value = decending ranked items with ratings) 116 | # will get a list of Dict, given a userid 117 | multidict_uc_items = defaultdict(list) 118 | multidict_uc_items_positives = defaultdict(list) 119 | 120 | 121 | rows, cols = matrix_uc_item.shape 122 | for uc_id in range(1, rows): 123 | # Index = 0 => [PAD] 124 | # uc_id == inner id for user_context 125 | uc_items = matrix_uc_item.getrow(uc_id) # csr_matrix 126 | items = uc_items.indices # a list of items 127 | rates = uc_items.data # a list of ratings 128 | num_rates = len(rates) 129 | 130 | if num_rates == 0: 131 | continue 132 | 133 | dict_item_rating = {} 134 | 135 | for i in range(0, num_rates): 136 | key = items[i] 137 | value = rates[i] 138 | dict_item_rating[key] = value 139 | # sort items based on ratings 140 | dict_item_rating_decending = sorted(dict_item_rating.items(), key=lambda x: x[1], reverse=True) 141 | # add these items into dict which uses uc as key 142 | for items in dict_item_rating_decending: 143 | multidict_uc_items[uc_id].append(items[0]) 144 | return multidict_uc_items 145 | 146 | @property 147 | def pr_end(self): 148 | if not self.is_sequential: 149 | return len(self.ucid_list) 150 | else: 
151 | return len(self.dataset) 152 | 153 | def _next_batch_data(self): 154 | if not self.is_sequential: 155 | uc_df = self.uc_df[self.pr:self.pr + self.step] 156 | ucid_list = list(uc_df[self.uc_id]) 157 | 158 | history_item = self.ucid2history_item[ucid_list] 159 | positive_item = self.ucid2positive_item[ucid_list] 160 | 161 | history_u = torch.cat([torch.full_like(hist_iid, i) for i, hist_iid in enumerate(history_item)]) 162 | history_i = torch.cat(list(history_item)) 163 | 164 | positive_u = torch.cat([torch.full_like(pos_iid, i) for i, pos_iid in enumerate(positive_item)]) 165 | positive_i = torch.cat(list(positive_item)) 166 | 167 | self.pr += self.step 168 | return uc_df, (history_u, history_i), positive_u, positive_i 169 | else: 170 | interaction = self.dataset[self.pr:self.pr + self.step] 171 | inter_num = len(interaction) 172 | positive_u = torch.arange(inter_num) 173 | positive_i = interaction[self.iid_field] 174 | 175 | self.pr += self.step 176 | return interaction, None, positive_u, positive_i 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /style.cfg: -------------------------------------------------------------------------------- 1 | [style] 2 | # Align closing bracket with visual indentation. 3 | align_closing_bracket_with_visual_indent=True 4 | 5 | # Allow dictionary keys to exist on multiple lines. For example: 6 | # 7 | # x = { 8 | # ('this is the first element of a tuple', 9 | # 'this is the second element of a tuple'): 10 | # value, 11 | # } 12 | allow_multiline_dictionary_keys=False 13 | 14 | # Allow lambdas to be formatted on more than one line. 15 | allow_multiline_lambdas=False 16 | 17 | # Allow splitting before a default / named assignment in an argument list. 18 | allow_split_before_default_or_named_assigns=True 19 | 20 | # Allow splits before the dictionary value. 21 | allow_split_before_dict_value=True 22 | 23 | # Let spacing indicate operator precedence. For example: 24 | # 25 | # a = 1 * 2 + 3 / 4 26 | # b = 1 / 2 - 3 * 4 27 | # c = (1 + 2) * (3 - 4) 28 | # d = (1 - 2) / (3 + 4) 29 | # e = 1 * 2 - 3 30 | # f = 1 + 2 + 3 + 4 31 | # 32 | # will be formatted as follows to indicate precedence: 33 | # 34 | # a = 1*2 + 3/4 35 | # b = 1/2 - 3*4 36 | # c = (1+2) * (3-4) 37 | # d = (1-2) / (3+4) 38 | # e = 1*2 - 3 39 | # f = 1 + 2 + 3 + 4 40 | # 41 | arithmetic_precedence_indication=False 42 | 43 | # Number of blank lines surrounding top-level function and class 44 | # definitions. 45 | blank_lines_around_top_level_definition=2 46 | 47 | # Insert a blank line before a class-level docstring. 48 | blank_line_before_class_docstring=False 49 | 50 | # Insert a blank line before a module docstring. 51 | blank_line_before_module_docstring=True 52 | 53 | # Insert a blank line before a 'def' or 'class' immediately nested 54 | # within another 'def' or 'class'. For example: 55 | # 56 | # class Foo: 57 | # # <------ this blank line 58 | # def method(): 59 | # ... 60 | blank_line_before_nested_class_or_def=True 61 | 62 | # Do not split consecutive brackets. Only relevant when 63 | # dedent_closing_brackets is set. For example: 64 | # 65 | # call_func_that_takes_a_dict( 66 | # { 67 | # 'key1': 'value1', 68 | # 'key2': 'value2', 69 | # } 70 | # ) 71 | # 72 | # would reformat to: 73 | # 74 | # call_func_that_takes_a_dict({ 75 | # 'key1': 'value1', 76 | # 'key2': 'value2', 77 | # }) 78 | coalesce_brackets=True 79 | 80 | # The column limit. 81 | column_limit=120 82 | 83 | # The style for continuation alignment. 
Possible values are: 84 | # 85 | # - SPACE: Use spaces for continuation alignment. This is default behavior. 86 | # - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns 87 | # (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs or 88 | # CONTINUATION_INDENT_WIDTH spaces) for continuation alignment. 89 | # - VALIGN-RIGHT: Vertically align continuation lines to multiple of 90 | # INDENT_WIDTH columns. Slightly right (one tab or a few spaces) if 91 | # cannot vertically align continuation lines with indent characters. 92 | continuation_align_style=SPACE 93 | 94 | # Indent width used for line continuations. 95 | continuation_indent_width=4 96 | 97 | # Put closing brackets on a separate line, dedented, if the bracketed 98 | # expression can't fit in a single line. Applies to all kinds of brackets, 99 | # including function definitions and calls. For example: 100 | # 101 | # config = { 102 | # 'key1': 'value1', 103 | # 'key2': 'value2', 104 | # } # <--- this bracket is dedented and on a separate line 105 | # 106 | # time_series = self.remote_client.query_entity_counters( 107 | # entity='dev3246.region1', 108 | # key='dns.query_latency_tcp', 109 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 110 | # start_ts=now()-timedelta(days=3), 111 | # end_ts=now(), 112 | # ) # <--- this bracket is dedented and on a separate line 113 | dedent_closing_brackets=True 114 | 115 | # Disable the heuristic which places each list element on a separate line 116 | # if the list is comma-terminated. 117 | disable_ending_comma_heuristic=False 118 | 119 | # Place each dictionary entry onto its own line. 120 | each_dict_entry_on_separate_line=True 121 | 122 | # Require multiline dictionary even if it would normally fit on one line. 123 | # For example: 124 | # 125 | # config = { 126 | # 'key1': 'value1' 127 | # } 128 | force_multiline_dict=False 129 | 130 | # The regex for an i18n comment. The presence of this comment stops 131 | # reformatting of that line, because the comments are required to be 132 | # next to the string they translate. 133 | i18n_comment= 134 | 135 | # The i18n function call names. The presence of this function stops 136 | # reformattting on that line, because the string it has cannot be moved 137 | # away from the i18n comment. 138 | i18n_function_call= 139 | 140 | # Indent blank lines. 141 | indent_blank_lines=False 142 | 143 | # Put closing brackets on a separate line, indented, if the bracketed 144 | # expression can't fit in a single line. Applies to all kinds of brackets, 145 | # including function definitions and calls. For example: 146 | # 147 | # config = { 148 | # 'key1': 'value1', 149 | # 'key2': 'value2', 150 | # } # <--- this bracket is indented and on a separate line 151 | # 152 | # time_series = self.remote_client.query_entity_counters( 153 | # entity='dev3246.region1', 154 | # key='dns.query_latency_tcp', 155 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 156 | # start_ts=now()-timedelta(days=3), 157 | # end_ts=now(), 158 | # ) # <--- this bracket is indented and on a separate line 159 | indent_closing_brackets=False 160 | 161 | # Indent the dictionary value if it cannot fit on the same line as the 162 | # dictionary key. For example: 163 | # 164 | # config = { 165 | # 'key1': 166 | # 'value1', 167 | # 'key2': value1 + 168 | # value2, 169 | # } 170 | indent_dictionary_value=False 171 | 172 | # The number of columns to use for indentation. 173 | indent_width=4 174 | 175 | # Join short lines into one line. 
E.g., single line 'if' statements. 176 | join_multiple_lines=True 177 | 178 | # Do not include spaces around selected binary operators. For example: 179 | # 180 | # 1 + 2 * 3 - 4 / 5 181 | # 182 | # will be formatted as follows when configured with "*,/": 183 | # 184 | # 1 + 2*3 - 4/5 185 | no_spaces_around_selected_binary_operators= 186 | 187 | # Use spaces around default or named assigns. 188 | spaces_around_default_or_named_assign=False 189 | 190 | # Adds a space after the opening '{' and before the ending '}' dict delimiters. 191 | # 192 | # {1: 2} 193 | # 194 | # will be formatted as: 195 | # 196 | # { 1: 2 } 197 | spaces_around_dict_delimiters=False 198 | 199 | # Adds a space after the opening '[' and before the ending ']' list delimiters. 200 | # 201 | # [1, 2] 202 | # 203 | # will be formatted as: 204 | # 205 | # [ 1, 2 ] 206 | spaces_around_list_delimiters=False 207 | 208 | # Use spaces around the power operator. 209 | spaces_around_power_operator=True 210 | 211 | # Use spaces around the subscript / slice operator. For example: 212 | # 213 | # my_list[1 : 10 : 2] 214 | spaces_around_subscript_colon=False 215 | 216 | # Adds a space after the opening '(' and before the ending ')' tuple delimiters. 217 | # 218 | # (1, 2, 3) 219 | # 220 | # will be formatted as: 221 | # 222 | # ( 1, 2, 3 ) 223 | spaces_around_tuple_delimiters=False 224 | 225 | # The number of spaces required before a trailing comment. 226 | # This can be a single value (representing the number of spaces 227 | # before each trailing comment) or list of values (representing 228 | # alignment column values; trailing comments within a block will 229 | # be aligned to the first column value that is greater than the maximum 230 | # line length within the block). For example: 231 | # 232 | # With spaces_before_comment=5: 233 | # 234 | # 1 + 1 # Adding values 235 | # 236 | # will be formatted as: 237 | # 238 | # 1 + 1 # Adding values <-- 5 spaces between the end of the statement and comment 239 | # 240 | # With spaces_before_comment=15, 20: 241 | # 242 | # 1 + 1 # Adding values 243 | # two + two # More adding 244 | # 245 | # longer_statement # This is a longer statement 246 | # short # This is a shorter statement 247 | # 248 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment 249 | # short # This is a shorter statement 250 | # 251 | # will be formatted as: 252 | # 253 | # 1 + 1 # Adding values <-- end of line comments in block aligned to col 15 254 | # two + two # More adding 255 | # 256 | # longer_statement # This is a longer statement <-- end of line comments in block aligned to col 20 257 | # short # This is a shorter statement 258 | # 259 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment <-- the end of line comments are aligned based on the line length 260 | # short # This is a shorter statement 261 | # 262 | spaces_before_comment=2 263 | 264 | # Insert a space between the ending comma and closing bracket of a list, 265 | # etc. 266 | space_between_ending_comma_and_closing_bracket=False 267 | 268 | # Use spaces inside brackets, braces, and parentheses. For example: 269 | # 270 | # method_call( 1 ) 271 | # my_dict[ 3 ][ 1 ][ get_index( *args, **kwargs ) ] 272 | # my_set = { 1, 2, 3 } 273 | space_inside_brackets=False 274 | 275 | # Split before arguments 276 | split_all_comma_separated_values=False 277 | 278 | # Split before arguments, but do not split all subexpressions recursively 279 | # (unless needed). 
280 | split_all_top_level_comma_separated_values=False 281 | 282 | # Split before arguments if the argument list is terminated by a 283 | # comma. 284 | split_arguments_when_comma_terminated=False 285 | 286 | # Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@' 287 | # rather than after. 288 | split_before_arithmetic_operator=False 289 | 290 | # Set to True to prefer splitting before '&', '|' or '^' rather than 291 | # after. 292 | split_before_bitwise_operator=True 293 | 294 | # Split before the closing bracket if a list or dict literal doesn't fit on 295 | # a single line. 296 | split_before_closing_bracket=True 297 | 298 | # Split before a dictionary or set generator (comp_for). For example, note 299 | # the split before the 'for': 300 | # 301 | # foo = { 302 | # variable: 'Hello world, have a nice day!' 303 | # for variable in bar if variable != 42 304 | # } 305 | split_before_dict_set_generator=True 306 | 307 | # Split before the '.' if we need to split a longer expression: 308 | # 309 | # foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d)) 310 | # 311 | # would reformat to something like: 312 | # 313 | # foo = ('This is a really long string: {}, {}, {}, {}' 314 | # .format(a, b, c, d)) 315 | split_before_dot=False 316 | 317 | # Split after the opening paren which surrounds an expression if it doesn't 318 | # fit on a single line. 319 | split_before_expression_after_opening_paren=False 320 | 321 | # If an argument / parameter list is going to be split, then split before 322 | # the first argument. 323 | split_before_first_argument=False 324 | 325 | # Set to True to prefer splitting before 'and' or 'or' rather than 326 | # after. 327 | split_before_logical_operator=True 328 | 329 | # Split named assignments onto individual lines. 330 | split_before_named_assigns=True 331 | 332 | # Set to True to split list comprehensions and generators that have 333 | # non-trivial expressions and multiple clauses before each of these 334 | # clauses. For example: 335 | # 336 | # result = [ 337 | # a_long_var + 100 for a_long_var in xrange(1000) 338 | # if a_long_var % 10] 339 | # 340 | # would reformat to something like: 341 | # 342 | # result = [ 343 | # a_long_var + 100 344 | # for a_long_var in xrange(1000) 345 | # if a_long_var % 10] 346 | split_complex_comprehension=False 347 | 348 | # The penalty for splitting right after the opening bracket. 349 | split_penalty_after_opening_bracket=300 350 | 351 | # The penalty for splitting the line after a unary operator. 352 | split_penalty_after_unary_operator=10000 353 | 354 | # The penalty of splitting the line around the '+', '-', '*', '/', '//', 355 | # ``%``, and '@' operators. 356 | split_penalty_arithmetic_operator=300 357 | 358 | # The penalty for splitting right before an if expression. 359 | split_penalty_before_if_expr=0 360 | 361 | # The penalty of splitting the line around the '&', '|', and '^' 362 | # operators. 363 | split_penalty_bitwise_operator=300 364 | 365 | # The penalty for splitting a list comprehension or generator 366 | # expression. 367 | split_penalty_comprehension=80 368 | 369 | # The penalty for characters over the column limit. 370 | split_penalty_excess_character=7000 371 | 372 | # The penalty incurred by adding a line split to the unwrapped line. The 373 | # more line splits added the higher the penalty. 374 | split_penalty_for_added_line_split=30 375 | 376 | # The penalty of splitting a list of "import as" names. 
For example: 377 | # 378 | # from a_very_long_or_indented_module_name_yada_yad import (long_argument_1, 379 | # long_argument_2, 380 | # long_argument_3) 381 | # 382 | # would reformat to something like: 383 | # 384 | # from a_very_long_or_indented_module_name_yada_yad import ( 385 | # long_argument_1, long_argument_2, long_argument_3) 386 | split_penalty_import_names=0 387 | 388 | # The penalty of splitting the line around the 'and' and 'or' 389 | # operators. 390 | split_penalty_logical_operator=300 391 | 392 | # Use the Tab character for indentation. 393 | use_tabs=False 394 | 395 | -------------------------------------------------------------------------------- /deepcarskit/quick_start/quick_start.py: -------------------------------------------------------------------------------- 1 | # @Time : 2020/10/6 2 | # @Author : Shanlei Mu 3 | # @Email : slmu@ruc.edu.cn 4 | 5 | 6 | # UPDATE: 7 | # @Time : 2021/12 8 | # @Author : Yong Zheng 9 | # @Notes : made several changes to adapt it for CARS 10 | 11 | """ 12 | deepcarskit.quick_start 13 | ######################## 14 | """ 15 | import logging 16 | from logging import getLogger 17 | import shutil 18 | import glob 19 | import os 20 | 21 | import torch 22 | import pickle 23 | 24 | 25 | # from past.builtins import raw_input 26 | 27 | from deepcarskit.config import CARSConfig 28 | from deepcarskit.data import create_dataset, data_preparation, save_split_dataloaders, load_split_dataloaders 29 | from deepcarskit.utils.utils import get_model, get_trainer 30 | from deepcarskit.utils import init_logger, init_seed, set_color 31 | from multiprocessing.dummy import Pool as ThreadPool 32 | from multiprocessing import Pool 33 | from recbole.utils import EvaluatorType 34 | 35 | 36 | def eval_folds(args_tuple): 37 | train_data_fold = args_tuple[0] 38 | valid_data_fold = args_tuple[1] 39 | 40 | config = args_tuple[2] 41 | init_seed(config['seed'], config['reproducibility']) 42 | 43 | logger = args_tuple[3] 44 | fold = args_tuple[4] 45 | 46 | if config['save_dataloaders']: 47 | save_split_dataloaders(config, dataloaders=(train_data_fold, valid_data_fold)) 48 | 49 | # model loading and initialization 50 | init_seed(config['seed'], config['reproducibility']) 51 | model = get_model(config['model'])(config, train_data_fold.dataset).to(config['device']) 52 | 53 | # trainer loading and initialization 54 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model) 55 | name = trainer.saved_model_file 56 | ind = name.rindex('.') 57 | lname = list(name) 58 | lname.insert(ind, '_f'+str(fold)) 59 | trainer.saved_model_file = ''.join(lname) 60 | 61 | # model training 62 | best_valid_score_fold, best_valid_result_fold = trainer.fit( 63 | train_data_fold, valid_data_fold, saved=True, show_progress=config['show_progress'] 64 | ) 65 | msghead = 'Fold ' + str(fold) + ' completed: ' 66 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result_fold}') 67 | 68 | return best_valid_score_fold, best_valid_result_fold 69 | 70 | 71 | def run(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True): 72 | r""" A fast running api, which includes the complete process of 73 | training and testing a model on a specified dataset 74 | 75 | Args: 76 | model (str, optional): Model name. Defaults to ``None``. 77 | dataset (str, optional): Dataset name. Defaults to ``None``. 78 | config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``. 
79 | config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``. 80 | saved (bool, optional): Whether to save the model. Defaults to ``True``. 81 | """ 82 | # configurations initialization 83 | config = CARSConfig(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict) 84 | init_seed(config['seed'], config['reproducibility']) 85 | 86 | # logger initialization 87 | log_handler, log_filepath = init_logger(config) 88 | logger = getLogger() 89 | 90 | logger.info(config) 91 | 92 | # dataset filtering 93 | dataset = create_dataset(config) 94 | if config['save_dataset']: 95 | dataset.save() 96 | logger.info(dataset) 97 | 98 | # dataset splitting 99 | # train_data, valid_data, test_data = data_preparation(config, dataset) 100 | train_data, valid_data = data_preparation(config, dataset) 101 | 102 | 103 | CV = False 104 | if isinstance(train_data, list): 105 | CV = True 106 | n_folds = len(train_data) 107 | 108 | if CV: 109 | list_train_test = [] 110 | for i in range(n_folds): 111 | t = (train_data[i], valid_data[i], config, logger, (i+1)) 112 | list_train_test.append(t) 113 | 114 | # pool = ThreadPool() 115 | # rsts = pool.map(eval_folds, list_train_test) 116 | # pool.close() 117 | # pool.join() 118 | # print('cpu count: ', os.cpu_count()) 119 | 120 | num_processes = config['eval_args']['split']['num_processes'] 121 | with Pool(processes=num_processes) as pool: 122 | rsts = pool.map(eval_folds, list_train_test) 123 | 124 | best_valid_score = 0 125 | best_valid_result = {} 126 | 127 | for rst_fold in rsts: 128 | valid_score_fold = rst_fold[0] 129 | valid_result_fold = rst_fold[1] 130 | 131 | best_valid_score += valid_score_fold 132 | if not best_valid_result: 133 | best_valid_result = valid_result_fold 134 | else: 135 | for key in best_valid_result.keys(): 136 | best_valid_result[key] = best_valid_result[key] + valid_result_fold[key] 137 | 138 | best_valid_score = round(best_valid_score/n_folds, config['metric_decimal_place']) 139 | for key in best_valid_result: 140 | best_valid_result[key] = round(best_valid_result[key]/n_folds, config['metric_decimal_place']) 141 | msghead = 'Data: '+config['dataset']+', Results on '+str(n_folds)+' CV: best valid by '+config['model'] 142 | layers = [str(int) for int in config['mlp_hidden_size']] 143 | layers = ' '.join(layers) 144 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result}'+', lrate: '+str(config['learning_rate'])+', layers: ['+layers+']') 145 | log_handler.close() 146 | logger.removeHandler(log_handler) 147 | logger_name = log_filepath[:-4] + "_" + config['valid_metric'] + " = " + str(best_valid_score) + ".log" 148 | shutil.move(log_filepath, logger_name) 149 | update_best_log(config, logger_name, best_valid_result) 150 | else: 151 | if config['save_dataloaders']: 152 | save_split_dataloaders(config, dataloaders=(train_data, valid_data)) 153 | 154 | # model loading and initialization 155 | init_seed(config['seed'], config['reproducibility']) 156 | model = get_model(config['model'])(config, train_data.dataset).to(config['device']) 157 | logger.info(model) 158 | 159 | # trainer loading and initialization 160 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model) 161 | 162 | # model training 163 | best_valid_score, best_valid_result = trainer.fit( 164 | train_data, valid_data, saved=saved, show_progress=config['show_progress'] 165 | ) 166 | 167 | # model evaluation 168 | # test_result = trainer.evaluate(test_data, 
load_best_model=saved, show_progress=config['show_progress']) 169 | 170 | msghead = 'Data: '+config['dataset']+', best valid by '+config['model'] 171 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result}') 172 | # logger.info(set_color('test result', 'yellow') + f': {test_result}') 173 | log_handler.close() 174 | logger.removeHandler(log_handler) 175 | logger_name = log_filepath[:-4] + "_" + config['valid_metric'] + " = " + str(best_valid_score) + ".log" 176 | shutil.move(log_filepath, logger_name) 177 | update_best_log(config, logger_name, best_valid_result) 178 | 179 | ''' 180 | # example of predictions by context recommender 181 | # note, raw value in the original data is expected to be transformed to inner ID 182 | 183 | # rawid <--->innderid 184 | print("innerid: ", dataset._get_innderid_from_rawid("user_id", "1003")) 185 | print("rawid: ", dataset._get_rawid_from_innerid("user_id", 1)) 186 | 187 | userid = dataset._get_innderid_from_rawid("user_id","1003") 188 | itemid = dataset._get_innderid_from_rawid("item_id","tt0120912") 189 | timeid = dataset._get_innderid_from_rawid("time","Weekday") 190 | locid = dataset._get_innderid_from_rawid("location","Cinema") 191 | cmpid = dataset._get_innderid_from_rawid("companion","Alone") 192 | 193 | user = torch.tensor([userid]) 194 | item = torch.tensor([itemid]) 195 | contexts = [] 196 | contexts.append(torch.tensor([timeid])) 197 | contexts.append(torch.tensor([locid])) 198 | contexts.append(torch.tensor([cmpid])) 199 | print(userid, ', ', itemid, ', ', timeid, ', ', locid, ', ', cmpid) 200 | print("prediction: ",model.forward(user, item, contexts)) 201 | exit() 202 | ''' 203 | 204 | return { 205 | 'best_valid_score': best_valid_score, 206 | 'valid_score_bigger': config['valid_metric_bigger'], 207 | 'best_valid_result': best_valid_result, 208 | # 'test_result': test_result 209 | } 210 | 211 | def update_best_log(config, newlog, best_valid_result): 212 | dataset = config['dataset'] 213 | # compare which log file is better 214 | ranking = False 215 | if config['eval_type'] == EvaluatorType.RANKING: 216 | ranking = True 217 | metric = config['ranking_valid_metric'] 218 | else: 219 | metric = config['err_valid_metric'] 220 | 221 | metric_value = best_valid_result[metric.lower()] 222 | 223 | end = newlog.rindex('.') 224 | s1 = newlog.index('-') 225 | s2 = newlog.index('-', s1 + 1, end) 226 | model = newlog[s1 + 1:s2] 227 | 228 | match = [dataset, model, metric] 229 | 230 | 231 | folder_best = './log/best/' 232 | existing_logs = glob.glob(folder_best+'/*.log') 233 | 234 | found = False 235 | oldlog = None 236 | for file in existing_logs: 237 | if all(x in file for x in match): 238 | oldlog = file 239 | found = True 240 | break 241 | 242 | newlog_filename = newlog[newlog.rindex('/')+1:] 243 | 244 | if not found: 245 | shutil.copyfile(newlog, folder_best+newlog_filename) 246 | else: 247 | newvalue = metric_value 248 | oldvalue = float(oldlog[oldlog.rindex('=') + 1: oldlog.rindex('.')]) 249 | 250 | if ranking: 251 | if newvalue > oldvalue: 252 | shutil.copyfile(newlog, folder_best+newlog_filename) 253 | os.remove(oldlog) 254 | impro = (newvalue - oldvalue) / oldvalue 255 | print('Better results! improvement: {:.2%}'.format(impro) + ', best log saved in ' + folder_best + newlog_filename) 256 | else: 257 | if newvalue < oldvalue: 258 | shutil.copyfile(newlog, folder_best+newlog_filename) 259 | os.remove(oldlog) 260 | impro = (oldvalue - newvalue) / oldvalue 261 | print('Better results! 
improvement: {:.2%}'.format(impro) + ', best log saved in ' + folder_best + newlog_filename) 262 | return 263 | 264 | 265 | 266 | def objective_function(config_dict=None, config_file_list=None, saved=True): 267 | r""" The default objective_function used in HyperTuning 268 | 269 | Args: 270 | config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``. 271 | config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``. 272 | saved (bool, optional): Whether to save the model. Defaults to ``True``. 273 | """ 274 | 275 | config = CARSConfig(config_dict=config_dict, config_file_list=config_file_list) 276 | init_seed(config['seed'], config['reproducibility']) 277 | logging.basicConfig(level=logging.ERROR) 278 | dataset = create_dataset(config) 279 | train_data, valid_data, test_data = data_preparation(config, dataset) 280 | init_seed(config['seed'], config['reproducibility']) 281 | model = get_model(config['model'])(config, train_data.dataset).to(config['device']) 282 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model) 283 | best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, verbose=False, saved=saved) 284 | # test_result = trainer.evaluate(test_data, load_best_model=saved) 285 | 286 | return { 287 | 'best_valid_score': best_valid_score, 288 | 'valid_score_bigger': config['valid_metric_bigger'], 289 | 'best_valid_result': best_valid_result, 290 | # 'test_result': test_result 291 | } 292 | 293 | 294 | def load_data_and_model(model_file, dataset_file=None, dataloader_file=None): 295 | r"""Load filtered dataset, split dataloaders and saved model. 296 | 297 | Args: 298 | model_file (str): The path of saved model file. 299 | dataset_file (str, optional): The path of filtered dataset. Defaults to ``None``. 300 | dataloader_file (str, optional): The path of split dataloaders. Defaults to ``None``. 301 | 302 | Note: 303 | The :attr:`dataset` will be loaded or created according to the following strategy: 304 | If :attr:`dataset_file` is not ``None``, the :attr:`dataset` will be loaded from :attr:`dataset_file`. 305 | If :attr:`dataset_file` is ``None`` and :attr:`dataloader_file` is ``None``, 306 | the :attr:`dataset` will be created according to :attr:`config`. 307 | If :attr:`dataset_file` is ``None`` and :attr:`dataloader_file` is not ``None``, 308 | the :attr:`dataset` will neither be loaded or created. 309 | 310 | The :attr:`dataloader` will be loaded or created according to the following strategy: 311 | If :attr:`dataloader_file` is not ``None``, the :attr:`dataloader` will be loaded from :attr:`dataloader_file`. 312 | If :attr:`dataloader_file` is ``None``, the :attr:`dataloader` will be created according to :attr:`config`. 313 | 314 | Returns: 315 | tuple: 316 | - config (Config): An instance object of Config, which record parameter information in :attr:`model_file`. 317 | - model (AbstractRecommender): The model load from :attr:`model_file`. 318 | - dataset (Dataset): The filtered dataset. 319 | - train_data (AbstractDataLoader): The dataloader for training. 320 | - valid_data (AbstractDataLoader): The dataloader for validation. 321 | - test_data (AbstractDataLoader): The dataloader for testing. 
322 | """ 323 | checkpoint = torch.load(model_file) 324 | config = checkpoint['config'] 325 | init_seed(config['seed'], config['reproducibility']) 326 | init_logger(config) 327 | 328 | dataset = None 329 | if dataset_file: 330 | with open(dataset_file, 'rb') as f: 331 | dataset = pickle.load(f) 332 | 333 | if dataloader_file: 334 | train_data, valid_data, test_data = load_split_dataloaders(dataloader_file) 335 | else: 336 | if dataset is None: 337 | dataset = create_dataset(config) 338 | train_data, valid_data, test_data = data_preparation(config, dataset) 339 | 340 | init_seed(config['seed'], config['reproducibility']) 341 | model = get_model(config['model'])(config, train_data.dataset).to(config['device']) 342 | model.load_state_dict(checkpoint['state_dict']) 343 | model.load_other_parameter(checkpoint.get('other_parameter')) 344 | 345 | return config, model, dataset, train_data, valid_data, test_data 346 | -------------------------------------------------------------------------------- /deepcarskit/data/utils.py: -------------------------------------------------------------------------------- 1 | # @Time : 2020/7/21 2 | # @Author : Yupeng Hou 3 | # @Email : houyupeng@ruc.edu.cn 4 | 5 | # UPDATE: 6 | # @Time : 2021/7/9, 2020/9/17, 2020/8/31, 2021/2/20, 2021/3/1 7 | # @Author : Yupeng Hou, Yushuo Chen, Kaiyuan Li, Haoran Cheng, Jiawei Guan 8 | # @Email : houyupeng@ruc.edu.cn, chenyushuo@ruc.edu.cn, tsotfsk@outlook.com, chenghaoran29@foxmail.com, guanjw@ruc.edu.cn 9 | 10 | # UPDATE: 11 | # @Time : 2021/12 12 | # @Author : Yong Zheng 13 | # @Notes : made several changes to adapt it for CARS 14 | 15 | 16 | """ 17 | deepcarskit.data.utils 18 | ######################## 19 | """ 20 | 21 | import copy 22 | import importlib 23 | import os 24 | import pickle 25 | 26 | from deepcarskit.data.dataloader import * 27 | from recbole.data.dataloader import TrainDataLoader, NegSampleEvalDataLoader, KnowledgeBasedDataLoader, UserDataLoader 28 | from recbole.sampler import KGSampler, Sampler, RepeatableSampler 29 | from recbole.utils import ModelType, ensure_dir, get_local_time, set_color 30 | from recbole.utils import EvaluatorType 31 | from logging import getLogger 32 | 33 | 34 | def create_dataset(config): 35 | """Create dataset according to :attr:`config['model']` and :attr:`config['MODEL_TYPE']`. 36 | 37 | Args: 38 | config (Config): An instance object of Config, used to record parameter information. 39 | Returns: 40 | Dataset: Constructed dataset. 41 | """ 42 | # David Wang: import the model dynamically 43 | dataset_module = importlib.import_module('deepcarskit.data.dataset') 44 | if hasattr(dataset_module, config['model'] + 'Dataset'): 45 | """ David Wang: 46 | if a data set is name after Dataset in custom data set model, return the data set class object 47 | """ 48 | return getattr(dataset_module, config['model'] + 'Dataset')(config) 49 | else: 50 | model_type = config['MODEL_TYPE'] 51 | if model_type == ModelType.SEQUENTIAL: 52 | from .dataset import SequentialDataset 53 | return SequentialDataset(config) 54 | elif model_type == ModelType.KNOWLEDGE: 55 | from .dataset import KnowledgeBasedDataset 56 | return KnowledgeBasedDataset(config) 57 | elif model_type == ModelType.DECISIONTREE: 58 | from .dataset import DecisionTreeDataset 59 | return DecisionTreeDataset(config) 60 | else: 61 | from .dataset import Dataset 62 | return Dataset(config) 63 | 64 | 65 | def save_split_dataloaders(config, dataloaders): 66 | """Save split dataloaders. 
67 | 68 | Args: 69 | config (Config): An instance object of Config, used to record parameter information. 70 | dataloaders (tuple of AbstractDataLoader): The split dataloaders. 71 | """ 72 | save_path = config['checkpoint_dir'] 73 | saved_dataloaders_file = f'{config["dataset"]}-for-{config["model"]}-dataloader.pth' 74 | file_path = os.path.join(save_path, saved_dataloaders_file) 75 | logger = getLogger() 76 | logger.info(set_color('Saved split dataloaders', 'blue') + f': {file_path}') 77 | with open(file_path, 'wb') as f: 78 | pickle.dump(dataloaders, f) 79 | 80 | 81 | def load_split_dataloaders(saved_dataloaders_file): 82 | """Load split dataloaders. 83 | 84 | Args: 85 | saved_dataloaders_file (str): The path of split dataloaders. 86 | 87 | Returns: 88 | dataloaders (tuple of AbstractDataLoader): The split dataloaders. 89 | """ 90 | with open(saved_dataloaders_file, 'rb') as f: 91 | dataloaders = pickle.load(f) 92 | return dataloaders 93 | 94 | 95 | def data_preparation(config, dataset, save=False): 96 | """Split the dataset by :attr:`config['eval_args']` and create training, validation and test dataloader. 97 | 98 | Args: 99 | config (Config): An instance object of Config, used to record parameter information. 100 | dataset (Dataset): An instance object of Dataset, which contains all interaction records. 101 | save (bool, optional): If ``True``, it will call :func:`save_datasets` to save split dataset. 102 | Defaults to ``False``. 103 | 104 | Returns: 105 | tuple: 106 | - train_data (AbstractDataLoader): The dataloader for training. 107 | - valid_data (AbstractDataLoader): The dataloader for validation. 108 | - test_data (AbstractDataLoader): The dataloader for testing. 109 | """ 110 | model_type = config['MODEL_TYPE'] 111 | # David Wang: make a copy since dataset.build() will modify the .inter_feat attribute to Interaction object 112 | dataset = copy.copy(dataset) 113 | # David Wang: read data file and create 3 pandas DateFrame data sets 114 | 115 | 116 | 117 | CV = True 118 | built_datasets = dataset.build() 119 | if isinstance(built_datasets, list): 120 | CV = False 121 | logger = getLogger() 122 | 123 | # dict 124 | # key = number of fold 125 | # value = [train, valid set] 126 | 127 | if CV: 128 | train = [] 129 | valid = [] 130 | for fold in built_datasets: 131 | train_dataset, valid_dataset = built_datasets[fold] 132 | train_sampler, valid_sampler = create_samplers(config, dataset, built_datasets[fold]) 133 | used_ids = get_used_ids(config, dataset=train_dataset) 134 | 135 | if model_type != ModelType.KNOWLEDGE: 136 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, shuffle=True) 137 | else: 138 | kg_sampler = KGSampler(dataset, config['train_neg_sample_args']['distribution']) 139 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, kg_sampler, shuffle=True) 140 | 141 | if config['ranking']: 142 | valid_data_loader = get_dataloader(config, 'evaluation') 143 | valid_data = valid_data_loader(config, valid_dataset, valid_sampler, shuffle=False, used_ids=used_ids) 144 | else: 145 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False) 146 | 147 | logger.info( 148 | set_color('[Training]: ', 'pink') + set_color('train_batch_size', 'cyan') + ' = ' + 149 | set_color(f'[{config["train_batch_size"]}]', 'yellow') + set_color(' negative sampling', 'cyan') + ': ' + 150 | set_color(f'[{config["neg_sampling"]}]', 'yellow') 151 | ) 152 | logger.info( 153 | set_color('[Evaluation]: 
', 'pink') + set_color('eval_batch_size', 'cyan') + ' = ' + 154 | set_color(f'[{config["eval_batch_size"]}]', 'yellow') + set_color(' eval_args', 'cyan') + ': ' + 155 | set_color(f'[{config["eval_args"]}]', 'yellow') 156 | ) 157 | train.append(train_data) 158 | valid.append(valid_data) 159 | # if save: 160 | # save_split_dataloaders(config, dataloaders=(train_data, valid_data)) 161 | 162 | return train, valid 163 | else: 164 | train_dataset, valid_dataset = built_datasets 165 | train_sampler, valid_sampler = create_samplers(config, dataset, built_datasets) 166 | used_ids = get_used_ids(config, dataset=train_dataset) 167 | 168 | if model_type != ModelType.KNOWLEDGE: 169 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, shuffle=True) 170 | else: 171 | kg_sampler = KGSampler(dataset, config['train_neg_sample_args']['distribution']) 172 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, kg_sampler, shuffle=True) 173 | 174 | if config['ranking']: 175 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False, used_ids=used_ids) 176 | else: 177 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False) 178 | 179 | logger.info( 180 | set_color('[Training]: ', 'pink') + set_color('train_batch_size', 'cyan') + ' = ' + 181 | set_color(f'[{config["train_batch_size"]}]', 'yellow') + set_color(' negative sampling', 'cyan') + ': ' + 182 | set_color(f'[{config["neg_sampling"]}]', 'yellow') 183 | ) 184 | logger.info( 185 | set_color('[Evaluation]: ', 'pink') + set_color('eval_batch_size', 'cyan') + ' = ' + 186 | set_color(f'[{config["eval_batch_size"]}]', 'yellow') + set_color(' eval_args', 'cyan') + ': ' + 187 | set_color(f'[{config["eval_args"]}]', 'yellow') 188 | ) 189 | if save: 190 | save_split_dataloaders(config, dataloaders=(train_data, valid_data)) 191 | 192 | return train_data, valid_data 193 | 194 | 195 | def get_dataloader(config, phase): 196 | """Return a dataloader class according to :attr:`config` and :attr:`phase`. 197 | 198 | Args: 199 | config (Config): An instance object of Config, used to record parameter information. 200 | phase (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. 201 | 202 | Returns: 203 | type: The dataloader class that meets the requirements in :attr:`config` and :attr:`phase`. 204 | """ 205 | register_table = { 206 | "MultiDAE": _get_AE_dataloader, 207 | "MultiVAE": _get_AE_dataloader, 208 | 'MacridVAE': _get_AE_dataloader, 209 | 'CDAE': _get_AE_dataloader, 210 | 'ENMF': _get_AE_dataloader, 211 | 'RaCT': _get_AE_dataloader, 212 | 'RecVAE': _get_AE_dataloader, 213 | } 214 | 215 | if config['model'] in register_table: 216 | return register_table[config['model']](config, phase) 217 | 218 | model_type = config['MODEL_TYPE'] 219 | if phase == 'train': 220 | if model_type != ModelType.KNOWLEDGE: 221 | return TrainDataLoader 222 | else: 223 | return KnowledgeBasedDataLoader 224 | else: 225 | eval_strategy = config['eval_neg_sample_args']['strategy'] 226 | if eval_strategy in {'none', 'by'}: 227 | if config['eval_type'] == EvaluatorType.RANKING: 228 | return LabledDataSortEvalDataLoader 229 | else: 230 | return NegSampleEvalDataLoader 231 | elif eval_strategy == 'full': 232 | return FullSortEvalDataLoader 233 | 234 | def get_used_ids(config, dataset): 235 | """ 236 | Returns: 237 | dict: Used item_ids is the same as positive item_ids. 
238 | Key is phase, and value is a numpy.ndarray which index is user_id, and element is a set of item_ids. 239 | """ 240 | used_item_id = None 241 | uc_num = dataset.user_context_num 242 | iid_field = dataset.iid_field 243 | ucid_field = dataset.ucid_field 244 | last = [set() for _ in range(uc_num)] 245 | cur = np.array([set(s) for s in last]) 246 | for ucid, iid in zip(dataset.inter_feat[ucid_field].numpy(), dataset.inter_feat[iid_field].numpy()): 247 | cur[ucid].add(iid) 248 | last = used_item_id = cur 249 | 250 | for used_item_set in used_item_id: 251 | if len(used_item_set) + 1 == dataset.item_num: # [pad] is a item. 252 | raise ValueError( 253 | 'Some users have interacted with all items, ' 254 | 'which we can not sample negative items for them. ' 255 | 'Please set `user_inter_num_interval` to filter those users.' 256 | ) 257 | return used_item_id 258 | 259 | def _get_AE_dataloader(config, phase): 260 | """Customized function for VAE models to get correct dataloader class. 261 | 262 | Args: 263 | config (Config): An instance object of Config, used to record parameter information. 264 | phase (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. 265 | 266 | Returns: 267 | type: The dataloader class that meets the requirements in :attr:`config` and :attr:`phase`. 268 | """ 269 | if phase == 'train': 270 | return UserDataLoader 271 | else: 272 | eval_strategy = config['eval_neg_sample_args']['strategy'] 273 | if eval_strategy in {'none', 'by'}: 274 | return NegSampleEvalDataLoader 275 | elif eval_strategy == 'full': 276 | return FullSortEvalDataLoader 277 | 278 | 279 | def create_samplers(config, dataset, built_datasets): 280 | """Create sampler for training, validation and testing. 281 | 282 | Args: 283 | config (Config): An instance object of Config, used to record parameter information. 284 | dataset (Dataset): An instance object of Dataset, which contains all interaction records. 285 | built_datasets (list of Dataset): A list of split Dataset, which contains dataset for 286 | training, validation and testing. 287 | 288 | Returns: 289 | tuple: 290 | - train_sampler (AbstractSampler): The sampler for training. 291 | - valid_sampler (AbstractSampler): The sampler for validation. 292 | - test_sampler (AbstractSampler): The sampler for testing. 293 | """ 294 | phases = ['train', 'valid'] 295 | train_neg_sample_args = config['train_neg_sample_args'] 296 | eval_neg_sample_args = config['eval_neg_sample_args'] 297 | 298 | sampler = None 299 | train_sampler, valid_sampler = None, None 300 | 301 | if train_neg_sample_args['strategy'] != 'none': 302 | if not config['repeatable']: 303 | sampler = Sampler(phases, built_datasets, train_neg_sample_args['distribution']) 304 | else: 305 | sampler = RepeatableSampler(phases, dataset, train_neg_sample_args['distribution']) 306 | train_sampler = sampler.set_phase('train') 307 | 308 | if eval_neg_sample_args['strategy'] != 'none': 309 | if sampler is None: 310 | if not config['repeatable']: 311 | sampler = Sampler(phases, built_datasets, eval_neg_sample_args['distribution']) 312 | else: 313 | sampler = RepeatableSampler(phases, dataset, eval_neg_sample_args['distribution']) 314 | else: 315 | sampler.set_distribution(eval_neg_sample_args['distribution']) 316 | valid_sampler = sampler.set_phase('valid') 317 | 318 | return train_sampler, valid_sampler 319 | 320 | ''' 321 | def create_samplers(config, dataset, built_datasets): 322 | """Create sampler for training, validation and testing. 
323 | 324 | Args: 325 | config (Config): An instance object of Config, used to record parameter information. 326 | dataset (Dataset): An instance object of Dataset, which contains all interaction records. 327 | built_datasets (list of Dataset): A list of split Dataset, which contains dataset for 328 | training, validation and testing. 329 | 330 | Returns: 331 | tuple: 332 | - train_sampler (AbstractSampler): The sampler for training. 333 | - valid_sampler (AbstractSampler): The sampler for validation. 334 | - test_sampler (AbstractSampler): The sampler for testing. 335 | """ 336 | phases = ['train', 'valid', 'test'] 337 | train_neg_sample_args = config['train_neg_sample_args'] 338 | eval_neg_sample_args = config['eval_neg_sample_args'] 339 | sampler = None 340 | train_sampler, valid_sampler, test_sampler = None, None, None 341 | 342 | if train_neg_sample_args['strategy'] != 'none': 343 | if not config['repeatable']: 344 | sampler = Sampler(phases, built_datasets, train_neg_sample_args['distribution']) 345 | else: 346 | sampler = RepeatableSampler(phases, dataset, train_neg_sample_args['distribution']) 347 | train_sampler = sampler.set_phase('train') 348 | 349 | if eval_neg_sample_args['strategy'] != 'none': 350 | if sampler is None: 351 | if not config['repeatable']: 352 | sampler = Sampler(phases, built_datasets, eval_neg_sample_args['distribution']) 353 | else: 354 | sampler = RepeatableSampler(phases, dataset, eval_neg_sample_args['distribution']) 355 | else: 356 | sampler.set_distribution(eval_neg_sample_args['distribution']) 357 | valid_sampler = sampler.set_phase('valid') 358 | test_sampler = sampler.set_phase('test') 359 | 360 | return train_sampler, valid_sampler, test_sampler 361 | ''' -------------------------------------------------------------------------------- /deepcarskit/model/context_recommender.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | 8 | from recbole.model.abstract_recommender import AbstractRecommender 9 | from recbole.model.layers import FMEmbedding 10 | from recbole.utils import ModelType, InputType, FeatureSource, FeatureType, set_color, EvaluatorType 11 | from deepcarskit.model.layers import FMFirstOrderLinear 12 | 13 | class ContextRecommender(AbstractRecommender): 14 | """This is a abstract context-aware recommender. All the context-aware model should implement this class. 15 | The base context-aware recommender class provide the basic embedding function of feature fields which also 16 | contains a first-order part of feature fields. 
17 | """ 18 | type = ModelType.CONTEXT 19 | input_type = InputType.POINTWISE 20 | 21 | def __init__(self, config, dataset): 22 | super(ContextRecommender, self).__init__() 23 | self.config = config 24 | 25 | self.field_names = dataset.fields( 26 | source=[ 27 | FeatureSource.INTERACTION, 28 | FeatureSource.USER, 29 | FeatureSource.USER_ID, 30 | FeatureSource.ITEM, 31 | FeatureSource.ITEM_ID, 32 | ] 33 | ) 34 | 35 | self.USER_ID = config['USER_ID_FIELD'] 36 | self.ITEM_ID = config['ITEM_ID_FIELD'] 37 | self.CONTEXT_SITUATION_ID = config['CONTEXT_SITUATION_FIELD'] 38 | 39 | self.actfun = nn.LeakyReLU() 40 | self.loss = nn.MSELoss() 41 | if config['ranking']: 42 | self.LABEL = config['LABEL_FIELD'] 43 | if config['sigmoid']: 44 | self.actfun = nn.Sigmoid() 45 | self.loss = nn.BCELoss() 46 | else: 47 | self.LABEL = config['RATING_FIELD'] 48 | 49 | self.CONTEXTS = [] 50 | for i in range(2, len(self.field_names)): 51 | if self.field_names[i] == config['LABEL_FIELD'] or self.field_names[i] == config['USER_CONTEXT_FIELD'] or self.field_names[i] == config['RATING_FIELD']: 52 | continue 53 | else: 54 | self.CONTEXTS.append(self.field_names[i]) 55 | 56 | self.n_context_situation = 0 57 | if self.CONTEXT_SITUATION_ID in self.CONTEXTS: 58 | self.n_context_situation = dataset.num(self.CONTEXT_SITUATION_ID) 59 | self.CONTEXTS.remove(self.CONTEXT_SITUATION_ID) 60 | 61 | msghead = "Loaded context variables: " 62 | if self.n_context_situation == 0: 63 | msg = ' '.join(self.CONTEXTS) + ', without context situation ID: ' + self.CONTEXT_SITUATION_ID 64 | else: 65 | msg = ' '.join(self.CONTEXTS) + ', with context situation ID: ' + self.CONTEXT_SITUATION_ID 66 | self.logger.info(set_color(msghead, 'yellow') + msg) 67 | 68 | self.n_users = dataset.num(self.USER_ID) 69 | self.n_items = dataset.num(self.ITEM_ID) 70 | # number of context variables 71 | self.n_contexts_dim = len(self.CONTEXTS) 72 | # number of context conditions in each dimension 73 | self.n_contexts_conditions = [] 74 | 75 | for i in range(self.n_contexts_dim): 76 | dim=self.CONTEXTS[i] 77 | n_dim = dataset.num(dim) 78 | self.n_contexts_conditions.append(n_dim) 79 | 80 | self.embedding_size = config['embedding_size'] 81 | self.device = config['device'] 82 | self.double_tower = config['double_tower'] 83 | if self.double_tower is None: 84 | self.double_tower = False 85 | self.token_field_names = [] 86 | self.token_field_dims = [] 87 | self.float_field_names = [] 88 | self.float_field_dims = [] 89 | self.token_seq_field_names = [] 90 | self.token_seq_field_dims = [] 91 | self.num_feature_field = 0 92 | 93 | if self.double_tower: 94 | self.user_field_names = dataset.fields(source=[FeatureSource.USER, FeatureSource.USER_ID]) 95 | self.item_field_names = dataset.fields(source=[FeatureSource.ITEM, FeatureSource.ITEM_ID]) 96 | self.field_names = self.user_field_names + self.item_field_names 97 | self.user_token_field_num = 0 98 | self.user_float_field_num = 0 99 | self.user_token_seq_field_num = 0 100 | for field_name in self.user_field_names: 101 | if dataset.field2type[field_name] == FeatureType.TOKEN: 102 | self.user_token_field_num += 1 103 | elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ: 104 | self.user_token_seq_field_num += 1 105 | else: 106 | self.user_float_field_num += dataset.num(field_name) 107 | self.item_token_field_num = 0 108 | self.item_float_field_num = 0 109 | self.item_token_seq_field_num = 0 110 | for field_name in self.item_field_names: 111 | if dataset.field2type[field_name] == FeatureType.TOKEN: 112 | 
                    self.item_token_field_num += 1
113 |                 elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ:
114 |                     self.item_token_seq_field_num += 1
115 |                 else:
116 |                     self.item_float_field_num += dataset.num(field_name)
117 | 
118 |         for field_name in self.field_names:
119 |             if field_name == self.config['RATING_FIELD'] or field_name == self.config['LABEL_FIELD']:
120 |                 continue
121 |             if dataset.field2type[field_name] == FeatureType.TOKEN:
122 |                 self.token_field_names.append(field_name)
123 |                 self.token_field_dims.append(dataset.num(field_name))
124 |             elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ:
125 |                 self.token_seq_field_names.append(field_name)
126 |                 self.token_seq_field_dims.append(dataset.num(field_name))
127 |             else:
128 |                 self.float_field_names.append(field_name)
129 |                 self.float_field_dims.append(dataset.num(field_name))
130 |             self.num_feature_field += 1
131 |         if len(self.token_field_dims) > 0:
132 |             self.token_field_offsets = np.array((0, *np.cumsum(self.token_field_dims)[:-1]), dtype=np.int64)  # np.int64: np.long is deprecated/removed in recent NumPy
133 |             self.token_embedding_table = FMEmbedding(
134 |                 self.token_field_dims, self.token_field_offsets, self.embedding_size
135 |             )
136 |         if len(self.float_field_dims) > 0:
137 |             self.float_embedding_table = nn.Embedding(
138 |                 np.sum(self.float_field_dims, dtype=np.int32), self.embedding_size
139 |             )
140 |         if len(self.token_seq_field_dims) > 0:
141 |             self.token_seq_embedding_table = nn.ModuleList()
142 |             for token_seq_field_dim in self.token_seq_field_dims:
143 |                 self.token_seq_embedding_table.append(nn.Embedding(token_seq_field_dim, self.embedding_size))
144 | 
145 |         self.first_order_linear = FMFirstOrderLinear(config, dataset)
146 | 
147 |     def embed_float_fields(self, float_fields, embed=True):
148 |         """Embed the float feature columns
149 | 
150 |         Args:
151 |             float_fields (torch.FloatTensor): The input dense tensor. shape of [batch_size, num_float_field]
152 |             embed (bool): Return the embeddings of the columns or just the raw columns themselves. Defaults to ``True``.
153 | 
154 |         Returns:
155 |             torch.FloatTensor: The result embedding tensor of float columns.
156 |         """
157 |         # input Tensor shape : [batch_size, num_float_field]
158 |         if not embed or float_fields is None:
159 |             return float_fields
160 | 
161 |         num_float_field = float_fields.shape[1]
162 |         # [batch_size, num_float_field]
163 |         index = torch.arange(0, num_float_field).unsqueeze(0).expand_as(float_fields).long().to(self.device)
164 | 
165 |         # [batch_size, num_float_field, embed_dim]
166 |         float_embedding = self.float_embedding_table(index)
167 |         float_embedding = torch.mul(float_embedding, float_fields.unsqueeze(2))
168 | 
169 |         return float_embedding
170 | 
171 |     def getContextSituationList(self, interaction, context_dims):  # context conditions as a [num_context_dims, batch_size] tensor
172 |         situation = []
173 |         for dim in context_dims:
174 |             situation.append(interaction[dim].tolist())
175 |         situation = torch.tensor(situation).to(self.device)
176 |         return situation
177 | 
178 |     def getContextSituationDict(self, interaction, context_dims):  # context conditions as a {dim_name: tensor} dict
179 |         situation = {}
180 |         for dim in context_dims:
181 |             situation[dim] = interaction[dim]
182 |         return situation
183 | 
184 |     def embed_token_fields(self, token_fields):
185 |         """Embed the token feature columns
186 | 
187 |         Args:
188 |             token_fields (torch.LongTensor): The input tensor. shape of [batch_size, num_token_field]
189 | 
190 |         Returns:
191 |             torch.FloatTensor: The result embedding tensor of token columns.
192 |         """
193 |         # input Tensor shape : [batch_size, num_token_field]
194 |         if token_fields is None:
195 |             return None
196 |         # [batch_size, num_token_field, embed_dim]
197 |         token_embedding = self.token_embedding_table(token_fields)
198 | 
199 |         return token_embedding
200 | 
201 |     def embed_token_seq_fields(self, token_seq_fields, mode='mean'):
202 |         """Embed the token sequence feature columns
203 | 
204 |         Args:
205 |             token_seq_fields (list of torch.LongTensor): A list of input tensors, each of shape [batch_size, seq_len]
206 |             mode (str): How to aggregate the embeddings within each field: 'mean', 'max' or 'sum'. Defaults to 'mean'.
207 | 
208 |         Returns:
209 |             torch.FloatTensor: The result embedding tensor of token sequence columns.
210 |         """
211 |         # input is a list of Tensor shape of [batch_size, seq_len]
212 |         fields_result = []
213 |         for i, token_seq_field in enumerate(token_seq_fields):
214 |             embedding_table = self.token_seq_embedding_table[i]
215 |             mask = token_seq_field != 0  # [batch_size, seq_len]
216 |             mask = mask.float()
217 |             value_cnt = torch.sum(mask, dim=1, keepdim=True)  # [batch_size, 1]
218 | 
219 |             token_seq_embedding = embedding_table(token_seq_field)  # [batch_size, seq_len, embed_dim]
220 | 
221 |             mask = mask.unsqueeze(2).expand_as(token_seq_embedding)  # [batch_size, seq_len, embed_dim]
222 |             if mode == 'max':
223 |                 masked_token_seq_embedding = token_seq_embedding - (1 - mask) * 1e9  # [batch_size, seq_len, embed_dim]
224 |                 result = torch.max(masked_token_seq_embedding, dim=1, keepdim=True).values  # [batch_size, 1, embed_dim]; torch.max returns (values, indices)
225 |             elif mode == 'sum':
226 |                 masked_token_seq_embedding = token_seq_embedding * mask.float()
227 |                 result = torch.sum(masked_token_seq_embedding, dim=1, keepdim=True)  # [batch_size, 1, embed_dim]
228 |             else:
229 |                 masked_token_seq_embedding = token_seq_embedding * mask.float()
230 |                 result = torch.sum(masked_token_seq_embedding, dim=1)  # [batch_size, embed_dim]
231 |                 eps = torch.FloatTensor([1e-8]).to(self.device)
232 |                 result = torch.div(result, value_cnt + eps)  # [batch_size, embed_dim]
233 |                 result = result.unsqueeze(1)  # [batch_size, 1, embed_dim]
234 |             fields_result.append(result)
235 |         if len(fields_result) == 0:
236 |             return None
237 |         else:
238 |             return torch.cat(fields_result, dim=1)  # [batch_size, num_token_seq_field, embed_dim]
239 | 
240 |     def double_tower_embed_input_fields(self, interaction):
241 |         """Embed the whole feature columns in a double tower way.
242 | 
243 |         Args:
244 |             interaction (Interaction): The input data collection.
245 | 
246 |         Returns:
247 |             torch.FloatTensor: The sparse (token and token_seq) embedding tensor of the first (user) tower.
248 |             torch.FloatTensor: The dense (float) embedding tensor of the first (user) tower.
249 |             torch.FloatTensor: The sparse (token and token_seq) embedding tensor of the second (item) tower.
250 |             torch.FloatTensor: The dense (float) embedding tensor of the second (item) tower.
251 | 252 | """ 253 | if not self.double_tower: 254 | raise RuntimeError('Please check your model hyper parameters and set \'double tower\' as True') 255 | sparse_embedding, dense_embedding = self.embed_input_fields(interaction) 256 | if dense_embedding is not None: 257 | first_dense_embedding, second_dense_embedding = \ 258 | torch.split(dense_embedding, [self.user_float_field_num, self.item_float_field_num], dim=1) 259 | else: 260 | first_dense_embedding, second_dense_embedding = None, None 261 | 262 | if sparse_embedding is not None: 263 | sizes = [ 264 | self.user_token_seq_field_num, self.item_token_seq_field_num, self.user_token_field_num, 265 | self.item_token_field_num 266 | ] 267 | first_token_seq_embedding, second_token_seq_embedding, first_token_embedding, second_token_embedding = \ 268 | torch.split(sparse_embedding, sizes, dim=1) 269 | first_sparse_embedding = torch.cat([first_token_seq_embedding, first_token_embedding], dim=1) 270 | second_sparse_embedding = torch.cat([second_token_seq_embedding, second_token_embedding], dim=1) 271 | else: 272 | first_sparse_embedding, second_sparse_embedding = None, None 273 | 274 | return first_sparse_embedding, first_dense_embedding, second_sparse_embedding, second_dense_embedding 275 | 276 | def concat_embed_input_fields(self, interaction): 277 | sparse_embedding, dense_embedding = self.embed_input_fields(interaction) 278 | all_embeddings = [] 279 | if sparse_embedding is not None: 280 | all_embeddings.append(sparse_embedding) 281 | if dense_embedding is not None and len(dense_embedding.shape) == 3: 282 | all_embeddings.append(dense_embedding) 283 | return torch.cat(all_embeddings, dim=1) # [batch_size, num_field, embed_dim] 284 | 285 | def embed_input_fields(self, interaction): 286 | """Embed the whole feature columns. 287 | 288 | Args: 289 | interaction (Interaction): The input data collection. 290 | 291 | Returns: 292 | torch.FloatTensor: The embedding tensor of token sequence columns. 293 | torch.FloatTensor: The embedding tensor of float sequence columns. 
294 | """ 295 | float_fields = [] 296 | for field_name in self.float_field_names: 297 | if len(interaction[field_name].shape) == 2: 298 | float_fields.append(interaction[field_name]) 299 | else: 300 | float_fields.append(interaction[field_name].unsqueeze(1)) 301 | if len(float_fields) > 0: 302 | float_fields = torch.cat(float_fields, dim=1) # [batch_size, num_float_field] 303 | else: 304 | float_fields = None 305 | # [batch_size, num_float_field] or [batch_size, num_float_field, embed_dim] or None 306 | float_fields_embedding = self.embed_float_fields(float_fields) 307 | 308 | token_fields = [] 309 | for field_name in self.token_field_names: 310 | token_fields.append(interaction[field_name].unsqueeze(1)) 311 | if len(token_fields) > 0: 312 | token_fields = torch.cat(token_fields, dim=1) # [batch_size, num_token_field] 313 | else: 314 | token_fields = None 315 | # [batch_size, num_token_field, embed_dim] or None 316 | token_fields_embedding = self.embed_token_fields(token_fields) 317 | 318 | token_seq_fields = [] 319 | for field_name in self.token_seq_field_names: 320 | token_seq_fields.append(interaction[field_name]) 321 | # [batch_size, num_token_seq_field, embed_dim] or None 322 | token_seq_fields_embedding = self.embed_token_seq_fields(token_seq_fields) 323 | 324 | if token_fields_embedding is None: 325 | sparse_embedding = token_seq_fields_embedding 326 | else: 327 | if token_seq_fields_embedding is None: 328 | sparse_embedding = token_fields_embedding 329 | else: 330 | sparse_embedding = torch.cat([token_fields_embedding, token_seq_fields_embedding], dim=1) 331 | 332 | dense_embedding = float_fields_embedding 333 | 334 | # sparse_embedding shape: [batch_size, num_token_seq_field+num_token_field, embed_dim] or None 335 | # dense_embedding shape: [batch_size, num_float_field] or [batch_size, num_float_field, embed_dim] or None 336 | return sparse_embedding, dense_embedding 337 | --------------------------------------------------------------------------------
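Note: the snippet below is not part of the repository. It is a minimal, hypothetical sketch (the class name ToyCARSModel, the MLP layers, and the 64-unit hidden size are illustrative assumptions, not DeepCARSKit code) of how a concrete CARS model would typically subclass the ContextRecommender defined above: it builds its input with concat_embed_input_fields(), scores it with a small MLP, and reuses self.actfun, self.loss, and self.LABEL, which ContextRecommender.__init__() configures from the ranking/sigmoid settings.

    import torch.nn as nn

    from deepcarskit.model.context_recommender import ContextRecommender


    class ToyCARSModel(ContextRecommender):
        """Hypothetical example: flatten all field embeddings and score them with an MLP."""

        def __init__(self, config, dataset):
            super(ToyCARSModel, self).__init__(config, dataset)
            # num_feature_field and embedding_size are set by ContextRecommender.__init__()
            input_dim = self.num_feature_field * self.embedding_size
            self.mlp = nn.Sequential(
                nn.Linear(input_dim, 64),
                nn.LeakyReLU(),
                nn.Linear(64, 1),
            )

        def forward(self, interaction):
            # [batch_size, num_field, embed_dim] -> [batch_size, num_field * embed_dim]
            emb = self.concat_embed_input_fields(interaction)
            score = self.mlp(emb.view(emb.shape[0], -1)).squeeze(-1)
            # actfun is Sigmoid for ranking with BCE loss, LeakyReLU for rating prediction with MSE loss
            return self.actfun(score)

        def calculate_loss(self, interaction):
            label = interaction[self.LABEL]
            return self.loss(self.forward(interaction), label)

        def predict(self, interaction):
            return self.forward(interaction)

The models shipped under deepcarskit/model/neucf and deepcarskit/model/fms follow the same calculate_loss/predict contract expected by the RecBole-style trainer.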