├── log
│   └── best
│       └── EMPTY
├── conda
│   ├── build.sh
│   ├── conda_release.sh
│   └── meta.yaml
├── deepcarskit
│   ├── properties
│   │   ├── model
│   │   │   ├── Pop.yaml
│   │   │   ├── BPR.yaml
│   │   │   ├── EASE.yaml
│   │   │   ├── FM.yaml
│   │   │   ├── LR.yaml
│   │   │   ├── FPMC.yaml
│   │   │   ├── ItemKNN.yaml
│   │   │   ├── TransRec.yaml
│   │   │   ├── STAMP.yaml
│   │   │   ├── SRGNN.yaml
│   │   │   ├── LightGCN.yaml
│   │   │   ├── NPE.yaml
│   │   │   ├── LINE.yaml
│   │   │   ├── SpectralCF.yaml
│   │   │   ├── CFKG.yaml
│   │   │   ├── CKE.yaml
│   │   │   ├── MultiDAE.yaml
│   │   │   ├── AFM.yaml
│   │   │   ├── FISM.yaml
│   │   │   ├── FNN.yaml
│   │   │   ├── NFM.yaml
│   │   │   ├── WideDeep.yaml
│   │   │   ├── DeepFM.yaml
│   │   │   ├── ENMF.yaml
│   │   │   ├── SLIMElastic.yaml
│   │   │   ├── FOSSIL.yaml
│   │   │   ├── HGN.yaml
│   │   │   ├── RippleNet.yaml
│   │   │   ├── SHAN.yaml
│   │   │   ├── DSSM.yaml
│   │   │   ├── Caser.yaml
│   │   │   ├── KGCN.yaml
│   │   │   ├── NARM.yaml
│   │   │   ├── DIN.yaml
│   │   │   ├── GRU4Rec.yaml
│   │   │   ├── DGCF.yaml
│   │   │   ├── RepeatNet.yaml
│   │   │   ├── DCN.yaml
│   │   │   ├── NGCF.yaml
│   │   │   ├── FFM.yaml
│   │   │   ├── GRU4RecKG.yaml
│   │   │   ├── KGAT.yaml
│   │   │   ├── KGNNLS.yaml
│   │   │   ├── MultiVAE.yaml
│   │   │   ├── KSR.yaml
│   │   │   ├── NextItNet.yaml
│   │   │   ├── PNN.yaml
│   │   │   ├── DIEN.yaml
│   │   │   ├── AutoInt.yaml
│   │   │   ├── FwFM.yaml
│   │   │   ├── HRM.yaml
│   │   │   ├── GCMC.yaml
│   │   │   ├── xDeepFM.yaml
│   │   │   ├── GRU4RecF.yaml
│   │   │   ├── NAIS.yaml
│   │   │   ├── CDAE.yaml
│   │   │   ├── KTUP.yaml
│   │   │   ├── ConvNCF.yaml
│   │   │   ├── MKR.yaml
│   │   │   ├── RecVAE.yaml
│   │   │   ├── MacridVAE.yaml
│   │   │   ├── NeuMF.yaml
│   │   │   ├── SASRec.yaml
│   │   │   ├── BERT4Rec.yaml
│   │   │   ├── GCSAN.yaml
│   │   │   ├── RaCT.yaml
│   │   │   ├── FDSA.yaml
│   │   │   ├── SASRecF.yaml
│   │   │   ├── DMF.yaml
│   │   │   ├── NNCF.yaml
│   │   │   ├── S3Rec.yaml
│   │   │   ├── lightgbm.yaml
│   │   │   └── xgboost.yaml
│   │   ├── quick_start_config
│   │   │   ├── sequential.yaml
│   │   │   ├── knowledge_base.yaml
│   │   │   ├── context-aware.yaml
│   │   │   └── sequential_embedding_model.yaml
│   │   └── overall.yaml
│   ├── model
│   │   ├── ae
│   │   │   └── __init__.py
│   │   ├── fms
│   │   │   ├── __init__.py
│   │   │   ├── fm.py
│   │   │   └── deepfm.py
│   │   ├── neucf
│   │   │   ├── __init__.py
│   │   │   ├── neucmf0w.py
│   │   │   ├── neucmfw0.py
│   │   │   ├── neucmf0i.py
│   │   │   ├── neucmfww.py
│   │   │   ├── neucmfi0.py
│   │   │   └── neucmfii.py
│   │   ├── __init__.py
│   │   ├── layers.py
│   │   └── context_recommender.py
│   ├── data
│   │   ├── dataset
│   │   │   └── __init__.py
│   │   ├── dataloader
│   │   │   ├── __init__.py
│   │   │   └── general_dataloader.py
│   │   ├── __init__.py
│   │   └── utils.py
│   ├── config
│   │   ├── __init__.py
│   │   └── configurator.py
│   ├── quick_start
│   │   ├── __init__.py
│   │   └── quick_start.py
│   ├── __init__.py
│   ├── trainer
│   │   ├── __init__.py
│   │   └── trainer.py
│   ├── evaluator
│   │   ├── __init__.py
│   │   ├── evaluator.py
│   │   ├── collector.py
│   │   └── base_metric.py
│   └── utils
│       ├── __init__.py
│       ├── utils.py
│       └── logger.py
├── images
│   ├── NeuCMF.png
│   └── intro-img1.jpg
├── requirements.txt
├── MANIFEST.in
├── .gitignore
├── check_gpu.py
├── check_torch.py
├── .github
│   └── FUNDING.yml
├── run.py
├── LICENSE
├── dataset
│   ├── tripadvisor
│   │   └── ReadMe.html
│   └── depaulmovie
│       └── ReadMe.html
├── setup.py
├── config.yaml
├── README.md
└── style.cfg
/log/best/EMPTY:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/conda/build.sh:
--------------------------------------------------------------------------------
1 | $PYTHON setup.py install
--------------------------------------------------------------------------------
/deepcarskit/properties/model/Pop.yaml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/BPR.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
--------------------------------------------------------------------------------
/deepcarskit/properties/model/EASE.yaml:
--------------------------------------------------------------------------------
1 | reg_weight: 250.0
--------------------------------------------------------------------------------
/deepcarskit/properties/model/FM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
--------------------------------------------------------------------------------
/deepcarskit/properties/model/LR.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
--------------------------------------------------------------------------------
/deepcarskit/properties/model/FPMC.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/ItemKNN.yaml:
--------------------------------------------------------------------------------
1 | k: 100
2 | shrink: 0.0
--------------------------------------------------------------------------------
/deepcarskit/properties/model/TransRec.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
--------------------------------------------------------------------------------
/deepcarskit/model/ae/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.model.ae import *
--------------------------------------------------------------------------------
/deepcarskit/model/fms/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.model.fms import *
--------------------------------------------------------------------------------
/deepcarskit/model/neucf/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.model.neucf import *
--------------------------------------------------------------------------------
/deepcarskit/properties/model/STAMP.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | loss_type: 'CE'
3 |
--------------------------------------------------------------------------------
/deepcarskit/data/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.data.dataset.dataset import Dataset
--------------------------------------------------------------------------------
/deepcarskit/config/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.config.configurator import CARSConfig
2 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/SRGNN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | step: 1
3 | loss_type: 'CE'
--------------------------------------------------------------------------------
/images/NeuCMF.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irecsys/DeepCARSKit/HEAD/images/NeuCMF.png
--------------------------------------------------------------------------------
/deepcarskit/properties/model/LightGCN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | n_layers: 2
3 | reg_weight: 1e-05
--------------------------------------------------------------------------------
/deepcarskit/properties/model/NPE.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | loss_type: "CE"
3 | dropout_prob: 0.3
--------------------------------------------------------------------------------
/images/intro-img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irecsys/DeepCARSKit/HEAD/images/intro-img1.jpg
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | recbole==1.0.1
2 | scipy==1.6.0
3 | numpy==1.20.0
4 | xgboost
5 | torch_geometric
--------------------------------------------------------------------------------
/deepcarskit/data/dataloader/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.data.dataloader.general_dataloader import *
--------------------------------------------------------------------------------
/deepcarskit/properties/model/LINE.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | order: 2
3 | second_order_loss_weight: 1
--------------------------------------------------------------------------------
/deepcarskit/properties/model/SpectralCF.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | n_layers: 4
3 | reg_weight: 1e-03
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include deepcarskit/properties *
2 | recursive-include deepcarskit/dataset_example *
3 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/CFKG.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | loss_function: 'inner_product'
3 | margin: 1.0
4 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/CKE.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | kg_embedding_size: 64
3 | reg_weights: [1e-2,1e-2]
--------------------------------------------------------------------------------
/deepcarskit/properties/model/MultiDAE.yaml:
--------------------------------------------------------------------------------
1 | mlp_hidden_size: [600]
2 | latent_dimension: 64
3 | dropout_prob: 0.5
--------------------------------------------------------------------------------
/deepcarskit/properties/model/AFM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | attention_size: 25
3 | dropout_prob: 0.3
4 | reg_weight: 2
--------------------------------------------------------------------------------
/deepcarskit/properties/model/FISM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | split_to: 0
3 | reg_weights: [1e-2, 1e-2]
4 | alpha: 0
--------------------------------------------------------------------------------
/deepcarskit/properties/model/FNN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [256, 256, 256]
3 | dropout_prob: 0.2
--------------------------------------------------------------------------------
/deepcarskit/properties/model/NFM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [64, 64, 64]
3 | dropout_prob: 0.0
4 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/WideDeep.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [32, 16, 8]
3 | dropout_prob: 0.1
--------------------------------------------------------------------------------
/deepcarskit/properties/model/DeepFM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [128, 128, 128]
3 | dropout_prob: 0.2
4 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/ENMF.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | dropout_prob: 0.7
3 | reg_weight: 0.0
4 | negative_weight: 0.5
--------------------------------------------------------------------------------
/deepcarskit/properties/model/SLIMElastic.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | l1_ratio: 0.02
3 | positive_only: True
4 | hide_item: True
5 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/FOSSIL.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | loss_type: "CE"
3 | reg_weight: 0.00
4 | order_len: 3
5 | alpha: 0.6
--------------------------------------------------------------------------------
/deepcarskit/properties/model/HGN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | loss_type: 'BPR'
3 | pooling_type: "average"
4 | reg_weight: [0.00,0.00]
--------------------------------------------------------------------------------
/deepcarskit/properties/model/RippleNet.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | n_hop: 2
3 | n_memory: 16
4 | kg_weight: 0.01
5 | reg_weight: 1e-7
--------------------------------------------------------------------------------
/deepcarskit/properties/model/SHAN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | short_item_length: 2
3 | loss_type: "CE"
4 | reg_weight: [0.01,0.0001]
--------------------------------------------------------------------------------
/deepcarskit/properties/model/DSSM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [256, 256, 256]
3 | dropout_prob: 0.3
4 | double_tower: True
--------------------------------------------------------------------------------
/deepcarskit/quick_start/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.quick_start.quick_start import run, objective_function, load_data_and_model
2 |
3 |
--------------------------------------------------------------------------------
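
A minimal usage sketch for the exports above (assuming `load_data_and_model` keeps the RecBole 1.0.1 quick-start signature; the checkpoint path is a hypothetical placeholder):

    from deepcarskit.quick_start import load_data_and_model

    # 'saved/model.pth' stands in for a checkpoint written by a previous training run
    config, model, dataset, train_data, valid_data, test_data = load_data_and_model(
        model_file='saved/model.pth',
    )
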
/deepcarskit/properties/model/Caser.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | dropout_prob: 0.4
3 | reg_weight: 1e-4
4 | nv: 8
5 | nh: 16
6 | loss_type: 'CE'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/KGCN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | n_iter: 1
3 | aggregator: "sum"
4 | reg_weight: 1e-7
5 | neighbor_sample_size: 4
--------------------------------------------------------------------------------
/deepcarskit/properties/model/NARM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | hidden_size: 128
3 | n_layers: 1
4 | dropout_probs: [0.25,0.5]
5 | loss_type: 'CE'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/DIN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [256,256,256]
3 | dropout_prob: 0
4 | pooling_mode: 'mean'
5 |
6 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/GRU4Rec.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | hidden_size: 128
3 | num_layers: 1
4 | dropout_prob: 0.3
5 | loss_type: 'CE'
6 |
--------------------------------------------------------------------------------
/deepcarskit/model/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/DGCF.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | n_factors: 4
3 | n_iterations: 2
4 | n_layers: 1
5 | reg_weight: 1e-3
6 | cor_weight: 0.01
--------------------------------------------------------------------------------
/deepcarskit/properties/model/RepeatNet.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | loss_type: "CE"
3 | hidden_size: 64
4 | joint_train: False
5 | dropout_prob: 0.5
6 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/DCN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [256, 256, 256]
3 | cross_layer_num: 6
4 | reg_weight: 2
5 | dropout_prob: 0.2
--------------------------------------------------------------------------------
/deepcarskit/properties/model/NGCF.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | hidden_size_list: [64,64,64]
3 | node_dropout: 0.0
4 | message_dropout: 0.1
5 | reg_weight: 1e-5
--------------------------------------------------------------------------------
/deepcarskit/properties/model/FFM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | # define fields (key: the field's id; value: the features in this field); optional and can be ignored.
3 | fields: ~
4 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/GRU4RecKG.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | hidden_size: 128
3 | num_layers: 1
4 | dropout_prob: 0.1
5 | freeze_kg: True
6 | loss_type: 'CE'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/KGAT.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | kg_embedding_size: 64
3 | layers: [64]
4 | mess_dropout: 0.1
5 | reg_weight: 1e-5
6 | aggregator_type: 'bi'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/KGNNLS.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | n_iter: 1
3 | aggregator: "sum"
4 | reg_weight: 1e-7
5 | neighbor_sample_size: 4
6 |
7 | ls_weight: 0.5
--------------------------------------------------------------------------------
/deepcarskit/properties/model/MultiVAE.yaml:
--------------------------------------------------------------------------------
1 | mlp_hidden_size: [600]
2 | latent_dimension: 128
3 | dropout_prob: 0.5
4 | anneal_cap: 0.2
5 | total_anneal_steps: 200000
6 |
--------------------------------------------------------------------------------
/deepcarskit/properties/quick_start_config/sequential.yaml:
--------------------------------------------------------------------------------
1 | eval_args:
2 |   split: {'LS': 'valid_and_test'}
3 |   order: TO
4 |   mode: full
5 | repeatable: True
6 |
--------------------------------------------------------------------------------
/deepcarskit/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | __version__ = '1.0.1'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/KSR.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | hidden_size: 128
3 | num_layers: 1
4 | dropout_prob: 0.1
5 | loss_type: 'CE'
6 | freeze_kg: False
7 | gamma: 10
--------------------------------------------------------------------------------
/deepcarskit/properties/model/NextItNet.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | kernel_size: 3
3 | block_num: 5
4 | dilations: [1,4]
5 | reg_weight: 1e-5
6 | loss_type: 'CE'
7 |
8 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/PNN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [128, 256, 128]
3 | dropout_prob: 0.0
4 | reg_weight: 0
5 | use_inner: True
6 | use_outer: False
--------------------------------------------------------------------------------
/deepcarskit/properties/model/DIEN.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [256,256,256]
3 | dropout_prob: 0
4 | pooling_mode: 'mean'
5 | gru_type: 'AUGRU'
6 | alpha: 1
7 |
--------------------------------------------------------------------------------
/deepcarskit/data/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.data.utils import *
2 |
3 | __all__ = ['create_dataset', 'data_preparation', 'save_split_dataloaders', 'load_split_dataloaders']
4 |
--------------------------------------------------------------------------------
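
A minimal pipeline sketch for the exports above, assuming a hold-out split such as {'RS': [0.8, 0.1, 0.1]} (with the cross-validation split used in config.yaml, the returned dataloaders follow the CV folds instead):

    from deepcarskit.config import CARSConfig
    from deepcarskit.data import create_dataset, data_preparation

    config = CARSConfig(config_file_list=['config.yaml'])
    dataset = create_dataset(config)  # build the context-aware dataset
    train_data, valid_data, test_data = data_preparation(config, dataset)  # split into dataloaders
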
/deepcarskit/properties/model/AutoInt.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | attention_size: 16
3 | n_layers: 3
4 | num_heads: 2
5 | dropout_probs: [0.2,0.2,0.2]
6 | mlp_hidden_size: [128,128]
--------------------------------------------------------------------------------
/deepcarskit/properties/model/FwFM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | dropout_prob: 0.0
3 | # define fields (key: the field's id; value: the features in this field); optional and can be ignored.
4 | fields: ~
5 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/HRM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | high_order: 2
3 | loss_type: "CE"
4 | dropout_prob: 0.2
5 | pooling_type_layer_1: "max"
6 | pooling_type_layer_2: "max"
--------------------------------------------------------------------------------
/deepcarskit/properties/model/GCMC.yaml:
--------------------------------------------------------------------------------
1 | accum: "stack"
2 | gcn_output_dim: 500
3 | embedding_size: 64
4 | dropout_prob: 0.3
5 | sparse_feature: True
6 | class_num: 2
7 | num_basis_functions: 2
--------------------------------------------------------------------------------
/deepcarskit/properties/model/xDeepFM.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 10
2 | mlp_hidden_size: [128,128,128]
3 | reg_weight: 5e-4
4 | dropout_prob: 0.2
5 | direct: False
6 | cin_layer_size: [100,100,100]
--------------------------------------------------------------------------------
/deepcarskit/properties/model/GRU4RecF.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | hidden_size: 128
3 | num_layers: 1
4 | dropout_prob: 0.3
5 | selected_features: ['class']
6 | pooling_mode: 'sum'
7 | loss_type: 'CE'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/NAIS.yaml:
--------------------------------------------------------------------------------
1 | algorithm: prod
2 | embedding_size: 64
3 | weight_size: 64
4 | split_to: 0
5 | reg_weights: [1e-7, 1e-7, 1e-5]
6 | alpha: 0
7 | beta: 0.5
8 | pretrain_path: ~
--------------------------------------------------------------------------------
/deepcarskit/properties/model/CDAE.yaml:
--------------------------------------------------------------------------------
1 | loss_type: BCE
2 | hid_activation: relu
3 | out_activation: sigmoid
4 | corruption_ratio: 0.5
5 | embedding_size: 64
6 | reg_weight_1: 0.
7 | reg_weight_2: 0.01
8 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/KTUP.yaml:
--------------------------------------------------------------------------------
1 | train_rec_step: 5
2 | train_kg_step: 5
3 | embedding_size: 64
4 | use_st_gumbel: True
5 | L1_flag: False
6 | margin: 1.0
7 | kg_weight: 1.0
8 | align_weight: 1.0
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.bak
2 | *.log
3 | *.pth
4 | *.pyc
5 | *.zip
6 | *.ttf
7 | *.xml
8 | *.iml
9 | events.out.*
10 | saved/
11 | log/
12 | log_tensorboard/
13 | doc/
14 | **/__pycache__/
15 | .idea/
16 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/ConvNCF.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | cnn_channels: [1, 32, 32, 32, 32]
3 | cnn_kernels: [4, 4, 2, 2]
4 | cnn_strides: [4, 4, 2, 2]
5 | dropout_prob: 0.2
6 | reg_weights: [0.1, 0.1]
--------------------------------------------------------------------------------
/deepcarskit/trainer/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.trainer.trainer import CARSTrainer
2 | from recbole.trainer import *
3 |
4 | __all__ = ['Trainer', 'KGTrainer', 'KGATTrainer', 'S3RecTrainer', 'CARSTrainer']
5 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/MKR.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | kg_embedding_size: 64
3 | low_layers_num: 1
4 | high_layers_num: 1
5 | reg_weight: 1e-6
6 | use_inner_product: True
7 | kge_interval: 3
8 | dropout_prob: 0.0
--------------------------------------------------------------------------------
/deepcarskit/properties/quick_start_config/knowledge_base.yaml:
--------------------------------------------------------------------------------
1 | load_col:
2 |   inter: ['user_id', 'item_id', 'rating', 'timestamp']
3 |   kg: ['head_id', 'relation_id', 'tail_id']
4 |   link: ['item_id', 'entity_id']
--------------------------------------------------------------------------------
/deepcarskit/properties/model/RecVAE.yaml:
--------------------------------------------------------------------------------
1 | hidden_dimension: 600
2 | latent_dimension: 200
3 | dropout_prob: 0.5
4 | beta: 0.2
5 | mixture_weights: [0.15, 0.75, 0.1]
6 | gamma: 0.005
7 | n_enc_epochs: 3
8 | n_dec_epochs: 1
9 |
--------------------------------------------------------------------------------
/deepcarskit/properties/quick_start_config/context-aware.yaml:
--------------------------------------------------------------------------------
1 | eval_args:
2 |   split: {'RS':[0.8,0.1,0.1]}
3 |   order: RO
4 |   group_by: ~
5 |   mode: labeled
6 | neg_sampling: ~
7 | metrics: ['AUC', 'LogLoss']
8 | valid_metric: AUC
--------------------------------------------------------------------------------
/deepcarskit/properties/quick_start_config/sequential_embedding_model.yaml:
--------------------------------------------------------------------------------
1 | load_col:
2 |   inter: ['user_id', 'item_id', 'rating', 'timestamp']
3 |   ent: ['ent_id', 'ent_emb']
4 | additional_feat_suffix: ent
5 | repeatable: True
6 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/MacridVAE.yaml:
--------------------------------------------------------------------------------
1 | embedding_size: 64
2 | drop_out: 0.5
3 | kfac: 10
4 | nogb: False
5 | std: 0.01
6 | encoder_hidden_size: [600]
7 | tau: 0.1
8 | anneal_cap: 0.2
9 | total_anneal_steps: 200000
10 | reg_weights: [0, 0]
--------------------------------------------------------------------------------
/deepcarskit/properties/model/NeuMF.yaml:
--------------------------------------------------------------------------------
1 | mf_embedding_size: 64
2 | mlp_embedding_size: 64
3 | mlp_hidden_size: [128,64]
4 | dropout_prob: 0.1
5 | mf_train: True
6 | mlp_train: True
7 |
8 | use_pretrain: False
9 | mf_pretrain_path: ~
10 | mlp_pretrain_path: ~
--------------------------------------------------------------------------------
/deepcarskit/properties/model/SASRec.yaml:
--------------------------------------------------------------------------------
1 | n_layers: 2
2 | n_heads: 2
3 | hidden_size: 64
4 | inner_size: 256
5 | hidden_dropout_prob: 0.5
6 | attn_dropout_prob: 0.5
7 | hidden_act: 'gelu'
8 | layer_norm_eps: 1e-12
9 | initializer_range: 0.02
10 | loss_type: 'CE'
--------------------------------------------------------------------------------
/conda/conda_release.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | conda-build --python 3.7 .
4 | printf "python 3.7 version is released \n"
5 | conda-build --python 3.8 .
6 | printf "python 3.8 version is released \n"
7 | conda-build --python 3.9 .
8 | printf "python 3.9 version is released \n"
9 |
--------------------------------------------------------------------------------
/deepcarskit/evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.evaluator.base_metric import *
2 | from recbole.evaluator.metrics import *
3 | from deepcarskit.evaluator.evaluator import *
4 | from recbole.evaluator.register import *
5 | from deepcarskit.evaluator.collector import *
6 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/BERT4Rec.yaml:
--------------------------------------------------------------------------------
1 | n_layers: 2
2 | n_heads: 2
3 | hidden_size: 64
4 | inner_size: 256
5 | hidden_dropout_prob: 0.5
6 | attn_dropout_prob: 0.5
7 | hidden_act: 'gelu'
8 | layer_norm_eps: 1e-12
9 | initializer_range: 0.02
10 | mask_ratio: 0.2
11 | loss_type: 'CE'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/GCSAN.yaml:
--------------------------------------------------------------------------------
1 | n_layers: 1
2 | n_heads: 1
3 | hidden_size: 64
4 | inner_size: 256
5 | hidden_dropout_prob: 0.2
6 | attn_dropout_prob: 0.2
7 | hidden_act: 'gelu'
8 | layer_norm_eps: 1e-12
9 | initializer_range: 0.02
10 | step: 1
11 | weight: 0.6
12 | reg_weight: 5e-5
13 | loss_type: 'CE'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/RaCT.yaml:
--------------------------------------------------------------------------------
1 | mlp_hidden_size: [600]
2 | latent_dimension: 256
3 | dropout_prob: 0.5
4 | anneal_cap: 0.2
5 | total_anneal_steps: 200000
6 | critic_layers: [100,100,10]
7 | metrics_k: 100
8 | train_stage: 'actor_pretrain'
9 | pretrain_epochs: 150
10 | save_step: 10
11 | pre_model_path: ''
--------------------------------------------------------------------------------
/deepcarskit/properties/model/FDSA.yaml:
--------------------------------------------------------------------------------
1 | n_layers: 2
2 | n_heads: 2
3 | hidden_size: 64
4 | inner_size: 256
5 | hidden_dropout_prob: 0.5
6 | attn_dropout_prob: 0.5
7 | hidden_act: 'gelu'
8 | layer_norm_eps: 1e-12
9 | initializer_range: 0.02
10 | selected_features: ['class']
11 | pooling_mode: 'mean'
12 | loss_type: 'CE'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/SASRecF.yaml:
--------------------------------------------------------------------------------
1 | n_layers: 2
2 | n_heads: 2
3 | hidden_size: 64
4 | inner_size: 256
5 | hidden_dropout_prob: 0.5
6 | attn_dropout_prob: 0.5
7 | hidden_act: 'gelu'
8 | layer_norm_eps: 1e-12
9 | initializer_range: 0.02
10 | selected_features: ['class']
11 | pooling_mode: 'sum'
12 | loss_type: 'CE'
--------------------------------------------------------------------------------
/deepcarskit/properties/model/DMF.yaml:
--------------------------------------------------------------------------------
1 | # WARNING:
2 | # 1. if you set inter_matrix_type='rating', you must set `unused_col: ~` in your data config files.
3 | # 2. The dimensions of the last layer of users and items must be the same
4 |
5 | inter_matrix_type: '01'
6 | user_embedding_size: 64
7 | item_embedding_size: 64
8 | user_hidden_size_list: [64, 64]
9 | item_hidden_size_list: [64, 64]
--------------------------------------------------------------------------------
/check_gpu.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | # Check if CUDA (GPU support) is available
4 | gpu_available = torch.cuda.is_available()
5 | print(f"CUDA available: {gpu_available}")
6 |
7 | # If CUDA is available, get the name of the GPU
8 | if gpu_available:
9 |     gpu_name = torch.cuda.get_device_name(0)
10 |     print(f"GPU detected: {gpu_name}")
11 | else:
12 |     print("No GPU detected.")
13 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/NNCF.yaml:
--------------------------------------------------------------------------------
1 | ui_embedding_size: 64
2 | neigh_embedding_size: 32
3 | num_conv_kernel: 128
4 | conv_kernel_size: 5
5 | pool_kernel_size: 5
6 | mlp_hidden_size: [128,64,32,16]
7 | neigh_num: 10
8 | dropout: 0.5
9 |
10 | # The method used to gather neighborhood information; you can choose the random, knn, or louvain algorithm
11 | # e.g. neigh_info_method: "knn" or neigh_info_method: "louvain"
12 | neigh_info_method: "knn"
13 |
14 | resolution: 1
15 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/S3Rec.yaml:
--------------------------------------------------------------------------------
1 | n_layers: 2
2 | n_heads: 2
3 | hidden_size: 64
4 | inner_size: 256
5 | hidden_dropout_prob: 0.5
6 | attn_dropout_prob: 0.5
7 | hidden_act: 'gelu'
8 | layer_norm_eps: 1e-12
9 | initializer_range: 0.02
10 | item_attribute: 'class'
11 | mask_ratio: 0.2
12 | aap_weight: 1.0
13 | mip_weight: 0.2
14 | map_weight: 1.0
15 | sp_weight: 0.5
16 | train_stage: 'pretrain'
17 | pretrain_epochs: 500
18 | save_step: 10
19 | pre_model_path: ''
20 | loss_type: 'CE'
--------------------------------------------------------------------------------
/check_torch.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | # Get the PyTorch version
4 | torch_version = torch.__version__
5 | print(f"PyTorch version: {torch_version}")
6 |
7 | # Check if CUDA is available (indicating GPU support)
8 | is_cuda_available = torch.cuda.is_available()
9 | print(f"CUDA available: {is_cuda_available}")
10 |
11 | # Determine the type of PyTorch version
12 | if is_cuda_available:
13 |     print("This is the GPU version of PyTorch.")
14 | else:
15 |     print("This is the CPU version of PyTorch.")
16 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/lightgbm.yaml:
--------------------------------------------------------------------------------
1 | convert_token_to_onehot: False
2 | token_num_threshold: 10000
3 |
4 | # Dataset
5 | lgb_silent: False
6 |
7 | # Train
8 | lgb_model: ~
9 | lgb_params:
10 |   boosting: gbdt
11 |   num_leaves: 90
12 |   min_data_in_leaf: 30
13 |   max_depth: -1
14 |   learning_rate: 0.1
15 |   objective: binary
16 |   lambda_l1: 0.1
17 |   metric: ['auc', 'binary_logloss']
18 |   force_row_wise: True
19 | lgb_learning_rates: ~
20 | lgb_num_boost_round: 300
21 | lgb_early_stopping_rounds: ~
22 | lgb_verbose_eval: 100
23 |
24 |
--------------------------------------------------------------------------------
/deepcarskit/properties/model/xgboost.yaml:
--------------------------------------------------------------------------------
1 | convert_token_to_onehot: False
2 | token_num_threshold: 10000
3 |
4 | # DMatrix
5 | xgb_silent: ~
6 | xgb_nthread: ~
7 |
8 | xgb_model: ~
9 | xgb_params:
10 |   booster: gbtree
11 |   objective: binary:logistic
12 |   eval_metric: ['auc','logloss']
13 |   # gamma: 0.1
14 |   max_depth: 3
15 |   # lambda: 1
16 |   # subsample: 0.7
17 |   # colsample_bytree: 0.7
18 |   # min_child_weight: 3
19 |   eta: 1
20 |   seed: 2020
21 |   # nthread: -1
22 | xgb_num_boost_round: 100
23 | xgb_early_stopping_rounds: ~
24 | xgb_verbose_eval: 50
25 |
26 |
--------------------------------------------------------------------------------
/deepcarskit/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from deepcarskit.utils.logger import init_logger, set_color
2 | from recbole.utils.utils import get_local_time, ensure_dir, get_model, get_trainer, \
3 |     early_stopping, calculate_valid_score, dict2str, init_seed, get_tensorboard, get_gpu_usage
4 | from recbole.utils.enum_type import *
5 | from recbole.utils.argument_list import *
6 |
7 | __all__ = [
8 |     'init_logger', 'get_local_time', 'ensure_dir', 'get_model', 'get_trainer', 'early_stopping',
9 |     'calculate_valid_score', 'dict2str', 'Enum', 'ModelType', 'KGDataLoaderState', 'EvaluatorType', 'InputType',
10 |     'FeatureType', 'FeatureSource', 'init_seed', 'general_arguments', 'training_arguments', 'evaluation_arguments',
11 |     'dataset_arguments', 'get_tensorboard', 'set_color', 'get_gpu_usage'
12 | ]
13 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: deepcarskit
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/deepcarskit/properties/overall.yaml:
--------------------------------------------------------------------------------
1 | # general
2 | gpu_id: 0
3 | use_gpu: True
4 | seed: 2020
5 | state: INFO
6 | reproducibility: True
7 | data_path: 'dataset/'
8 | checkpoint_dir: 'saved'
9 | show_progress: True
10 | save_dataset: False
11 | save_dataloaders: False
12 |
13 | # training settings
14 | epochs: 300
15 | train_batch_size: 2048
16 | learner: adam
17 | learning_rate: 0.001
18 | neg_sampling:
19 |   uniform: 1
20 | eval_step: 1
21 | stopping_step: 10
22 | clip_grad_norm: ~
23 | # clip_grad_norm: {'max_norm': 5, 'norm_type': 2}
24 | weight_decay: 0.0
25 |
26 | # evaluation settings
27 | eval_args:
28 |   split: {'RS':[0.8,0.1,0.1]}
29 |   group_by: user
30 |   order: RO
31 |   mode: full
32 | repeatable: False
33 | metrics: ["Recall","MRR","NDCG","Hit","Precision"]
34 | topk: [10]
35 | valid_metric: MRR@10
36 | valid_metric_bigger: True
37 | eval_batch_size: 4096
38 | loss_decimal_place: 4
39 | metric_decimal_place: 4
40 |
--------------------------------------------------------------------------------
/conda/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 |   name: deepcarskit
3 |   version: 1.0.1
4 |
5 | source:
6 |   path: ../
7 |
8 | requirements:
9 |   build:
10 |     - python
11 |   host:
12 |     - python
13 |     - recbole ==1.0.1
14 |     - numpy >=1.17.2
15 |     - scipy ==1.6.0
16 |     - pandas >=1.0.5
17 |     - tqdm >=4.48.2
18 |     - pyyaml >=5.1.0
19 |     - scikit-learn >=0.23.2
20 |     - pytorch >=1.7.0
21 |     - colorlog ==4.7.2
22 |     - colorama ==0.4.4
23 |     - tensorboard >=2.5.0
24 |   run:
25 |     - python
26 |     - recbole ==1.0.1
27 |     - numpy >=1.17.2
28 |     - scipy ==1.6.0
29 |     - pandas >=1.0.5
30 |     - tqdm >=4.48.2
31 |     - pyyaml >=5.1.0
32 |     - scikit-learn >=0.23.2
33 |     - pytorch >=1.7.0
34 |     - colorlog ==4.7.2
35 |     - colorama ==0.4.4
36 |     - tensorboard >=2.5.0
37 | test:
38 |   imports:
39 |     - deepcarskit
40 |
41 | about:
42 |   home: https://github.com/irecsys/DeepCARSKit
43 |   license: MIT
44 |   summary: "A Deep Learning Based Context-Aware Recommendation Library"
45 |
46 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 |
2 | # @Author : Yong Zheng
3 |
4 |
5 | import argparse
6 | import time
7 | import torch
8 | import multiprocessing as mcpu
9 | from deepcarskit.quick_start import run
10 | from logging import getLogger
11 |
12 |
13 |
14 | if __name__ == '__main__':
15 |     print('GPU availability: ', torch.cuda.is_available())
16 |
17 |     n_gpu = torch.cuda.device_count()
18 |     print('Num of GPU: ', n_gpu)
19 |
20 |     if n_gpu > 0:
21 |         print(torch.cuda.get_device_name(0))
22 |         print('Current GPU index: ', torch.cuda.current_device())
23 |
24 |     logger = getLogger()
25 |     t0 = time.time()
26 |     parser = argparse.ArgumentParser()
27 |     parser.add_argument('--config_files', type=str, default='config.yaml', help='config files')
28 |
29 |     args, _ = parser.parse_known_args()
30 |
31 |     config_list = args.config_files.strip().split(' ') if args.config_files else None
32 |     run(config_file_list=config_list)
33 |     t1 = time.time()
34 |     total = t1 - t0
35 |     logger.info(f'time cost: {total}s')
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 RUCAIBox
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/dataset/tripadvisor/ReadMe.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Data Name: TripAdvisor v2
5 |
6 | Data Descriptions:
7 | This data was scraped from online reviews on tripadvisor.com. There is only one context: trip type (Family, Couples, Business, Solo travel, Friends). Other features about users and hotels are available. The data set is pretty sparse in ratings and contexts: 14175 ratings, 2731 users, 2269 hotels.
8 |
9 |
10 | Citation Information:
11 |
12 |
13 |
14 | - In LaTeX:
15 | @inproceedings{zheng2014contextrec,
16 | title={Contexts Recommendation Using Multi-label Classification},
17 | author={Zheng, Y. and Mobasher, B. and Burke, R.},
18 | booktitle={Proceedings of the 13th IEEE/WIC/ACM International Conference on Web Intelligence (WI 2014)},
19 | doi = {},
20 | pages={},
21 | year={2014},
22 | organization={IEEE/WIC/ACM}
23 | }
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/dataset/depaulmovie/ReadMe.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Data Name: DePaulMovie
5 |
6 | Data Descriptions:
7 | This data was collected from surveys -- students were asked to rate movies at different times, in different locations, and with different companions.
8 |
9 | Citation Information:
10 |
11 |
12 |
13 | - In LaTeX:
14 | @inproceedings{zheng2015carskit,
15 | title={CARSKit: A Java-Based Context-aware Recommendation Engine},
16 | author={Zheng, Yong and Mobasher, Bamshad and Burke, Robin},
17 | booktitle={Proceedings of the 15th IEEE International Conference on Data Mining Workshops},
18 | year={2015},
19 | publisher={IEEE}
20 | }
21 |
22 |
23 |
24 | - In General:
25 | Zheng, Y. and Mobasher, B. and Burke, R. "CARSKit: A Java-Based Context-aware Recommendation Engine". Proceedings of the 15th IEEE International Conference on Data Mining Workshops, 2015, IEEE
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | from __future__ import division
4 |
5 | import os
6 |
7 | from setuptools import setup, find_packages
8 |
9 | install_requires = ['torch>=1.7.0', 'scipy==1.6.0', 'pandas>=1.0.5', 'tqdm>=4.48.2',
10 |                     'colorlog==4.7.2', 'colorama==0.4.4', 'numpy==1.20.0',
11 |                     'scikit_learn>=0.23.2', 'pyyaml>=5.1.0', 'tensorboard>=2.5.0', 'recbole==1.0.1']
12 |
13 | setup_requires = []
14 |
15 | extras_require = {
16 |     'hyperopt': ['hyperopt>=0.2.4']
17 | }
18 |
19 | classifiers = ["License :: OSI Approved :: MIT License"]
20 |
21 | # Readthedocs requires Sphinx extensions to be specified as part of
22 | # install_requires in order to build properly.
23 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
24 | if on_rtd:
25 |     install_requires.extend(setup_requires)
26 |
27 | setup(
28 |     name='deepcarskit',
29 |     version='1.0.1',
30 |     # remember to edit deepcarskit/__init__.py accordingly when updating the version
31 |     description='A Deep Learning Based Context-Aware Recommendation Library',
32 |     long_description_content_type="text/markdown",
33 |     url='https://github.com/irecsys/DeepCARSKit',
34 |     author='Yong Zheng',
35 |     author_email='DeepCARSKit@Gmail.com',
36 |     packages=[
37 |         package for package in find_packages()
38 |         if package.startswith('deepcarskit')
39 |     ],
40 |     include_package_data=True,
41 |     install_requires=install_requires,
42 |     setup_requires=setup_requires,
43 |     extras_require=extras_require,
44 |     zip_safe=False,
45 |     classifiers=classifiers,
46 | )
47 |
--------------------------------------------------------------------------------
/deepcarskit/model/fms/fm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/7/8 10:09
3 | # @Author : Shanlei Mu
4 | # @Email : slmu@ruc.edu.cn
5 | # @File : fm.py
6 |
7 | # UPDATE:
8 | # @Time : 2020/8/13,
9 | # @Author : Zihan Lin
10 | # @Email : linzihan.super@foxmain.com
11 |
12 | # UPDATE:
13 | # @Time : 2021/12
14 | # @Author : Yong Zheng
15 | # @Notes : made changes to adapt it for CARS
16 |
17 | r"""
18 | FM
19 | ################################################
20 | References
21 | -----
22 | Steffen Rendle et al. "Factorization Machines." in ICDM 2010.
23 |
24 | Notes
25 | -----
26 | context variables are treated as individual dimensions
27 | """
28 |
29 | import torch.nn as nn
30 | from torch.nn.init import xavier_normal_
31 |
32 | from deepcarskit.model.context_recommender import ContextRecommender
33 | from recbole.model.layers import BaseFactorizationMachine
34 | from recbole.utils import EvaluatorType
35 |
36 |
37 | class FM(ContextRecommender):
38 |     """Factorization Machine considers the second-order interaction with features to predict the final score.
39 |
40 |     """
41 |
42 |     def __init__(self, config, dataset):
43 |
44 |         super(FM, self).__init__(config, dataset)
45 |
46 |         # define layers and loss
47 |         self.fm = BaseFactorizationMachine(reduce_sum=True)
48 |         self.config = config
49 |
50 |         if self.config['eval_type'] == EvaluatorType.RANKING:
51 |             self.actfun = nn.Sigmoid()
52 |             self.loss = nn.BCELoss()
53 |             self.LABEL = self.config['LABEL_FIELD']
54 |         else:
55 |             self.actfun = nn.LeakyReLU()
56 |             self.loss = nn.MSELoss()
57 |             self.LABEL = self.config['RATING_FIELD']
58 |
59 |         # parameters initialization
60 |         self.apply(self._init_weights)
61 |
62 |     def _init_weights(self, module):
63 |         if isinstance(module, nn.Embedding):
64 |             xavier_normal_(module.weight.data)
65 |
66 |     def forward(self, interaction):
67 |         fm_all_embeddings = self.concat_embed_input_fields(interaction)  # [batch_size, num_field, embed_dim]
68 |         y = self.actfun(self.first_order_linear(interaction) + self.fm(fm_all_embeddings))
69 |         return y.squeeze(-1)
70 |
71 |     def calculate_loss(self, interaction):
72 |         label = interaction[self.LABEL]
73 |
74 |         output = self.forward(interaction)
75 |         return self.loss(output, label)
76 |
77 |     def predict(self, interaction):
78 |         return self.forward(interaction)
79 |
--------------------------------------------------------------------------------
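
The BaseFactorizationMachine(reduce_sum=True) layer used in FM above computes the second-order term. A minimal sketch of that computation (not repository code), based on the standard identity sum_{i<j} <v_i, v_j> = 0.5 * ((sum_i v_i)^2 - sum_i (v_i)^2):

    import torch

    def fm_second_order(emb):
        # emb: [batch_size, num_field, embed_dim] stacked field embeddings
        square_of_sum = emb.sum(dim=1) ** 2    # [batch_size, embed_dim]
        sum_of_square = (emb ** 2).sum(dim=1)  # [batch_size, embed_dim]
        # each embedding dimension now holds twice the sum of pairwise products; halve and reduce
        return 0.5 * (square_of_sum - sum_of_square).sum(dim=1, keepdim=True)
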
/deepcarskit/utils/utils.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 |
4 | """
5 | deepcarskit.utils.utils
6 | ################################
7 | """
8 |
9 |
10 | import importlib
11 |
12 | from recbole.utils.enum_type import ModelType
13 |
14 | def get_model(model_name):
15 |     r"""Automatically select model class based on model name
16 |
17 |     Args:
18 |         model_name (str): model name
19 |
20 |     Returns:
21 |         Recommender: model class
22 |     """
23 |
24 |     model_submodule_recbole = [
25 |         'general_recommender', 'sequential_recommender', 'knowledge_aware_recommender',
26 |         'exlib_recommender'
27 |     ]
28 |
29 |     model_submodule_deepcarskit = [
30 |         'ae', 'fms', 'neucf'
31 |     ]
32 |
33 |     model_file_name = model_name.lower()
34 |     model_module = None
35 |     for submodule in model_submodule_deepcarskit:
36 |         module_path = '.'.join(['deepcarskit.model', submodule, model_file_name])
37 |         if importlib.util.find_spec(module_path, __name__):
38 |             model_module = importlib.import_module(module_path, __name__)
39 |             break
40 |
41 |     if model_module is None:
42 |         for submodule in model_submodule_recbole:
43 |             module_path = '.'.join(['recbole.model', submodule, model_file_name])
44 |             if importlib.util.find_spec(module_path, __name__):
45 |                 model_module = importlib.import_module(module_path, __name__)
46 |                 break
47 |
48 |     if model_module is None:
49 |         raise ValueError('`model_name` [{}] is not the name of an existing model.'.format(model_name))
50 |     model_class = getattr(model_module, model_name)
51 |     return model_class
52 |
53 |
54 | def get_trainer(model_type, model_name):
55 |     r"""Automatically select trainer class based on model type and model name
56 |
57 |     Args:
58 |         model_type (ModelType): model type
59 |         model_name (str): model name
60 |
61 |     Returns:
62 |         Trainer: trainer class
63 |     """
64 |     try:
65 |         return getattr(importlib.import_module('deepcarskit.trainer'), model_name + 'Trainer')
66 |     except AttributeError:
67 |         if model_type == ModelType.KNOWLEDGE:
68 |             return getattr(importlib.import_module('recbole.trainer'), 'KGTrainer')
69 |         elif model_type == ModelType.TRADITIONAL:
70 |             return getattr(importlib.import_module('recbole.trainer'), 'TraditionalTrainer')
71 |         else:
72 |             return getattr(importlib.import_module('deepcarskit.trainer'), 'CARSTrainer')
73 |
--------------------------------------------------------------------------------
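
A short usage sketch for the two resolvers above ('NeuCMFii' is one of the models under deepcarskit/model/neucf; ModelType.CONTEXT is the RecBole enum value assumed for these models):

    from deepcarskit.utils.utils import get_model, get_trainer
    from recbole.utils.enum_type import ModelType

    model_class = get_model('NeuCMFii')  # resolved via deepcarskit.model.neucf.neucmfii
    trainer_class = get_trainer(ModelType.CONTEXT, 'NeuCMFii')  # no NeuCMFiiTrainer exists, so CARSTrainer is returned
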
/deepcarskit/config/configurator.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 | # @Notes : Inherit from recbole.config
4 |
5 | """
6 | deepcarskit.config.configurator
7 | ################################
8 | """
9 |
10 | from deepcarskit.utils.utils import get_model
11 | from recbole.config import Config
12 | from recbole.utils import init_seed
13 |
14 |
15 | class CARSConfig(Config):
16 |
17 |     def __init__(self, model=None, dataset=None, config_file_list=None, config_dict=None):
18 |         super(CARSConfig, self).__init__(model, dataset, config_file_list, config_dict)
19 |
20 |     def _get_model_and_dataset(self, model, dataset):
21 |
22 |         if model is None:
23 |             try:
24 |                 model = self.external_config_dict['model']
25 |             except KeyError:
26 |                 raise KeyError(
27 |                     'model needs to be specified in at least one of these ways: '
28 |                     '[model variable, config file, config dict, command line] '
29 |                 )
30 |         if not isinstance(model, str):
31 |             # if model is a class object
32 |             final_model_class = model
33 |             final_model = model.__name__
34 |         else:
35 |             # if model is a name in string format
36 |             final_model = model
37 |             final_model_class = get_model(final_model)  # need to get the class object
38 |
39 |         if dataset is None:
40 |             try:
41 |                 final_dataset = self.external_config_dict['dataset']
42 |             except KeyError:
43 |                 raise KeyError(
44 |                     'dataset needs to be specified in at least one of these ways: '
45 |                     '[dataset variable, config file, config dict, command line] '
46 |                 )
47 |         else:
48 |             final_dataset = dataset
49 |
50 |         return final_model, final_model_class, final_dataset
51 |
52 |     def _get_final_config_dict(self):
53 |         final_config_dict = dict()
54 |         final_config_dict.update(self.internal_config_dict)
55 |         final_config_dict.update(self.external_config_dict)
56 |         # turn on the corresponding metrics according to the recommendation task
57 |         if final_config_dict['ranking']:
58 |             final_config_dict['metrics'] = final_config_dict['ranking_metrics']
59 |             final_config_dict['valid_metric'] = final_config_dict['ranking_valid_metric']
60 |         else:
61 |             final_config_dict['metrics'] = final_config_dict['err_metrics']
62 |             final_config_dict['valid_metric'] = final_config_dict['err_valid_metric']
63 |         return final_config_dict
64 |
--------------------------------------------------------------------------------
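
A minimal sketch of the metric switching in _get_final_config_dict: with ranking: False, as in config.yaml, the error metrics and their validation metric become active.

    from deepcarskit.config import CARSConfig

    config = CARSConfig(model='NeuCMFii', dataset='depaulmovie', config_file_list=['config.yaml'])
    print(config['metrics'])       # ['MAE', 'RMSE', 'AUC'] when ranking is False
    print(config['valid_metric'])  # MAE
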
/deepcarskit/evaluator/evaluator.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 | # @Notes : added F1 metrics, if precision and recall defined in user requests
4 |
5 | """
6 | deepcarskit.evaluator.evaluator
7 | #####################################
8 | """
9 | import numpy as np
10 | from recbole.evaluator.register import metrics_dict
11 | from recbole.evaluator.collector import DataStruct
12 |
13 |
14 | class Evaluator(object):
15 |     """Evaluator is used to check parameter correctness, and summarize the results of all metrics.
16 |     """
17 |
18 |     def __init__(self, config):
19 |         self.config = config
20 |         self.metrics = [metric.lower() for metric in self.config['metrics']]
21 |         self.metric_class = {}
22 |
23 |         for metric in self.metrics:
24 |             self.metric_class[metric] = metrics_dict[metric](self.config)
25 |
26 |     def evaluate(self, dataobject: DataStruct):
27 |         """Calculate all the metrics. It is called at the end of each epoch.
28 |
29 |         Args:
30 |             dataobject (DataStruct): It contains all the information needed for metrics.
31 |
32 |         Returns:
33 |             dict: such as ``{'hit@20': 0.3824, 'recall@20': 0.0527, 'hit@10': 0.3153, 'recall@10': 0.0329, 'gauc': 0.9236}``
34 |
35 |         """
36 |         result_dict = {}
37 |         topk = []
38 |         metric_f1 = False
39 |         if self.config['ranking']:
40 |             topk = self.config['topk']
41 |             if 'precision' in self.metrics and 'recall' in self.metrics:
42 |                 metric_f1 = True
43 |
44 |         for metric in self.metrics:
45 |             # dataobject has two keys: rec.score, data.label
46 |             metric_val = self.metric_class[metric].calculate_metric(dataobject)
47 |             result_dict.update(metric_val)
48 |
49 |         # add the F1 metric, if precision and recall were calculated
50 |         if metric_f1:
51 |             k = topk[0]
52 |             keys = result_dict.keys()
53 |             key1 = 'precision@' + str(k)
54 |             key2 = 'recall@' + str(k)
55 |             key = 'f1@' + str(k)
56 |             if key1 in keys and key2 in keys and key not in keys:
57 |                 metric = {}
58 |                 for k in topk:
59 |                     key1 = 'precision@' + str(k)
60 |                     key2 = 'recall@' + str(k)
61 |                     key = 'f1@' + str(k)
62 |                     precision = result_dict[key1]
63 |                     recall = result_dict[key2]
64 |                     if (precision + recall) == 0:
65 |                         f1 = 0
66 |                     else:
67 |                         f1 = round(2 * precision * recall / (precision + recall), self.config['metric_decimal_place'])
68 |                     metric[key] = f1
69 |                 result_dict.update(metric)
70 |         return result_dict
71 |
--------------------------------------------------------------------------------
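How the Evaluator above derives F1: once precision@k and recall@k are in the result dict, f1@k is their harmonic mean, rounded to metric_decimal_place. A tiny sketch with hypothetical values:

# hypothetical precision/recall values; 4 matches metric_decimal_place in config.yaml
result = {'precision@10': 0.30, 'recall@10': 0.20}
for k in [10]:
    p, r = result['precision@' + str(k)], result['recall@' + str(k)]
    result['f1@' + str(k)] = 0 if (p + r) == 0 else round(2 * p * r / (p + r), 4)
print(result['f1@10'])  # 0.24
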
/config.yaml:
--------------------------------------------------------------------------------
1 | field_separator: ","
2 | seq_separator: " "
3 |
4 | gpu_id: 0
5 | use_gpu: True
6 | show_progress: False
7 | save_dataset: False
8 | save_dataloaders: False
9 |
10 | ############### data setting ###############
11 | seed: 2022
12 | dataset: depaulmovie
13 | # define data_path as the parent directory of your data folder
14 | # data_path: d:\dataset\
15 |
16 | USER_ID_FIELD: user_id
17 | ITEM_ID_FIELD: item_id
18 | RATING_FIELD: rating
19 | CONTEXT_SITUATION_FIELD: contexts
20 | USER_CONTEXT_FIELD: uc_id
21 |
22 | # note: you can use either load_col or unload_col, but not both
23 | # load_col is used to load specific columns; unload_col is used to ignore selected columns
24 | # set "load_col: ~" if you want to load all columns
25 | # load_col: {'inter': ['user_id','item_id','rating','contexts','uc_id']}
26 | # unload_col: {'inter': ['contexts']}
27 | # by default, we load all cols, unless there are some special requirements
28 | load_col: ~
29 |
30 | # used for topN ranking only
31 | LABEL_FIELD: label
32 | threshold:
33 | rating: 0
34 | # the current library does not support negative sampling
35 | neg_sampling: ~
36 |
37 | ############### model setting ###############
38 | model: NeuCMFii
39 |
40 | # General model
41 | epochs: 50
42 | train_batch_size: 500
43 | eval_batch_size: 409600
44 | learner: adam
45 | # learner: adam, RMSprop
46 |
47 | stopping_step: 10
48 | clip_grad_norm: ~
49 | # clip_grad_norm: {'max_norm': 5, 'norm_type': 2}
50 | weight_decay: 0.0
51 |
52 | # NeuCF models
53 | mf_embedding_size: 64
54 | mlp_embedding_size: 64
55 | mlp_hidden_size: [128,64,32]
56 | learning_rate: 0.01
57 | dropout_prob: 0.1
58 |
59 | #tf_train: True
60 | mf_train: True
61 | mlp_train: True
62 |
63 | # FM models
64 | embedding_size: 64
65 | #mlp_hidden_size: [128,64,32]
66 | #learning_rate: 0.01
67 | #dropout_prob: 0.3
68 |
69 | ############### Evaluation setting ###############
70 | eval_args:
71 | # split: {'RS': [0.8, 0.2]} # hold-out evaluation
72 | split: {'CV': 5, 'num_processes': 4} # N-fold cross validation by multiprocessing
73 | group_by: user
74 |   mode: labeled # do not change it, DeepCARSKit only supports this mode
75 | order: RO
76 |
77 | # indicate whether the task is ranking or rating prediction
78 | # evaluation metrics are selected automatically based on the True/False setting here
79 | ranking: False
80 | # indicate the activation function for the ranking task
81 | # LeakyReLU is the default activation function for both ranking and rating prediction
82 | sigmoid: False
83 |
84 | # define metrics for ranking and rating prediction tasks
85 | ranking_valid_metric: Recall@10
86 | ranking_metrics: ['Precision','Recall','NDCG','MRR','MAP']
87 | topk: [10,20,30]
88 |
89 | err_valid_metric: MAE
90 | err_metrics: ['MAE','RMSE','AUC']
91 |
92 | ############### Output setting ###############
93 | loss_decimal_place: 4
94 | metric_decimal_place: 4
95 |
96 |
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
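Launching an experiment with this file goes through the repository's run.py. A minimal sketch, assuming the entry point follows RecBole's run signature with a config_file_list parameter (check run.py and deepcarskit/quick_start/quick_start.py for the exact call):

# sketch only; the actual argument names in deepcarskit.quick_start may differ
from deepcarskit.quick_start import run

if __name__ == '__main__':
    run(config_file_list=['config.yaml'])
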
/deepcarskit/model/fms/deepfm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/7/8
3 | # @Author : Shanlei Mu
4 | # @Email : slmu@ruc.edu.cn
5 | # @File : deepfm.py
6 |
7 | # UPDATE:
8 | # @Time : 2020/8/14
9 | # @Author : Zihan Lin
10 | # @Email  : linzihan.super@foxmail.com
11 |
12 | # UPDATE:
13 | # @Time : 2021/12
14 | # @Author : Yong Zheng
15 | # @Notes : made changes to adapt it for CARS
16 |
17 | r"""
18 | DeepFM
19 | ################################################
20 | References
21 | -----
22 | Huifeng Guo et al. "DeepFM: A Factorization-Machine based Neural Network for CTR Prediction." in IJCAI 2017.
23 |
24 | Notes
25 | -----
26 | context variables are treated as individual dimensions
27 | """
28 |
29 | import torch.nn as nn
30 | from torch.nn.init import xavier_normal_, constant_
31 |
32 | from deepcarskit.model.context_recommender import ContextRecommender
33 | from recbole.model.layers import BaseFactorizationMachine, MLPLayers
34 | from recbole.utils import EvaluatorType
35 |
36 |
37 | class DeepFM(ContextRecommender):
38 |     """DeepFM is a DNN-enhanced FM that uses both a DNN and an FM to model feature interactions.
39 |     DeepFM can also be seen as a combination of FNN and FM.
40 |
41 | """
42 |
43 | def __init__(self, config, dataset):
44 | super(DeepFM, self).__init__(config, dataset)
45 |
46 | # load parameters info
47 | self.config = config
48 | self.mlp_hidden_size = config['mlp_hidden_size']
49 | self.dropout_prob = config['dropout_prob']
50 |
51 | # define layers and loss
52 | self.fm = BaseFactorizationMachine(reduce_sum=True)
53 | size_list = [self.embedding_size * self.num_feature_field] + self.mlp_hidden_size
54 | self.mlp_layers = MLPLayers(size_list, self.dropout_prob)
55 |         self.deep_predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)  # linear projection to the final score
56 |
57 | # parameters initialization
58 | self.apply(self._init_weights)
59 |
60 | def _init_weights(self, module):
61 | if isinstance(module, nn.Embedding):
62 | xavier_normal_(module.weight.data)
63 | elif isinstance(module, nn.Linear):
64 | xavier_normal_(module.weight.data)
65 | if module.bias is not None:
66 | constant_(module.bias.data, 0)
67 |
68 | def forward(self, interaction):
69 | deepfm_all_embeddings = self.concat_embed_input_fields(interaction) # [batch_size, num_field, embed_dim]
70 | batch_size = deepfm_all_embeddings.shape[0]
71 | y_fm = self.first_order_linear(interaction) + self.fm(deepfm_all_embeddings)
72 |
73 | y_deep = self.deep_predict_layer(self.mlp_layers(deepfm_all_embeddings.view(batch_size, -1)))
74 | y = self.actfun(y_fm + y_deep)
75 | return y.squeeze(-1)
76 |
77 | def calculate_loss(self, interaction):
78 | label = interaction[self.LABEL]
79 | output = self.forward(interaction)
80 | return self.loss(output, label)
81 |
82 | def predict(self, interaction):
83 | return self.forward(interaction)
84 |
--------------------------------------------------------------------------------
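The `BaseFactorizationMachine(reduce_sum=True)` term in the forward pass computes the second-order FM interaction sum_{i<j} <v_i, v_j>. A standalone check of the standard O(num_field * embed_dim) identity such layers rely on, 0.5 * sum_f [(sum_i v_if)^2 - sum_i v_if^2], on random tensors (illustrating the math, not DeepCARSKit code):

import torch

torch.manual_seed(0)
v = torch.randn(2, 5, 8)  # [batch_size, num_field, embed_dim]

# naive pairwise sum of inner products over field pairs i < j
naive = sum((v[:, i] * v[:, j]).sum(-1)
            for i in range(5) for j in range(i + 1, 5))

# the linear-time identity used by FM layers
fast = 0.5 * ((v.sum(1) ** 2) - (v ** 2).sum(1)).sum(-1)

print(torch.allclose(naive, fast, atol=1e-5))  # True
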
/deepcarskit/model/layers.py:
--------------------------------------------------------------------------------
1 |
2 | # @Time : 2021/12
3 | # @Author : Yong Zheng
4 | # @Notes : Inherit from recbole.model.layers.FMFirstOrderLinear
5 |
6 |
7 |
8 | """
9 | deepcarskit.model.layers
10 | #############################
11 | Common layers in recommender systems
12 | """
13 |
14 | from recbole.model.layers import FMFirstOrderLinear
15 | from recbole.model.layers import FMEmbedding
16 |
17 | import numpy as np
18 | import torch
19 | import torch.nn as nn
20 |
21 | from recbole.utils import FeatureType, FeatureSource
22 |
23 |
24 |
25 | class FMFirstOrderLinear(FMFirstOrderLinear):  # intentionally reuses the name of the imported recbole class it extends
26 | """Calculate the first order score of the input features.
27 | This class is a member of ContextRecommender, you can call it easily when inherit ContextRecommender.
28 |
29 | """
30 |
31 | def __init__(self, config, dataset, output_dim=1):
32 |
33 | super(FMFirstOrderLinear, self).__init__(config, dataset, output_dim)
34 | self.field_names = dataset.fields(
35 | source=[
36 | FeatureSource.INTERACTION,
37 | FeatureSource.USER,
38 | FeatureSource.USER_ID,
39 | FeatureSource.ITEM,
40 | FeatureSource.ITEM_ID,
41 | ]
42 | )
43 | if config['ranking']:
44 | self.LABEL = config['LABEL_FIELD']
45 | else:
46 | self.LABEL = config['RATING_FIELD']
47 | self.device = config['device']
48 | self.token_field_names = []
49 | self.token_field_dims = []
50 | self.float_field_names = []
51 | self.float_field_dims = []
52 | self.token_seq_field_names = []
53 | self.token_seq_field_dims = []
54 | for field_name in self.field_names:
55 | if field_name == config['RATING_FIELD'] or field_name == config['LABEL_FIELD']:
56 | continue
57 | if dataset.field2type[field_name] == FeatureType.TOKEN:
58 | self.token_field_names.append(field_name)
59 | self.token_field_dims.append(dataset.num(field_name))
60 | elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ:
61 | self.token_seq_field_names.append(field_name)
62 | self.token_seq_field_dims.append(dataset.num(field_name))
63 | else:
64 | self.float_field_names.append(field_name)
65 | self.float_field_dims.append(dataset.num(field_name))
66 | if len(self.token_field_dims) > 0:
67 |             self.token_field_offsets = np.array((0, *np.cumsum(self.token_field_dims)[:-1]), dtype=np.int64)
68 | self.token_embedding_table = FMEmbedding(self.token_field_dims, self.token_field_offsets, output_dim)
69 | if len(self.float_field_dims) > 0:
70 | self.float_embedding_table = nn.Embedding(np.sum(self.float_field_dims, dtype=np.int32), output_dim)
71 | if len(self.token_seq_field_dims) > 0:
72 | self.token_seq_embedding_table = nn.ModuleList()
73 | for token_seq_field_dim in self.token_seq_field_dims:
74 | self.token_seq_embedding_table.append(nn.Embedding(token_seq_field_dim, output_dim))
75 |
76 | self.bias = nn.Parameter(torch.zeros((output_dim,)), requires_grad=True)
--------------------------------------------------------------------------------
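The `token_field_offsets` computed above implement a common trick: several token fields share one embedding table by shifting each field's ids with a cumulative offset, so a single lookup serves all fields. A sketch of the idea (toy sizes, not the tables built above):

import numpy as np
import torch
import torch.nn as nn

dims = [3, 5, 2]                                # vocabulary size per token field
offsets = np.array((0, *np.cumsum(dims)[:-1]))  # [0, 3, 8]
table = nn.Embedding(int(np.sum(dims)), 4)      # one 10-row table for all three fields

ids = torch.tensor([[2, 4, 1]])                 # one raw id per field
shifted = ids + torch.as_tensor(offsets)        # rows 2, 7, 9 of the shared table
print(table(shifted).shape)                     # torch.Size([1, 3, 4])
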
/deepcarskit/utils/logger.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/8/7
3 | # @Author : Zihan Lin
4 | # @Email : linzihan.super@foxmail.com
5 |
6 | # UPDATE
7 | # @Time : 2021/3/7
8 | # @Author : Jiawei Guan
9 | # @Email : guanjw@ruc.edu.cn
10 |
11 | # UPDATE:
12 | # @Time : 2021/12
13 | # @Author : Yong Zheng
14 | # @Notes : made light changes to adapt it for CARS
15 |
16 | """
17 | deepcarskit.utils.logger
18 | ###############################
19 | """
20 |
21 | import logging
22 | import os
23 | import colorlog
24 | import re
25 |
26 | from recbole.utils.utils import get_local_time, ensure_dir
27 | from colorama import init
28 |
29 | log_colors_config = {
30 | 'DEBUG': 'cyan',
31 | 'WARNING': 'yellow',
32 | 'ERROR': 'red',
33 | 'CRITICAL': 'red',
34 | }
35 |
36 |
37 | class RemoveColorFilter(logging.Filter):
38 |
39 | def filter(self, record):
40 | if record:
41 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
42 | record.msg = ansi_escape.sub('', str(record.msg))
43 | return True
44 |
45 |
46 | def set_color(log, color, highlight=True):
47 | color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white']
48 | try:
49 | index = color_set.index(color)
50 |     except ValueError:
51 | index = len(color_set) - 1
52 | prev_log = '\033['
53 | if highlight:
54 | prev_log += '1;3'
55 | else:
56 | prev_log += '0;3'
57 | prev_log += str(index) + 'm'
58 | return prev_log + log + '\033[0m'
59 |
60 |
61 | def init_logger(config):
62 | """
63 |     A logger that shows each message on standard output and writes it into a
64 |     log file (named after the dataset, model and current time) simultaneously.
65 |     All messages that you want to log MUST be str.
66 |
67 | Args:
68 | config (Config): An instance object of Config, used to record parameter information.
69 |
70 | Example:
71 |         >>> logger = logging.getLogger()  # after init_logger(config) has been called
72 | >>> logger.debug(train_state)
73 | >>> logger.info(train_result)
74 | """
75 | init(autoreset=True)
76 | LOGROOT = './log/'
77 | dir_name = os.path.dirname(LOGROOT)
78 | ensure_dir(dir_name)
79 |
80 | logfilename = '{}-{}-{}.log'.format(config['dataset'], config['model'], get_local_time())
81 |
82 | logfilepath = os.path.join(LOGROOT, logfilename)
83 |
84 | filefmt = "%(asctime)-15s %(levelname)s %(message)s"
85 | filedatefmt = "%a %d %b %Y %H:%M:%S"
86 | fileformatter = logging.Formatter(filefmt, filedatefmt)
87 |
88 | sfmt = "%(log_color)s%(asctime)-15s %(levelname)s %(message)s"
89 | sdatefmt = "%d %b %H:%M"
90 | sformatter = colorlog.ColoredFormatter(sfmt, sdatefmt, log_colors=log_colors_config)
91 | if config['state'] is None or config['state'].lower() == 'info':
92 | level = logging.INFO
93 | elif config['state'].lower() == 'debug':
94 | level = logging.DEBUG
95 | elif config['state'].lower() == 'error':
96 | level = logging.ERROR
97 | elif config['state'].lower() == 'warning':
98 | level = logging.WARNING
99 | elif config['state'].lower() == 'critical':
100 | level = logging.CRITICAL
101 | else:
102 | level = logging.INFO
103 |
104 | fh = logging.FileHandler(logfilepath)
105 | fh.setLevel(level)
106 | fh.setFormatter(fileformatter)
107 | remove_color_filter = RemoveColorFilter()
108 | fh.addFilter(remove_color_filter)
109 |
110 | sh = logging.StreamHandler()
111 | sh.setLevel(level)
112 | sh.setFormatter(sformatter)
113 |
114 | logging.basicConfig(level=level, handlers=[sh, fh])
115 | return fh, logfilepath
116 |
--------------------------------------------------------------------------------
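A usage sketch for init_logger, with a plain dict standing in for the Config object DeepCARSKit actually passes (a dict works because the function only does key lookups; requires recbole, colorlog and colorama to be installed):

import logging
from deepcarskit.utils.logger import init_logger

config = {'dataset': 'depaulmovie', 'model': 'NeuCMFii', 'state': 'info'}
fh, logfilepath = init_logger(config)
logging.getLogger().info('logging to %s', logfilepath)
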
/deepcarskit/evaluator/collector.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 | # @Notes : Inherit from recbole.evaluator.Collector
4 |
5 | """
6 | deepcarskit.evaluator.collector
7 | ################################################
8 | """
9 |
10 | from recbole.evaluator.register import Register
11 | from recbole.evaluator import Collector, DataStruct
12 | import torch
13 |
14 |
15 | class CARSCollector(Collector):
16 |     """The collector is used to collect the resources for the evaluator.
17 |     As the evaluation metrics are various, the needed resources contain not only the recommended results
18 |     but also other resources from the data and the model. They can all be collected by the collector during the training
19 |     and evaluation process.
20 |
21 | This class is only used in Trainer.
22 |
23 | """
24 |
25 | def __init__(self, config):
26 | self.config = config
27 | self.data_struct = DataStruct()
28 | self.register = Register(config)
29 | self.full = ('full' in config['eval_args']['mode'])
30 | self.topk = self.config['topk']
31 | self.device = self.config['device']
32 |
33 | def eval_batch_collect(
34 | self, scores_tensor: torch.Tensor, interaction, positive_u: torch.Tensor, positive_i: torch.Tensor
35 | ):
36 | """ Collect the evaluation resource from batched eval data and batched model output.
37 | Args:
38 | scores_tensor (Torch.Tensor): the output tensor of model with the shape of `(N, )`
39 | interaction(Interaction): batched eval data.
40 | positive_u(Torch.Tensor): the row index of positive items for each user.
41 | positive_i(Torch.Tensor): the positive item id for each user.
42 | """
43 | if self.register.need('rec.items'):
44 |
45 | # get topk
46 | _, topk_idx = torch.topk(scores_tensor, max(self.topk), dim=-1) # n_users x k
47 | self.data_struct.update_tensor('rec.items', topk_idx)
48 |
49 | if self.register.need('rec.topk'):
50 |
51 | _, topk_idx = torch.topk(scores_tensor, max(self.topk), dim=-1) # n_users x k
52 | pos_matrix = torch.zeros_like(scores_tensor, dtype=torch.int)
53 | pos_matrix[positive_u, positive_i] = 1
54 | pos_len_list = pos_matrix.sum(dim=1, keepdim=True)
55 | pos_idx = torch.gather(pos_matrix, dim=1, index=topk_idx)
56 | result = torch.cat((pos_idx, pos_len_list), dim=1)
57 | self.data_struct.update_tensor('rec.topk', result)
58 |
59 | if self.register.need('rec.meanrank'):
60 |
61 | desc_scores, desc_index = torch.sort(scores_tensor, dim=-1, descending=True)
62 |
63 | # get the index of positive items in the ranking list
64 | pos_matrix = torch.zeros_like(scores_tensor)
65 | pos_matrix[positive_u, positive_i] = 1
66 | pos_index = torch.gather(pos_matrix, dim=1, index=desc_index)
67 |
68 | avg_rank = self._average_rank(desc_scores)
69 | pos_rank_sum = torch.where(pos_index == 1, avg_rank, torch.zeros_like(avg_rank)).sum(dim=-1, keepdim=True)
70 |
71 | pos_len_list = pos_matrix.sum(dim=1, keepdim=True)
72 | user_len_list = desc_scores.argmin(dim=1, keepdim=True)
73 | result = torch.cat((pos_rank_sum, user_len_list, pos_len_list), dim=1)
74 | self.data_struct.update_tensor('rec.meanrank', result)
75 |
76 | if self.register.need('rec.score'):
77 |
78 | self.data_struct.update_tensor('rec.score', scores_tensor)
79 |
80 | if self.register.need('data.label'):
81 | self.label_field = self.config['LABEL_FIELD']
82 | if self.config['ranking']:
83 | self.data_struct.update_tensor('data.label', interaction[self.label_field].to(self.device))
84 | else:
85 | self.data_struct.update_tensor('data.label', interaction[self.config['RATING_FIELD']].to(self.device))
86 |
--------------------------------------------------------------------------------
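The 'rec.topk' tensor built above packs, per user row, the top-k hit indicators followed by that user's positive-item count. A toy reconstruction with 2 users, 4 items and k = 2:

import torch

scores = torch.tensor([[0.9, 0.1, 0.8, 0.2],
                       [0.3, 0.7, 0.2, 0.6]])
positive_u = torch.tensor([0, 0, 1])  # row index of each positive pair
positive_i = torch.tensor([0, 3, 1])  # item id of each positive pair

_, topk_idx = torch.topk(scores, 2, dim=-1)         # items [0, 2] and [1, 3]
pos_matrix = torch.zeros_like(scores, dtype=torch.int)
pos_matrix[positive_u, positive_i] = 1
pos_len_list = pos_matrix.sum(dim=1, keepdim=True)  # [[2], [1]]
pos_idx = torch.gather(pos_matrix, dim=1, index=topk_idx)
print(torch.cat((pos_idx, pos_len_list), dim=1))
# tensor([[1, 0, 2],
#         [1, 0, 1]])  -> each row: hits at ranks 1..k, then the positive count
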
/deepcarskit/model/neucf/neucmf0w.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 |
4 |
5 | r"""
6 | NeuCMF0w
7 | ################################################
8 | References
9 | -----
10 | Unger, M., Tuzhilin, A., & Livne, A. (2020). Context-aware recommendations based on deep learning frameworks. ACM Transactions on Management Information Systems (TMIS), 11(2), 1-15.
11 |
12 | Notes
13 | -----
14 | 1). NeuCMF0w has 2 towers (MLP and MF), and it fuses contexts into the MLP tower only.
15 |
16 | 2). NeuCMF0w embeds the context situation as a whole, i.e., as a single dimension
17 | """
18 |
19 | import torch
20 | import torch.nn as nn
21 | from torch.nn.init import normal_
22 |
23 | from deepcarskit.model.context_recommender import ContextRecommender
24 | from recbole.model.layers import MLPLayers
25 | from recbole.utils import InputType, EvaluatorType
26 |
27 |
28 | class NeuCMF0w(ContextRecommender):
29 |
30 | input_type = InputType.POINTWISE
31 |
32 | def __init__(self, config, dataset):
33 | super(NeuCMF0w, self).__init__(config, dataset)
34 |
35 | # load parameters info
36 | self.mf_embedding_size = config['mf_embedding_size']
37 | self.mlp_embedding_size = config['mlp_embedding_size']
38 | self.mlp_hidden_size = config['mlp_hidden_size']
39 | self.dropout_prob = config['dropout_prob']
40 | self.mf_train = config['mf_train']
41 | self.mlp_train = config['mlp_train']
42 | self.use_pretrain = config['use_pretrain']
43 | self.mf_pretrain_path = config['mf_pretrain_path']
44 | self.mlp_pretrain_path = config['mlp_pretrain_path']
45 |
46 | # define layers and loss
47 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size)
48 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size)
49 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size)
50 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size)
51 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size)
52 |
53 | # mlp layers = user, item, context_situation
54 | self.mlp_layers = MLPLayers([3 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob)
55 | self.mlp_layers.logger = None # remove logger to use torch.save()
56 | if self.mf_train and self.mlp_train:
57 | self.predict_layer = nn.Linear(self.mf_embedding_size + self.mlp_hidden_size[-1], 1)
58 | elif self.mf_train:
59 | self.predict_layer = nn.Linear(self.mf_embedding_size, 1)
60 | elif self.mlp_train:
61 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
62 |
63 | # parameters initialization
64 | if self.use_pretrain:
65 | self.load_pretrain()
66 | else:
67 | self.apply(self._init_weights)
68 |
69 | def _init_weights(self, module):
70 | if isinstance(module, nn.Embedding):
71 | normal_(module.weight.data, mean=0.0, std=0.01)
72 |
73 | def forward(self, user, item, context_situation):
74 | user_mf_e = self.user_mf_embedding(user)
75 | item_mf_e = self.item_mf_embedding(item)
76 | user_mlp_e = self.user_mlp_embedding(user)
77 | item_mlp_e = self.item_mlp_embedding(item)
78 | context_situation_e = self.context_situation_mlp_embedding(context_situation)
79 | if self.mf_train:
80 | mf_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size]
81 | if self.mlp_train:
82 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]]
83 |
84 | if self.mf_train and self.mlp_train:
85 | output = self.actfun(self.predict_layer(torch.cat((mf_output, mlp_output), -1)))
86 | elif self.mf_train:
87 | output = self.actfun(self.predict_layer(mf_output))
88 | elif self.mlp_train:
89 | output = self.actfun(self.predict_layer(mlp_output))
90 | else:
91 |             raise RuntimeError('mf_train and mlp_train cannot be False at the same time')
92 | return output.squeeze(-1)
93 |
94 | def calculate_loss(self, interaction):
95 | user = interaction[self.USER_ID]
96 | item = interaction[self.ITEM_ID]
97 | context_situation = interaction[self.CONTEXT_SITUATION_ID]
98 | label = interaction[self.LABEL]
99 |
100 | output = self.forward(user, item, context_situation)
101 | return self.loss(output, label)
102 |
103 | def predict(self, interaction):
104 | user = interaction[self.USER_ID]
105 | item = interaction[self.ITEM_ID]
106 | context_situation = interaction[self.CONTEXT_SITUATION_ID]
107 | return self.forward(user, item, context_situation)
108 |
109 | def dump_parameters(self):
110 | r"""A simple implementation of dumping model parameters for pretrain.
111 |
112 | """
113 | if self.mf_train and not self.mlp_train:
114 | save_path = self.mf_pretrain_path
115 | torch.save(self, save_path)
116 | elif self.mlp_train and not self.mf_train:
117 | save_path = self.mlp_pretrain_path
118 | torch.save(self, save_path)
119 |
--------------------------------------------------------------------------------
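Across the neucf package, the two-character suffix after "NeuCMF" encodes where and how contexts are fused: the first character describes the MF tower(s) and the second the MLP tower, with 0 = contexts not fused, w = the whole context situation embedded as a single dimension, and i = one embedding per individual context condition; NeuCMF0w above therefore fuses the whole situation into the MLP tower only. A quick shape check of NeuCMF0w's prediction layer, using the sizes from config.yaml (dummy tensors, not a training run):

import torch
import torch.nn as nn

mf_size, mlp_hidden = 64, [128, 64, 32]
mf_out = torch.randn(500, mf_size)           # torch.mul(user_mf_e, item_mf_e)
mlp_out = torch.randn(500, mlp_hidden[-1])   # output of MLPLayers
predict = nn.Linear(mf_size + mlp_hidden[-1], 1)
y = predict(torch.cat((mf_out, mlp_out), -1))
print(y.squeeze(-1).shape)                   # torch.Size([500])
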
/deepcarskit/evaluator/base_metric.py:
--------------------------------------------------------------------------------
1 | # @Time : 2020/10/21
2 | # @Author : Kaiyuan Li
3 | # @email : tsotfsk@outlook.com
4 |
5 | # UPDATE
6 | # @Time : 2020/10/21, 2021/8/29
7 | # @Author : Kaiyuan Li, Zhichao Feng
8 | # @email : tsotfsk@outlook.com, fzcbupt@gmail.com
9 |
10 | # UPDATE:
11 | # @Time : 2021/12
12 | # @Author : Yong Zheng
13 | # @Notes : made light changes to adapt it for CARS
14 |
15 | """
16 | deepcarskit.evaluator.abstract_metric
17 | #####################################
18 | """
19 | import numpy
20 | import torch
21 | from recbole.utils import EvaluatorType
22 |
23 |
24 | class AbstractMetric(object):
25 | """:class:`AbstractMetric` is the base object of all metrics. If you want to
26 | implement a metric, you should inherit this class.
27 |
28 | Args:
29 | config (Config): the config of evaluator.
30 | """
31 | smaller = False
32 |
33 | def __init__(self, config):
34 | self.decimal_place = config['metric_decimal_place']
35 |
36 | def calculate_metric(self, dataobject):
37 | """Get the dictionary of a metric.
38 |
39 | Args:
40 | dataobject(DataStruct): it contains all the information needed to calculate metrics.
41 |
42 | Returns:
43 |             dict: such as ``{'metric@10': 0.3153, 'metric@20': 0.3824}``
44 | """
45 | raise NotImplementedError('Method [calculate_metric] should be implemented.')
46 |
47 |
48 | class TopkMetric(AbstractMetric):
49 | """:class:`TopkMetric` is a base object of top-k metrics. If you want to
50 |     implement a top-k metric, you can inherit this class.
51 |
52 | Args:
53 | config (Config): The config of evaluator.
54 | """
55 | metric_type = EvaluatorType.RANKING
56 | metric_need = ['uc', 'rec.topk']
57 |
58 | def __init__(self, config):
59 | super().__init__(config)
60 | self.topk = config['topk']
61 |
62 | def used_info(self, dataobject):
63 | """Get the bool matrix indicating whether the corresponding item is positive
64 | and number of positive items for each user.
65 | """
66 | rec_mat = dataobject.get('rec.topk')
67 | topk_idx, pos_len_list = torch.split(rec_mat, [max(self.topk), 1], dim=1)
68 | return topk_idx.to(torch.bool).numpy(), pos_len_list.squeeze(-1).numpy()
69 |
70 | def topk_result(self, metric, value):
71 | """Match the metric value to the `k` and put them in `dictionary` form.
72 |
73 | Args:
74 | metric(str): the name of calculated metric.
75 | value(numpy.ndarray): metrics for each user, including values from `metric@1` to `metric@max(self.topk)`.
76 |
77 | Returns:
78 | dict: metric values required in the configuration.
79 | """
80 |
81 | metric_dict = {}
82 | avg_result = value.mean(axis=0)
83 | for k in self.topk:
84 | key = '{}@{}'.format(metric, k)
85 | metric_dict[key] = round(avg_result[k - 1], self.decimal_place)
86 | return metric_dict
87 |
88 | def metric_info(self, pos_index, pos_len=None):
89 | """Calculate the value of the metric.
90 |
91 | Args:
92 | pos_index(numpy.ndarray): a bool matrix, shape of ``n_users * max(topk)``. The item with the (j+1)-th \
93 | highest score of i-th user is positive if ``pos_index[i][j] == True`` and negative otherwise.
94 | pos_len(numpy.ndarray): a vector representing the number of positive items per user, shape of ``(n_users,)``.
95 |
96 | Returns:
97 | numpy.ndarray: metrics for each user, including values from `metric@1` to `metric@max(self.topk)`.
98 | """
99 | raise NotImplementedError('Method [metric_info] of top-k metric should be implemented.')
100 |
101 |
102 | class LossMetric(AbstractMetric):
103 |     """:class:`LossMetric` is a base object of loss-based metrics and AUC. If you want to
104 |     implement a loss-based metric, you can inherit this class.
105 |
106 | Args:
107 | config (Config): The config of evaluator.
108 | """
109 | metric_type = EvaluatorType.VALUE
110 | metric_need = ['rec.score', 'data.label']
111 |
112 | def __init__(self, config):
113 | super().__init__(config)
114 | self.config = config
115 |
116 | def used_info(self, dataobject):
117 | """Get scores that model predicted and the ground truth."""
118 | preds = dataobject.get('rec.score')
119 | trues = dataobject.get('data.label')
120 |
121 | return preds.squeeze(-1).numpy(), trues.squeeze(-1).numpy()
122 |
123 | def output_metric(self, metric, dataobject):
124 | preds, trues = self.used_info(dataobject)
125 | result = self.metric_info(preds, trues)
126 |         result = round(result, self.decimal_place)
127 | return {metric: result}
128 |
129 | def metric_info(self, preds, trues):
130 | """Calculate the value of the metric.
131 |
132 | Args:
133 | preds (numpy.ndarray): the scores predicted by model, a one-dimensional vector.
134 | trues (numpy.ndarray): the label of items, which has the same shape as ``preds``.
135 |
136 | Returns:
137 | float: The value of the metric.
138 | """
139 | raise NotImplementedError('Method [metric_info] of loss-based metric should be implemented.')
140 |
--------------------------------------------------------------------------------
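A sketch of how a new error metric could plug into LossMetric above (illustrative only; the metrics DeepCARSKit actually ships come from recbole's metric registry, and a new one would also need to be registered there before `metrics:` in config.yaml could select it):

import numpy as np
from deepcarskit.evaluator.base_metric import LossMetric

class MedAE(LossMetric):
    """Hypothetical median-absolute-error metric following the LossMetric contract."""
    smaller = True  # lower is better

    def calculate_metric(self, dataobject):
        return self.output_metric('medae', dataobject)

    def metric_info(self, preds, trues):
        return float(np.median(np.abs(preds - trues)))
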
/deepcarskit/model/neucf/neucmfw0.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2022
3 | # @Author : Yong Zheng
4 |
5 |
6 |
7 | r"""
8 | NeuCMFw0
9 | ################################################
10 | References
11 | -----
12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022
13 |
14 | Notes
15 | -----
16 | 1). NeuCMFw0 has 4 towers: MLP tower without contexts, MF tower with UI, MF with UC, MF with IC
17 |
18 | 2). w => we treat the context situation as a whole (a single dimension) and create one embedding for it when fusing contexts into the MF towers
19 | """
20 |
21 | import torch
22 | import torch.nn as nn
23 | from torch.nn.init import normal_
24 |
25 | from deepcarskit.model.context_recommender import ContextRecommender
26 | from recbole.model.layers import MLPLayers
27 | from recbole.utils import InputType, EvaluatorType
28 |
29 |
30 | class NeuCMFw0(ContextRecommender):
31 |
32 | input_type = InputType.POINTWISE
33 |
34 | def __init__(self, config, dataset):
35 | super(NeuCMFw0, self).__init__(config, dataset)
36 |
37 | # load parameters info
38 | self.mf_embedding_size = config['mf_embedding_size']
39 | self.mlp_embedding_size = config['mlp_embedding_size']
40 | self.mlp_hidden_size = config['mlp_hidden_size']
41 | self.dropout_prob = config['dropout_prob']
42 | self.mf_train = config['mf_train']
43 | self.mlp_train = config['mlp_train']
44 | self.use_pretrain = config['use_pretrain']
45 | self.mf_pretrain_path = config['mf_pretrain_path']
46 | self.mlp_pretrain_path = config['mlp_pretrain_path']
47 |
48 | # define layers and loss
49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size)
50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size)
51 | self.context_situation_mf_embedding = nn.Embedding(self.n_context_situation, self.mf_embedding_size)
52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size)
53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size)
54 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size)
55 |
56 | # mlp layers = user, item
57 | self.mlp_layers = MLPLayers([2 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob)
58 | self.mlp_layers.logger = None # remove logger to use torch.save()
59 | if self.mf_train and self.mlp_train:
60 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size + self.mlp_hidden_size[-1], 1)
61 | elif self.mf_train:
62 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size, 1)
63 | elif self.mlp_train:
64 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
65 |
66 | # parameters initialization
67 | if self.use_pretrain:
68 | self.load_pretrain()
69 | else:
70 | self.apply(self._init_weights)
71 |
72 | def _init_weights(self, module):
73 | if isinstance(module, nn.Embedding):
74 | normal_(module.weight.data, mean=0.0, std=0.01)
75 |
76 | def forward(self, user, item, context_situation):
77 | user_mf_e = self.user_mf_embedding(user)
78 | item_mf_e = self.item_mf_embedding(item)
79 | context_situation_mf_e = self.context_situation_mf_embedding(context_situation)
80 | user_mlp_e = self.user_mlp_embedding(user)
81 | item_mlp_e = self.item_mlp_embedding(item)
82 | if self.mf_train:
83 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size]
84 | mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e) # [batch_size, embedding_size]
85 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size]
86 | if self.mlp_train:
87 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e), -1)) # [batch_size, layers[-1]]
88 |
89 | if self.mf_train and self.mlp_train:
90 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1)))
91 | elif self.mf_train:
92 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1)))
93 | elif self.mlp_train:
94 | output = self.actfun(self.predict_layer(mlp_output))
95 | else:
96 |             raise RuntimeError('mf_train and mlp_train cannot be False at the same time')
97 | return output.squeeze(-1)
98 |
99 | def calculate_loss(self, interaction):
100 | user = interaction[self.USER_ID]
101 | item = interaction[self.ITEM_ID]
102 | context_situation = interaction[self.CONTEXT_SITUATION_ID]
103 | label = interaction[self.LABEL]
104 |
105 | output = self.forward(user, item, context_situation)
106 | return self.loss(output, label)
107 |
108 | def predict(self, interaction):
109 | user = interaction[self.USER_ID]
110 | item = interaction[self.ITEM_ID]
111 | context_situation = interaction[self.CONTEXT_SITUATION_ID]
112 | return self.forward(user, item, context_situation)
113 |
114 | def dump_parameters(self):
115 | r"""A simple implementation of dumping model parameters for pretrain.
116 |
117 | """
118 | if self.mf_train and not self.mlp_train:
119 | save_path = self.mf_pretrain_path
120 | torch.save(self, save_path)
121 | elif self.mlp_train and not self.mf_train:
122 | save_path = self.mlp_pretrain_path
123 | torch.save(self, save_path)
124 |
--------------------------------------------------------------------------------
/deepcarskit/model/neucf/neucmf0i.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 |
4 | r"""
5 | NeuCMF0i
6 | ################################################
7 | References
8 | -----
9 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022
10 |
11 | Notes
12 | -----
13 | 1). NeuCMF0i has 2 towers (MLP and MF), and it fuses contexts into the MLP tower only.
14 |
15 | 2). NeuCMF0i creates an embedding for each individual context condition.
16 | """
17 |
18 | import torch
19 | import torch.nn as nn
20 | from torch.nn.init import normal_
21 |
22 | from deepcarskit.model.context_recommender import ContextRecommender
23 | from recbole.model.layers import MLPLayers
24 | from recbole.utils import InputType, EvaluatorType
25 |
26 |
27 | class NeuCMF0i(ContextRecommender):
28 |
29 | input_type = InputType.POINTWISE
30 |
31 | def __init__(self, config, dataset):
32 | super(NeuCMF0i, self).__init__(config, dataset)
33 |
34 | # load parameters info
35 | self.mf_embedding_size = config['mf_embedding_size']
36 | self.mlp_embedding_size = config['mlp_embedding_size']
37 | self.mlp_hidden_size = config['mlp_hidden_size']
38 | self.dropout_prob = config['dropout_prob']
39 | self.mf_train = config['mf_train']
40 | self.mlp_train = config['mlp_train']
41 | self.use_pretrain = config['use_pretrain']
42 | self.mf_pretrain_path = config['mf_pretrain_path']
43 | self.mlp_pretrain_path = config['mlp_pretrain_path']
44 |
45 | # define layers and loss
46 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size)
47 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size)
48 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size)
49 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size)
50 |         self.context_dimensions_mlp_embedding = nn.ModuleList()  # ModuleList registers the embeddings so the optimizer updates them
51 | for i in range(0, self.n_contexts_dim):
52 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device))
53 |
54 | # mlp layers = user, item, context_situation
55 | self.mlp_layers = MLPLayers([(2 + self.n_contexts_dim) * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob)
56 | self.mlp_layers.logger = None # remove logger to use torch.save()
57 | if self.mf_train and self.mlp_train:
58 | self.predict_layer = nn.Linear(self.mf_embedding_size + self.mlp_hidden_size[-1], 1)
59 | elif self.mf_train:
60 | self.predict_layer = nn.Linear(self.mf_embedding_size, 1)
61 | elif self.mlp_train:
62 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
63 |
64 | # parameters initialization
65 | if self.use_pretrain:
66 | self.load_pretrain()
67 | else:
68 | self.apply(self._init_weights)
69 |
70 | def _init_weights(self, module):
71 | if isinstance(module, nn.Embedding):
72 | normal_(module.weight.data, mean=0.0, std=0.01)
73 |
74 | def forward(self, user, item, context_situation_list):
75 | user_mf_e = self.user_mf_embedding(user)
76 | item_mf_e = self.item_mf_embedding(item)
77 | user_mlp_e = self.user_mlp_embedding(user)
78 | item_mlp_e = self.item_mlp_embedding(item)
79 | context_situation_e = None
80 | for i in range(0, self.n_contexts_dim):
81 | condition = context_situation_list[i]
82 | embd = self.context_dimensions_mlp_embedding[i](condition)
83 | if context_situation_e is None:
84 | context_situation_e = embd
85 | else:
86 | context_situation_e = torch.cat((context_situation_e, embd), -1)
87 | if self.mf_train:
88 | mf_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size]
89 | if self.mlp_train:
90 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]]
91 | if self.mf_train and self.mlp_train:
92 | output = self.actfun(self.predict_layer(torch.cat((mf_output, mlp_output), -1)))
93 | elif self.mf_train:
94 | output = self.actfun(self.predict_layer(mf_output))
95 | elif self.mlp_train:
96 | output = self.actfun(self.predict_layer(mlp_output))
97 | else:
98 |             raise RuntimeError('mf_train and mlp_train cannot be False at the same time')
99 | return output.squeeze(-1)
100 |
101 | def calculate_loss(self, interaction):
102 | user = interaction[self.USER_ID]
103 | item = interaction[self.ITEM_ID]
104 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS)
105 | label = interaction[self.LABEL]
106 |
107 | output = self.forward(user, item, context_situation_list)
108 | return self.loss(output, label)
109 |
110 | def predict(self, interaction):
111 | user = interaction[self.USER_ID]
112 | item = interaction[self.ITEM_ID]
113 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS)
114 | return self.forward(user, item, context_situation_list)
115 |
116 | def dump_parameters(self):
117 | r"""A simple implementation of dumping model parameters for pretrain.
118 |
119 | """
120 | if self.mf_train and not self.mlp_train:
121 | save_path = self.mf_pretrain_path
122 | torch.save(self, save_path)
123 | elif self.mlp_train and not self.mf_train:
124 | save_path = self.mlp_pretrain_path
125 | torch.save(self, save_path)
--------------------------------------------------------------------------------
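NeuCMF0i keeps its per-dimension context embeddings in an nn.ModuleList rather than a plain Python list; this matters because modules stored in a plain list are invisible to model.parameters(), so the optimizer would silently never update them. A two-class demonstration:

import torch.nn as nn

class PlainList(nn.Module):
    def __init__(self):
        super().__init__()
        self.embs = [nn.Embedding(10, 4)]                 # not registered

class Registered(nn.Module):
    def __init__(self):
        super().__init__()
        self.embs = nn.ModuleList([nn.Embedding(10, 4)])  # registered

print(len(list(PlainList().parameters())))   # 0
print(len(list(Registered().parameters())))  # 1
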
/deepcarskit/model/neucf/neucmfww.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2022
3 | # @Author : Yong Zheng
4 |
5 |
6 |
7 | r"""
8 | NeuCMFww
9 | ################################################
10 | References
11 | -----
12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022
13 |
14 | Notes
15 | -----
16 | 1). NeuCMFww has 4 towers: MLP tower with contexts, MF tower with UI, MF with UC, MF with IC
17 |
18 | 2). ww => we treat the context situation as a whole (a single dimension) and create one embedding for it when fusing contexts into both the MLP and MF towers
19 | """
20 |
21 | import torch
22 | import torch.nn as nn
23 | from torch.nn.init import normal_
24 |
25 | from deepcarskit.model.context_recommender import ContextRecommender
26 | from recbole.model.layers import MLPLayers
27 | from recbole.utils import InputType, EvaluatorType
28 |
29 |
30 | class NeuCMFww(ContextRecommender):
31 |
32 | input_type = InputType.POINTWISE
33 |
34 | def __init__(self, config, dataset):
35 | super(NeuCMFww, self).__init__(config, dataset)
36 |
37 | # load parameters info
38 | self.mf_embedding_size = config['mf_embedding_size']
39 | self.mlp_embedding_size = config['mlp_embedding_size']
40 | self.mlp_hidden_size = config['mlp_hidden_size']
41 | self.dropout_prob = config['dropout_prob']
42 | self.mf_train = config['mf_train']
43 | self.mlp_train = config['mlp_train']
44 | self.use_pretrain = config['use_pretrain']
45 | self.mf_pretrain_path = config['mf_pretrain_path']
46 | self.mlp_pretrain_path = config['mlp_pretrain_path']
47 |
48 | # define layers and loss
49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size)
50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size)
51 | self.context_situation_mf_embedding = nn.Embedding(self.n_context_situation, self.mf_embedding_size)
52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size)
53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size)
54 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size)
55 |
56 | # mlp layers = user, item, context_situation
57 | self.mlp_layers = MLPLayers([3 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob)
58 | self.mlp_layers.logger = None # remove logger to use torch.save()
59 | if self.mf_train and self.mlp_train:
60 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size + self.mlp_hidden_size[-1], 1)
61 | elif self.mf_train:
62 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size, 1)
63 | elif self.mlp_train:
64 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
65 |
66 | # parameters initialization
67 | if self.use_pretrain:
68 | self.load_pretrain()
69 | else:
70 | self.apply(self._init_weights)
71 |
72 | def _init_weights(self, module):
73 | if isinstance(module, nn.Embedding):
74 | normal_(module.weight.data, mean=0.0, std=0.01)
75 |
76 | def forward(self, user, item, context_situation):
77 | user_mf_e = self.user_mf_embedding(user)
78 | item_mf_e = self.item_mf_embedding(item)
79 | context_situation_mf_e = self.context_situation_mf_embedding(context_situation)
80 | user_mlp_e = self.user_mlp_embedding(user)
81 | item_mlp_e = self.item_mlp_embedding(item)
82 | context_situation_mlp_e = self.context_situation_mlp_embedding(context_situation)
83 | if self.mf_train:
84 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size]
85 | mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e) # [batch_size, embedding_size]
86 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size]
87 | if self.mlp_train:
88 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_mlp_e), -1)) # [batch_size, layers[-1]]
89 |
90 | if self.mf_train and self.mlp_train:
91 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1)))
92 | elif self.mf_train:
93 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1)))
94 | elif self.mlp_train:
95 | output = self.actfun(self.predict_layer(mlp_output))
96 | else:
97 |             raise RuntimeError('mf_train and mlp_train cannot be False at the same time')
98 | return output.squeeze(-1)
99 |
100 | def calculate_loss(self, interaction):
101 | user = interaction[self.USER_ID]
102 | item = interaction[self.ITEM_ID]
103 | context_situation = interaction[self.CONTEXT_SITUATION_ID]
104 | label = interaction[self.LABEL]
105 |
106 | output = self.forward(user, item, context_situation)
107 | return self.loss(output, label)
108 |
109 | def predict(self, interaction):
110 | user = interaction[self.USER_ID]
111 | item = interaction[self.ITEM_ID]
112 | context_situation = interaction[self.CONTEXT_SITUATION_ID]
113 | return self.forward(user, item, context_situation)
114 |
115 | def dump_parameters(self):
116 | r"""A simple implementation of dumping model parameters for pretrain.
117 |
118 | """
119 | if self.mf_train and not self.mlp_train:
120 | save_path = self.mf_pretrain_path
121 | torch.save(self, save_path)
122 | elif self.mlp_train and not self.mf_train:
123 | save_path = self.mlp_pretrain_path
124 | torch.save(self, save_path)
125 |
--------------------------------------------------------------------------------
/deepcarskit/trainer/trainer.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 | # @Notes : Inherit from recbole.trainer.Trainer
4 |
5 | r"""
6 | deepcarskit.trainer.trainer
7 | ################################
8 | """
9 |
10 |
11 | import numpy as np
12 | import torch
13 |
14 | from tqdm import tqdm
15 |
16 | from deepcarskit.data import LabledDataSortEvalDataLoader
17 | from deepcarskit.evaluator import CARSCollector
18 | from recbole.trainer import Trainer
19 | from recbole.data import FullSortEvalDataLoader
20 | from recbole.utils import EvaluatorType, set_color, get_gpu_usage
21 | from deepcarskit.evaluator import Evaluator
22 |
23 | class CARSTrainer(Trainer):
24 | r"""The basic Trainer for basic training and evaluation strategies in recommender systems. This class defines common
25 | functions for training and evaluation processes of most recommender system models, including fit(), evaluate(),
26 | resume_checkpoint() and some other features helpful for model training and evaluation.
27 |
28 |     Generally speaking, this class can serve most recommender system models, if the training process of the model
29 |     simply optimizes a single loss without involving any complex training strategies, such as adversarial learning,
30 |     pre-training and so on.
31 |
32 | Initializing the Trainer needs two parameters: `config` and `model`. `config` records the parameters information
33 | for controlling training and evaluation, such as `learning_rate`, `epochs`, `eval_step` and so on.
34 | `model` is the instantiated object of a Model Class.
35 |
36 | """
37 |
38 | def __init__(self, config, model):
39 | super(CARSTrainer, self).__init__(config, model)
40 | self.eval_collector = CARSCollector(config)
41 | self.evaluator = Evaluator(config)
42 |
43 | def _labled_data_sort_batch_eval(self, batched_data):
44 | interaction, history_index, positive_u, positive_i = batched_data
45 | try:
46 | # Note: interaction without item ids
47 | scores = self.model.full_sort_predict(interaction.to(self.device))
48 | except NotImplementedError:
49 | inter_len = len(interaction)
50 | new_inter = interaction.to(self.device).repeat_interleave(self.tot_item_num)
51 | batch_size = len(new_inter)
52 | new_inter.update(self.item_tensor.repeat(inter_len))
53 | if batch_size <= self.test_batch_size:
54 | scores = self.model.predict(new_inter)
55 | else:
56 | scores = self._spilt_predict(new_inter, batch_size)
57 |
58 | scores = scores.view(-1, self.tot_item_num)
59 |         scores[:, 0] = -np.inf  # mask the [PAD] item at index 0
60 | if history_index is not None:
61 | scores[history_index] = -np.inf
62 | return interaction, scores, positive_u, positive_i
63 |
64 |
65 | @torch.no_grad()
66 | def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progress=False):
67 | r"""Evaluate the model based on the eval data.
68 |
69 | Args:
70 | eval_data (DataLoader): the eval data
71 | load_best_model (bool, optional): whether load the best model in the training process, default: True.
72 | It should be set True, if users want to test the model after training.
73 | model_file (str, optional): the saved model file, default: None. If users want to test the previously
74 | trained model file, they can set this parameter.
75 | show_progress (bool): Show the progress of evaluate epoch. Defaults to ``False``.
76 |
77 | Returns:
78 |             dict: eval result, key is the eval metric and value is the corresponding metric value.
79 | """
80 | if not eval_data:
81 | return
82 |
83 | if load_best_model:
84 | if model_file:
85 | checkpoint_file = model_file
86 | else:
87 | checkpoint_file = self.saved_model_file
88 | checkpoint = torch.load(checkpoint_file)
89 | self.model.load_state_dict(checkpoint['state_dict'])
90 | self.model.load_other_parameter(checkpoint.get('other_parameter'))
91 | message_output = 'Loading model structure and parameters from {}'.format(checkpoint_file)
92 | self.logger.info(message_output)
93 |
94 | self.model.eval()
95 |
96 | if isinstance(eval_data, FullSortEvalDataLoader):
97 | eval_func = self._full_sort_batch_eval
98 | if self.item_tensor is None:
99 | self.item_tensor = eval_data.dataset.get_item_feature().to(self.device)
100 | elif isinstance(eval_data, LabledDataSortEvalDataLoader):
101 | eval_func = self._labled_data_sort_batch_eval
102 | if self.item_tensor is None:
103 | self.item_tensor = eval_data.dataset.get_item_feature().to(self.device)
104 | else:
105 | eval_func = self._neg_sample_batch_eval
106 | if self.config['eval_type'] == EvaluatorType.RANKING:
107 | self.tot_item_num = eval_data.dataset.item_num
108 |
109 | iter_data = (
110 | tqdm(
111 | eval_data,
112 | total=len(eval_data),
113 | ncols=100,
114 |                 desc=set_color("Evaluate ", 'pink'),
115 | ) if show_progress else eval_data
116 | )
117 | for batch_idx, batched_data in enumerate(iter_data):
118 | interaction, scores, positive_u, positive_i = eval_func(batched_data)
119 | if self.gpu_available and show_progress:
120 | iter_data.set_postfix_str(set_color('GPU RAM: ' + get_gpu_usage(self.device), 'yellow'))
121 | self.eval_collector.eval_batch_collect(scores, interaction, positive_u, positive_i)
122 | self.eval_collector.model_collect(self.model)
123 | struct = self.eval_collector.get_data_struct()
124 | result = self.evaluator.evaluate(struct)
125 |
126 | return result
127 |
128 |
129 |
--------------------------------------------------------------------------------
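In `_labled_data_sort_batch_eval` above, the fallback path scores every (interaction, item) pair by repeating the batch with repeat_interleave and tiling the item tensor, then reshaping back to an [n_interactions, n_items] score matrix. The pairing pattern in isolation:

import torch

users = torch.tensor([7, 9])        # 2 interactions in the batch
items = torch.tensor([0, 1, 2])     # tot_item_num = 3
u = users.repeat_interleave(3)      # tensor([7, 7, 7, 9, 9, 9])
i = items.repeat(2)                 # tensor([0, 1, 2, 0, 1, 2])
print(list(zip(u.tolist(), i.tolist())))
# [(7, 0), (7, 1), (7, 2), (9, 0), (9, 1), (9, 2)]
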
/deepcarskit/model/neucf/neucmfi0.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2022
3 | # @Author : Yong Zheng
4 |
5 |
6 |
7 | r"""
8 | NeuCMFi0
9 | ################################################
10 | References
11 | -----
12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022
13 |
14 | Notes
15 | -----
16 | 1). NeuCMFi0 has 4 towers: MLP tower without contexts, MF tower with UI, MF with UC, MF with IC
17 |
18 | 2). i => we create one embedding per individual context condition when fusing contexts into the MF towers
19 | """
20 |
21 | import torch
22 | import torch.nn as nn
23 | from torch.nn.init import normal_
24 |
25 | from deepcarskit.model.context_recommender import ContextRecommender
26 | from recbole.model.layers import MLPLayers
27 | from recbole.utils import InputType, EvaluatorType
28 |
29 |
30 | class NeuCMFi0(ContextRecommender):
31 |
32 | input_type = InputType.POINTWISE
33 |
34 | def __init__(self, config, dataset):
35 | super(NeuCMFi0, self).__init__(config, dataset)
36 |
37 | # load parameters info
38 | self.mf_embedding_size = config['mf_embedding_size']
39 | self.mlp_embedding_size = config['mlp_embedding_size']
40 | self.mlp_hidden_size = config['mlp_hidden_size']
41 | self.dropout_prob = config['dropout_prob']
42 | self.mf_train = config['mf_train']
43 | self.mlp_train = config['mlp_train']
44 | self.use_pretrain = config['use_pretrain']
45 | self.mf_pretrain_path = config['mf_pretrain_path']
46 | self.mlp_pretrain_path = config['mlp_pretrain_path']
47 |
48 | # define layers and loss
49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size*self.n_contexts_dim)
50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size*self.n_contexts_dim)
51 |         self.context_situation_mf_embedding = nn.ModuleList()  # ModuleList registers the embeddings so the optimizer updates them
52 |         self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size)
53 |         self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size)
54 |         self.context_dimensions_mlp_embedding = nn.ModuleList()
55 | for i in range(0, self.n_contexts_dim):
56 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device))
57 | self.context_situation_mf_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mf_embedding_size).to(self.device))
58 | num_mf_towers = 3
59 |
60 | # mlp layers = user, item
61 | self.mlp_layers = MLPLayers([2 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob)
62 | self.mlp_layers.logger = None # remove logger to use torch.save()
63 | if self.mf_train and self.mlp_train:
64 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim + self.mlp_hidden_size[-1], 1)
65 | elif self.mf_train:
66 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim, 1)
67 | elif self.mlp_train:
68 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
69 |
70 | # parameters initialization
71 | if self.use_pretrain:
72 | self.load_pretrain()
73 | else:
74 | self.apply(self._init_weights)
75 |
76 | def _init_weights(self, module):
77 | if isinstance(module, nn.Embedding):
78 | normal_(module.weight.data, mean=0.0, std=0.01)
79 |
80 | def forward(self, user, item, context_situation_list):
81 | user_mf_e = self.user_mf_embedding(user)
82 | item_mf_e = self.item_mf_embedding(item)
83 |
84 | context_situation_mf_e = None
85 | for i in range(0, self.n_contexts_dim):
86 | condition = context_situation_list[i]
87 |             embd = self.context_situation_mf_embedding[i](condition)  # the MF towers read the per-dimension MF tables
88 | if context_situation_mf_e is None:
89 | context_situation_mf_e = embd
90 | else:
91 | context_situation_mf_e = torch.cat((context_situation_mf_e, embd), -1)
92 |
93 | user_mlp_e = self.user_mlp_embedding(user)
94 | item_mlp_e = self.item_mlp_embedding(item)
95 | if self.mf_train:
96 |             mf_ui_output = torch.mul(user_mf_e, item_mf_e)  # [batch_size, mf_embedding_size * n_contexts_dim]
97 |             mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e)  # [batch_size, mf_embedding_size * n_contexts_dim]
98 |             mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e)  # [batch_size, mf_embedding_size * n_contexts_dim]
99 | if self.mlp_train:
100 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e), -1)) # [batch_size, layers[-1]]
101 |
102 | if self.mf_train and self.mlp_train:
103 | output = self.actfun(
104 | self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1)))
105 | elif self.mf_train:
106 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1)))
107 | elif self.mlp_train:
108 | output = self.actfun(self.predict_layer(mlp_output))
109 | else:
110 |             raise RuntimeError('mf_train and mlp_train cannot be False at the same time')
111 | return output.squeeze(-1)
112 |
113 | def calculate_loss(self, interaction):
114 | user = interaction[self.USER_ID]
115 | item = interaction[self.ITEM_ID]
116 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS)
117 | label = interaction[self.LABEL]
118 |
119 | output = self.forward(user, item, context_situation_list)
120 | return self.loss(output, label)
121 |
122 | def predict(self, interaction):
123 | user = interaction[self.USER_ID]
124 | item = interaction[self.ITEM_ID]
125 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS)
126 | return self.forward(user, item, context_situation_list)
127 |
128 | def dump_parameters(self):
129 | r"""A simple implementation of dumping model parameters for pretrain.
130 |
131 | """
132 | if self.mf_train and not self.mlp_train:
133 | save_path = self.mf_pretrain_path
134 | torch.save(self, save_path)
135 | elif self.mlp_train and not self.mf_train:
136 | save_path = self.mlp_pretrain_path
137 | torch.save(self, save_path)
138 |
--------------------------------------------------------------------------------
/deepcarskit/model/neucf/neucmfii.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2022
3 | # @Author : Yong Zheng
4 |
5 |
6 |
7 | r"""
8 | NeuCMFii
9 | ################################################
10 | References
11 | -----
12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022
13 |
14 | Notes
15 | -----
16 | 1). NeuCMFii has 4 towers: MLP tower with contexts, MF tower with UI, MF with UC, MF with IC
17 |
18 | 2). ii => we create one embedding per individual context condition when fusing contexts into the MLP and MF towers
19 | """
20 |
21 | import torch
22 | import torch.nn as nn
23 | from torch.nn.init import normal_
24 |
25 | from deepcarskit.model.context_recommender import ContextRecommender
26 | from recbole.model.layers import MLPLayers
27 | from recbole.utils import InputType, EvaluatorType
28 |
29 |
30 | class NeuCMFii(ContextRecommender):
31 |
32 | input_type = InputType.POINTWISE
33 |
34 | def __init__(self, config, dataset):
35 | super(NeuCMFii, self).__init__(config, dataset)
36 |
37 | # load parameters info
38 | self.mf_embedding_size = config['mf_embedding_size']
39 | self.mlp_embedding_size = config['mlp_embedding_size']
40 | self.mlp_hidden_size = config['mlp_hidden_size']
41 | self.dropout_prob = config['dropout_prob']
42 | self.mf_train = config['mf_train']
43 | self.mlp_train = config['mlp_train']
44 | self.use_pretrain = config['use_pretrain']
45 | self.mf_pretrain_path = config['mf_pretrain_path']
46 | self.mlp_pretrain_path = config['mlp_pretrain_path']
47 |
48 | # define layers and loss
49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size*self.n_contexts_dim)
50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size*self.n_contexts_dim)
51 |         self.context_situation_mf_embedding = nn.ModuleList()  # ModuleList registers the embeddings so the optimizer updates them
52 |         self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size)
53 |         self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size)
54 |         self.context_dimensions_mlp_embedding = nn.ModuleList()
55 | for i in range(0, self.n_contexts_dim):
56 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device))
57 | self.context_situation_mf_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mf_embedding_size).to(self.device))
58 | num_mf_towers = 3
59 |
60 | # mlp layers = user, item, context_situation
61 | self.mlp_layers = MLPLayers([(2 + self.n_contexts_dim) * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob)
62 | self.mlp_layers.logger = None # remove logger to use torch.save()
63 | if self.mf_train and self.mlp_train:
64 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim + self.mlp_hidden_size[-1], 1)
65 | elif self.mf_train:
66 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim, 1)
67 | elif self.mlp_train:
68 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
69 |
70 | # parameters initialization
71 | if self.use_pretrain:
72 | self.load_pretrain()
73 | else:
74 | self.apply(self._init_weights)
75 |
76 | def _init_weights(self, module):
77 | if isinstance(module, nn.Embedding):
78 | normal_(module.weight.data, mean=0.0, std=0.01)
79 |
80 | def forward(self, user, item, context_situation_list):
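        # four towers are fused below: three element-wise MF products (user*item,
        # user*context, item*context) plus one MLP over the concatenated embeddings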
81 | user_mf_e = self.user_mf_embedding(user)
82 | item_mf_e = self.item_mf_embedding(item)
83 |
84 | context_situation_mf_e = None
85 | for i in range(0, self.n_contexts_dim):
86 | condition = context_situation_list[i]
 87 |             embd = self.context_situation_mf_embedding[i](condition)  # MF embedding table here; the MLP table is used in the MLP tower below
88 | if context_situation_mf_e is None:
89 | context_situation_mf_e = embd
90 | else:
91 | context_situation_mf_e = torch.cat((context_situation_mf_e, embd), -1)
92 |
93 | user_mlp_e = self.user_mlp_embedding(user)
94 | item_mlp_e = self.item_mlp_embedding(item)
95 | context_situation_e = None
96 | for i in range(0, self.n_contexts_dim):
97 | condition = context_situation_list[i]
98 | embd = self.context_dimensions_mlp_embedding[i](condition)
99 | if context_situation_e is None:
100 | context_situation_e = embd
101 | else:
102 | context_situation_e = torch.cat((context_situation_e, embd), -1)
103 | if self.mf_train:
104 |             mf_ui_output = torch.mul(user_mf_e, item_mf_e)  # [batch_size, mf_embedding_size * n_contexts_dim]
105 |             mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e)  # [batch_size, mf_embedding_size * n_contexts_dim]
106 |             mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e)  # [batch_size, mf_embedding_size * n_contexts_dim]
107 | if self.mlp_train:
108 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]]
109 |
110 | if self.mf_train and self.mlp_train:
111 | output = self.actfun(
112 | self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1)))
113 | elif self.mf_train:
114 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1)))
115 | elif self.mlp_train:
116 | output = self.actfun(self.predict_layer(mlp_output))
117 | else:
118 | raise RuntimeError('mf_train and mlp_train can not be False at the same time')
119 | return output.squeeze(-1)
120 |
121 | def calculate_loss(self, interaction):
122 | user = interaction[self.USER_ID]
123 | item = interaction[self.ITEM_ID]
124 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS)
125 | label = interaction[self.LABEL]
126 |
127 | output = self.forward(user, item, context_situation_list)
128 | return self.loss(output, label)
129 |
130 | def predict(self, interaction):
131 | user = interaction[self.USER_ID]
132 | item = interaction[self.ITEM_ID]
133 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS)
134 | return self.forward(user, item, context_situation_list)
135 |
136 | def dump_parameters(self):
137 |         r"""A simple implementation of dumping model parameters for pretraining.
138 |
139 | """
140 | if self.mf_train and not self.mlp_train:
141 | save_path = self.mf_pretrain_path
142 | torch.save(self, save_path)
143 | elif self.mlp_train and not self.mf_train:
144 | save_path = self.mlp_pretrain_path
145 | torch.save(self, save_path)
146 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # DeepCARSKit
3 |
4 | *A Deep Learning Based Context-Aware Recommendation Library*
5 |
6 | [](./LICENSE)
7 | [](https://carskit.github.io/)
8 | [](https://badges.aleen42.com/src/python.svg)
9 | [](https://scholar.google.com/citations?view_op=view_citation&hl=en&citation_for_view=0FENWMcAAAAJ:Bg7qf7VwUHIC)
10 | [](https://doi.org/10.1016/j.simpa.2022.100292)
11 |
12 | [](https://carskit.github.io/)
13 |
14 |
15 | ## History
16 | + **[CARSKit](https://github.com/irecsys/CARSKit)** was released in 2015, and it was the first open-source library for
 17 | context-aware recommendations. There have been no significant updates to CARSKit since 2019. It is a Java library built on [Librec](https://github.com/guoguibing/librec) v1.3.
 18 | There is also a Python wrapper of CARSKit, [CARSKit-API](https://github.com/WagnoLeaoSergio/CARSKit_API).
 19 | + Recommender systems based on deep learning have matured in recent years, and context-aware
 20 | recommendation models based on traditional collaborative filtering (e.g., KNN-based CF, matrix factorization) have become
 21 | outdated. Therefore, we developed and released [DeepCARSKit](https://github.com/irecsys/DeepCARSKit), built upon the [RecBole](https://recbole.io/) v1.0.0 recommendation library.
 22 | DeepCARSKit is *a Deep Learning Based Context-Aware Recommendation Library* that runs on Python and [PyTorch](https://pytorch.org/).
23 |
24 |
 25 | ## Features
 26 | + **Implemented Deep Context-Aware Recommendation Models.** Currently, we support CARS models built on factorization machines (FM) and
 27 | neural collaborative filtering (NeuCF and NeuMF). More algorithms will be added.
 28 | 
 29 | + **Multiple Data Splits & Evaluation Options.** We provide evaluations based on both hold-out splits and N-fold cross validation.
 30 | 
 31 | + **Extensive and Standard Evaluation Protocols.** We rewrote parts of RecBole to adapt its evaluations for context-aware recommendations.
 32 | In particular, item recommendations can be produced for each unique (user, context situation) pair, and relevance and ranking metrics,
 33 | such as precision, recall, NDCG and MRR, can be calculated with context information taken into account.
 34 | 
 35 | + **Autosave Best Logs.** DeepCARSKit automatically saves the best log/configuration of the models you run in the 'log/best/' folder.
 36 | 
 37 | + **Other Features.** Other characteristics of DeepCARSKit, such as GPU acceleration, are inherited from RecBole.
38 |
39 |
40 | ## News & Updates
41 | **11/13/2024**: We release DeepCARSKit v1.0.1
42 | + Update requirements.txt
43 | + Address the randomness issue in N-fold cross validation by utilizing multiprocessing
44 |
45 | **03/19/2022**: We release DeepCARSKit v1.0.0
46 |
47 | ## Documents
48 | + [DeepCARSKit API](https://carskit.github.io/doc/DeepCARSKit/index.html)
49 | + [RecBole API](https://recbole.io/docs/)
50 | + Yong Zheng. "[DeepCARSKit: A Deep Learning Based Context-Aware Recommendation Library](https://doi.org/10.1016/j.simpa.2022.100292)", Software Impacts, Vol. 13, Elsevier, 2022
51 | + Yong Zheng. "[DeepCARSKit: A Demo and User Guide](https://doi.org/10.1145/3511047.3536417)", Adjunct Proceedings of the 30th ACM Conference on User Modeling, Adaptation and Personalization (ACM UMAP), Spain, July, 2022
52 | + Yong Zheng, Gonzalo Florez Arias. "[A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations](https://doi.org/10.1145/3511047.3536404)", Adjunct Proceedings of the 30th ACM Conference on User Modeling, Adaptation and Personalization (ACM UMAP), Spain, July, 2022
53 |
54 |
55 |
56 |
57 | ## Installation
58 | DeepCARSKit works with the following operating systems:
59 |
60 | * Linux
61 | * Windows 10
62 | * macOS X
63 |
64 | DeepCARSKit requires Python version 3.7 or later, torch version 1.7.0 or later, and RecBole version 1.0.1.
65 | For more details, you can refer to the list of [requirements](https://github.com/irecsys/DeepCARSKit/blob/main/requirements.txt).
66 | If you want to use DeepCARSKit with GPU,
67 | please ensure that CUDA or cudatoolkit version is 9.2 or later.
 68 | This requires NVIDIA driver version >= 396.26 (for Linux) or >= 397.44 (for Windows 10).
69 |
 70 | The DeepCARSKit library was successfully tested with the following environment:
71 | - `python==3.9.20`
72 | - `recbole==1.0.1`
73 | - `numpy==1.20.0`
74 | - `scipy==1.6.0`
75 | - `lightgbm==4.5.0`
76 | - `xgboost==2.1.1`
77 |
 78 | Installation via conda and pip will be supported later.
 79 | Currently, you can clone the source code with git; we will publish the package to PyPI and conda in the next release.
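
A minimal from-source setup might look like this (a sketch; it assumes the dependencies in requirements.txt can be installed into your current Python environment):

```bash
git clone https://github.com/irecsys/DeepCARSKit.git
cd DeepCARSKit
pip install -r requirements.txt
```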
80 |
81 | ## Quick-Start
82 | With the source code, you can use the provided script for initial usage of our library:
83 |
84 | ```bash
85 | python run.py
86 | ```
87 |
88 | This script will run the NeuCMFi model on the DePaulMovie dataset.
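
You can also invoke the library from your own script. A minimal sketch using the quick-start API (this assumes `run` is importable from `deepcarskit.quick_start`, as run.py presumably does, and that config.yaml sits in your working directory):

```python
from deepcarskit.quick_start import run

# load settings from config.yaml, then train and evaluate the configured model
run(config_file_list=['config.yaml'])
```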
89 |
90 | ### Data Sets & Preparation
91 | A list of available data sets for research on context-aware recommender systems can be found [here](https://github.com/irecsys/CARSKit/tree/master/context-aware_data_sets).
 92 | We provide two data sets (i.e., DePaulMovie and TripAdvisor) in the library. You can refer to their data format, e.g., [depaulmovie.inter](https://github.com/irecsys/DeepCARSKit/blob/main/dataset/depaulmovie/depaulmovie.inter).
93 |
 94 | More specifically, you need to prepare a data set that looks like this (use 'float' and 'token' to indicate numerical and nominal variables, respectively); a minimal example follows the list:
95 |
96 | + user_id:token
97 | + item_id:token
98 | + rating:float
99 | + context variable 1:token
100 | + context variable 2:token
101 | + context variable N:token
102 | + contexts:token => a concatenation of context conditions
103 | + uc_id:token => a concatenation of user_id and contexts
104 |
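For illustration only, the first lines of such a tab-separated file with three context dimensions might look like this (the IDs, values and concatenation format below are hypothetical; refer to the bundled datasets for the actual format):

```
user_id:token  item_id:token  rating:float  time:token  location:token  companion:token  contexts:token        uc_id:token
1003           tt0120912      4.0           Weekday     Cinema          Alone            Weekday,Cinema,Alone  1003,Weekday,Cinema,Alone
```
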
105 | ### Algorithms in NeuCMF Framework
106 | An extensive NeuCMF framework is included in the DeepCARSKit library; it provides multiple variants of the NeuCMF models.
107 |
108 | [](https://carskit.github.io/)
109 |
110 |
111 | ### Hyperparameter tuning
112 | You can tune the hyperparameters in the configuration file, config.yaml; see the sketch below.
113 |
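For example, the NeuCMF models read keys like the following (a sketch: the key names are taken from the model code, while the values are merely illustrative, not tuned recommendations):

```yaml
learning_rate: 0.001
dropout_prob: 0.1
mf_embedding_size: 64
mlp_embedding_size: 64
mlp_hidden_size: [128, 64]
mf_train: True
mlp_train: True
use_pretrain: False
```
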
114 | A more detailed user guide is on the way...
115 |
116 |
117 | ## Major Releases
118 | | Releases | Date |
119 | |----------|------------|
120 | | v1.0.1 | 11/13/2024 |
121 | | v1.0.0 | 03/19/2022 |
122 |
123 |
124 |
125 | ## Cite
126 | If you find DeepCARSKit useful for your research or development, please cite the following paper:
127 |
128 | ```
129 | @article{deepcarskit,
130 | title={DeepCARSKit: A Deep Learning Based Context-Aware Recommendation Library},
131 | author={Zheng, Yong},
132 | journal={Software Impacts},
133 | volume={13},
134 | pages={100292},
135 | year={2022},
136 | publisher={Elsevier}
137 | }
138 | ```
139 | ## Contributing
140 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
141 | Please make sure to update tests as appropriate.
142 |
143 | We welcome collaborations and contributors to DeepCARSKit. Your names will be listed here.
144 |
145 | ## Sponsors
146 | The current project was supported by Google Cloud Platform. We are looking for more sponsors to support the development and distribution of this library.
147 | If you are interested in sponsorship, please let us know. Our official email is DeepCARSKit [at] gmail [dot] com.
148 |
149 | ## License
150 | [MIT License](./LICENSE)
151 |
--------------------------------------------------------------------------------
/deepcarskit/data/dataloader/general_dataloader.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 | # @Notes : added LabledDataSortEvalDataLoader for context-aware ranking evaluations
4 |
5 | """
6 | deepcarskit.data.dataloader.general_dataloader
7 | ################################################
8 | """
9 |
10 | import numpy as np
11 | import torch
12 |
13 | from recbole.data.dataloader.general_dataloader import FullSortEvalDataLoader
14 | from recbole.data.interaction import Interaction, cat_interactions
15 | from recbole.utils import InputType, ModelType
16 | from collections import defaultdict
17 | from logging import getLogger
18 |
 19 | class FullSortEvalDataLoader(FullSortEvalDataLoader):  # thin wrapper that accepts (and ignores) a used_ids argument
20 | def __init__(self, config, dataset, sampler, shuffle=False, used_ids=None):
21 | super().__init__(config, dataset, sampler, shuffle=shuffle)
22 |
23 |
24 | class LabledDataSortEvalDataLoader(FullSortEvalDataLoader):
 25 |     """:class:`LabledDataSortEvalDataLoader` is a dataloader for ranking evaluation on labeled data. In order to speed up calculation,
 26 |     this dataloader would only return the user part of interactions, positive items and used items.
 27 |     It would not return negative items.
 28 | 
 29 |     Args:
 30 |         config (Config): The config of dataloader.
 31 |         dataset (Dataset): The dataset of dataloader.
 32 |         sampler (Sampler): The sampler of dataloader.
 33 |         shuffle (bool, optional): Whether the dataloader will be shuffled after a round. Defaults to ``False``.
 34 | 
 35 |     used_item = all items that users have interacted with in the training and evaluation sets.
 36 |     positive_item = all items that users have interacted with in the evaluation set.
 37 |     history_item = all items that users have interacted with in the training set.
 38 |     """
39 |
40 | def __init__(self, config, dataset, sampler, shuffle=False, used_ids=None):
41 | self.uid_field = dataset.uid_field
42 | self.iid_field = dataset.iid_field
43 | self.is_sequential = config['MODEL_TYPE'] == ModelType.SEQUENTIAL
44 |
45 | self.user_id = config['USER_ID_FIELD']
46 | self.item_id = config['ITEM_ID_FIELD']
47 | self.uc_id = config['USER_CONTEXT_FIELD']
48 | self.LABEL = config['LABEL_FIELD']
49 |
50 | if not self.is_sequential:
51 | multidict_uc_items = self._get_multidict(dataset) # uc and rated items
52 |
 53 |             '''
 54 |             uc_positive_item = all items that a uc has rated positively in the evaluation set.
 55 |             uc_history_item = all items that a uc has rated in the training set.
 56 |             '''
57 | self.ucid_list = multidict_uc_items.keys()
58 | self.uc_num = max(self.ucid_list)+1
59 | self.ucid2items_num = np.zeros(self.uc_num, dtype=np.int64)
60 | self.ucid2positive_item = np.array([None] * self.uc_num)
61 | self.ucid2history_item = np.array([None] * self.uc_num)
 62 |             self.ucid_condidates = {}
63 |
64 | # rated items (positive AND negative) for each uc in the training set
65 | ucid2used_item = used_ids
66 |
67 | for ucid in self.ucid_list:
68 |
 69 |                 uc_positive_itemlist = set(multidict_uc_items[ucid])
 70 |                 self.ucid2positive_item[ucid] = torch.tensor(list(uc_positive_itemlist), dtype=torch.int64)
 71 | 
 72 |                 self.ucid2items_num[ucid] = len(uc_positive_itemlist)
73 |
74 | uc_history_itemlist = ucid2used_item[ucid]
75 |
76 | self.ucid2history_item[ucid] = torch.tensor(list(uc_history_itemlist), dtype=torch.int64)
77 |
78 | # get uid and context information from uc innerid
79 | context_fields = dataset._get_context_fields()
80 | uid_list = []
81 | dict_context = {}
82 | for context in context_fields:
83 | dict_context[context]=[]
84 |
85 | for ucid in self.ucid_list:
86 | uid = dataset._get_uid_from_usercontexts(ucid)
87 | uid_list.append(uid)
88 | tuple_context = dataset._get_context_tuple_from_usercontexts(ucid)
89 | for i in range(0,len(context_fields)):
90 | context = context_fields[i]
91 | dict_context[context].append(tuple_context[i])
92 |
93 | self.ucid_list = torch.tensor(list(self.ucid_list), dtype=torch.int64)
94 | uid_list = torch.tensor(list(uid_list), dtype=torch.int64)
95 | # add uc data into data for predictions
96 | self.uc_df = dataset.join(Interaction({self.uid_field: uid_list, self.uc_id: self.ucid_list}))
97 | for context in dict_context.keys():
98 | new_inter = dataset.join(Interaction({context: torch.tensor(list(dict_context[context]), dtype=torch.int64)}))
99 | self.uc_df.update(new_inter)
100 |
101 |
102 | self.config = config
103 | self.logger = getLogger()
104 | self.dataset = dataset
105 | self.sampler = sampler
106 | self.batch_size = self.step = None
107 | self.shuffle = shuffle
108 | self.pr = 0
109 | self._init_batch_size_and_step()
110 |
111 | def _get_multidict(self, dataset):
112 | matrix_uc_item = dataset._create_sparse_matrix(dataset.inter_feat, self.uc_id, self.item_id, 'coo',
113 | self.LABEL)
114 |         # multidict_uc_items:
115 |         # key = uc inner id, value = a list of the items rated by that uc,
116 |         #   sorted by rating in descending order
117 |         multidict_uc_items = defaultdict(list)
118 | multidict_uc_items_positives = defaultdict(list)
119 |
120 |
121 | rows, cols = matrix_uc_item.shape
122 | for uc_id in range(1, rows):
123 | # Index = 0 => [PAD]
124 | # uc_id == inner id for user_context
125 | uc_items = matrix_uc_item.getrow(uc_id) # csr_matrix
126 | items = uc_items.indices # a list of items
127 | rates = uc_items.data # a list of ratings
128 | num_rates = len(rates)
129 |
130 | if num_rates == 0:
131 | continue
132 |
133 | dict_item_rating = {}
134 |
135 | for i in range(0, num_rates):
136 | key = items[i]
137 | value = rates[i]
138 | dict_item_rating[key] = value
139 |             # sort items by rating in descending order
140 |             dict_item_rating_descending = sorted(dict_item_rating.items(), key=lambda x: x[1], reverse=True)
141 |             # add these items into the dict which uses uc as its key
142 |             for item_rating in dict_item_rating_descending:
143 |                 multidict_uc_items[uc_id].append(item_rating[0])
144 | return multidict_uc_items
145 |
146 | @property
147 | def pr_end(self):
148 | if not self.is_sequential:
149 | return len(self.ucid_list)
150 | else:
151 | return len(self.dataset)
152 |
153 | def _next_batch_data(self):
154 | if not self.is_sequential:
155 | uc_df = self.uc_df[self.pr:self.pr + self.step]
156 | ucid_list = list(uc_df[self.uc_id])
157 |
158 | history_item = self.ucid2history_item[ucid_list]
159 | positive_item = self.ucid2positive_item[ucid_list]
160 |
161 | history_u = torch.cat([torch.full_like(hist_iid, i) for i, hist_iid in enumerate(history_item)])
162 | history_i = torch.cat(list(history_item))
163 |
164 | positive_u = torch.cat([torch.full_like(pos_iid, i) for i, pos_iid in enumerate(positive_item)])
165 | positive_i = torch.cat(list(positive_item))
166 |
167 | self.pr += self.step
168 | return uc_df, (history_u, history_i), positive_u, positive_i
169 | else:
170 | interaction = self.dataset[self.pr:self.pr + self.step]
171 | inter_num = len(interaction)
172 | positive_u = torch.arange(inter_num)
173 | positive_i = interaction[self.iid_field]
174 |
175 | self.pr += self.step
176 | return interaction, None, positive_u, positive_i
177 |
178 |
179 |
180 |
--------------------------------------------------------------------------------
/style.cfg:
--------------------------------------------------------------------------------
1 | [style]
2 | # Align closing bracket with visual indentation.
3 | align_closing_bracket_with_visual_indent=True
4 |
5 | # Allow dictionary keys to exist on multiple lines. For example:
6 | #
7 | # x = {
8 | # ('this is the first element of a tuple',
9 | # 'this is the second element of a tuple'):
10 | # value,
11 | # }
12 | allow_multiline_dictionary_keys=False
13 |
14 | # Allow lambdas to be formatted on more than one line.
15 | allow_multiline_lambdas=False
16 |
17 | # Allow splitting before a default / named assignment in an argument list.
18 | allow_split_before_default_or_named_assigns=True
19 |
20 | # Allow splits before the dictionary value.
21 | allow_split_before_dict_value=True
22 |
23 | # Let spacing indicate operator precedence. For example:
24 | #
25 | # a = 1 * 2 + 3 / 4
26 | # b = 1 / 2 - 3 * 4
27 | # c = (1 + 2) * (3 - 4)
28 | # d = (1 - 2) / (3 + 4)
29 | # e = 1 * 2 - 3
30 | # f = 1 + 2 + 3 + 4
31 | #
32 | # will be formatted as follows to indicate precedence:
33 | #
34 | # a = 1*2 + 3/4
35 | # b = 1/2 - 3*4
36 | # c = (1+2) * (3-4)
37 | # d = (1-2) / (3+4)
38 | # e = 1*2 - 3
39 | # f = 1 + 2 + 3 + 4
40 | #
41 | arithmetic_precedence_indication=False
42 |
43 | # Number of blank lines surrounding top-level function and class
44 | # definitions.
45 | blank_lines_around_top_level_definition=2
46 |
47 | # Insert a blank line before a class-level docstring.
48 | blank_line_before_class_docstring=False
49 |
50 | # Insert a blank line before a module docstring.
51 | blank_line_before_module_docstring=True
52 |
53 | # Insert a blank line before a 'def' or 'class' immediately nested
54 | # within another 'def' or 'class'. For example:
55 | #
56 | # class Foo:
57 | # # <------ this blank line
58 | # def method():
59 | # ...
60 | blank_line_before_nested_class_or_def=True
61 |
62 | # Do not split consecutive brackets. Only relevant when
63 | # dedent_closing_brackets is set. For example:
64 | #
65 | # call_func_that_takes_a_dict(
66 | # {
67 | # 'key1': 'value1',
68 | # 'key2': 'value2',
69 | # }
70 | # )
71 | #
72 | # would reformat to:
73 | #
74 | # call_func_that_takes_a_dict({
75 | # 'key1': 'value1',
76 | # 'key2': 'value2',
77 | # })
78 | coalesce_brackets=True
79 |
80 | # The column limit.
81 | column_limit=120
82 |
83 | # The style for continuation alignment. Possible values are:
84 | #
85 | # - SPACE: Use spaces for continuation alignment. This is default behavior.
86 | # - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns
87 | # (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs or
88 | # CONTINUATION_INDENT_WIDTH spaces) for continuation alignment.
89 | # - VALIGN-RIGHT: Vertically align continuation lines to multiple of
90 | # INDENT_WIDTH columns. Slightly right (one tab or a few spaces) if
91 | # cannot vertically align continuation lines with indent characters.
92 | continuation_align_style=SPACE
93 |
94 | # Indent width used for line continuations.
95 | continuation_indent_width=4
96 |
97 | # Put closing brackets on a separate line, dedented, if the bracketed
98 | # expression can't fit in a single line. Applies to all kinds of brackets,
99 | # including function definitions and calls. For example:
100 | #
101 | # config = {
102 | # 'key1': 'value1',
103 | # 'key2': 'value2',
104 | # } # <--- this bracket is dedented and on a separate line
105 | #
106 | # time_series = self.remote_client.query_entity_counters(
107 | # entity='dev3246.region1',
108 | # key='dns.query_latency_tcp',
109 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)),
110 | # start_ts=now()-timedelta(days=3),
111 | # end_ts=now(),
112 | # ) # <--- this bracket is dedented and on a separate line
113 | dedent_closing_brackets=True
114 |
115 | # Disable the heuristic which places each list element on a separate line
116 | # if the list is comma-terminated.
117 | disable_ending_comma_heuristic=False
118 |
119 | # Place each dictionary entry onto its own line.
120 | each_dict_entry_on_separate_line=True
121 |
122 | # Require multiline dictionary even if it would normally fit on one line.
123 | # For example:
124 | #
125 | # config = {
126 | # 'key1': 'value1'
127 | # }
128 | force_multiline_dict=False
129 |
130 | # The regex for an i18n comment. The presence of this comment stops
131 | # reformatting of that line, because the comments are required to be
132 | # next to the string they translate.
133 | i18n_comment=
134 |
135 | # The i18n function call names. The presence of this function stops
136 | # reformatting on that line, because the string it has cannot be moved
137 | # away from the i18n comment.
138 | i18n_function_call=
139 |
140 | # Indent blank lines.
141 | indent_blank_lines=False
142 |
143 | # Put closing brackets on a separate line, indented, if the bracketed
144 | # expression can't fit in a single line. Applies to all kinds of brackets,
145 | # including function definitions and calls. For example:
146 | #
147 | # config = {
148 | # 'key1': 'value1',
149 | # 'key2': 'value2',
150 | # } # <--- this bracket is indented and on a separate line
151 | #
152 | # time_series = self.remote_client.query_entity_counters(
153 | # entity='dev3246.region1',
154 | # key='dns.query_latency_tcp',
155 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)),
156 | # start_ts=now()-timedelta(days=3),
157 | # end_ts=now(),
158 | # ) # <--- this bracket is indented and on a separate line
159 | indent_closing_brackets=False
160 |
161 | # Indent the dictionary value if it cannot fit on the same line as the
162 | # dictionary key. For example:
163 | #
164 | # config = {
165 | # 'key1':
166 | # 'value1',
167 | # 'key2': value1 +
168 | # value2,
169 | # }
170 | indent_dictionary_value=False
171 |
172 | # The number of columns to use for indentation.
173 | indent_width=4
174 |
175 | # Join short lines into one line. E.g., single line 'if' statements.
176 | join_multiple_lines=True
177 |
178 | # Do not include spaces around selected binary operators. For example:
179 | #
180 | # 1 + 2 * 3 - 4 / 5
181 | #
182 | # will be formatted as follows when configured with "*,/":
183 | #
184 | # 1 + 2*3 - 4/5
185 | no_spaces_around_selected_binary_operators=
186 |
187 | # Use spaces around default or named assigns.
188 | spaces_around_default_or_named_assign=False
189 |
190 | # Adds a space after the opening '{' and before the ending '}' dict delimiters.
191 | #
192 | # {1: 2}
193 | #
194 | # will be formatted as:
195 | #
196 | # { 1: 2 }
197 | spaces_around_dict_delimiters=False
198 |
199 | # Adds a space after the opening '[' and before the ending ']' list delimiters.
200 | #
201 | # [1, 2]
202 | #
203 | # will be formatted as:
204 | #
205 | # [ 1, 2 ]
206 | spaces_around_list_delimiters=False
207 |
208 | # Use spaces around the power operator.
209 | spaces_around_power_operator=True
210 |
211 | # Use spaces around the subscript / slice operator. For example:
212 | #
213 | # my_list[1 : 10 : 2]
214 | spaces_around_subscript_colon=False
215 |
216 | # Adds a space after the opening '(' and before the ending ')' tuple delimiters.
217 | #
218 | # (1, 2, 3)
219 | #
220 | # will be formatted as:
221 | #
222 | # ( 1, 2, 3 )
223 | spaces_around_tuple_delimiters=False
224 |
225 | # The number of spaces required before a trailing comment.
226 | # This can be a single value (representing the number of spaces
227 | # before each trailing comment) or list of values (representing
228 | # alignment column values; trailing comments within a block will
229 | # be aligned to the first column value that is greater than the maximum
230 | # line length within the block). For example:
231 | #
232 | # With spaces_before_comment=5:
233 | #
234 | # 1 + 1 # Adding values
235 | #
236 | # will be formatted as:
237 | #
238 | # 1 + 1 # Adding values <-- 5 spaces between the end of the statement and comment
239 | #
240 | # With spaces_before_comment=15, 20:
241 | #
242 | # 1 + 1 # Adding values
243 | # two + two # More adding
244 | #
245 | # longer_statement # This is a longer statement
246 | # short # This is a shorter statement
247 | #
248 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment
249 | # short # This is a shorter statement
250 | #
251 | # will be formatted as:
252 | #
253 | # 1 + 1 # Adding values <-- end of line comments in block aligned to col 15
254 | # two + two # More adding
255 | #
256 | # longer_statement # This is a longer statement <-- end of line comments in block aligned to col 20
257 | # short # This is a shorter statement
258 | #
259 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment <-- the end of line comments are aligned based on the line length
260 | # short # This is a shorter statement
261 | #
262 | spaces_before_comment=2
263 |
264 | # Insert a space between the ending comma and closing bracket of a list,
265 | # etc.
266 | space_between_ending_comma_and_closing_bracket=False
267 |
268 | # Use spaces inside brackets, braces, and parentheses. For example:
269 | #
270 | # method_call( 1 )
271 | # my_dict[ 3 ][ 1 ][ get_index( *args, **kwargs ) ]
272 | # my_set = { 1, 2, 3 }
273 | space_inside_brackets=False
274 |
275 | # Split before arguments
276 | split_all_comma_separated_values=False
277 |
278 | # Split before arguments, but do not split all subexpressions recursively
279 | # (unless needed).
280 | split_all_top_level_comma_separated_values=False
281 |
282 | # Split before arguments if the argument list is terminated by a
283 | # comma.
284 | split_arguments_when_comma_terminated=False
285 |
286 | # Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@'
287 | # rather than after.
288 | split_before_arithmetic_operator=False
289 |
290 | # Set to True to prefer splitting before '&', '|' or '^' rather than
291 | # after.
292 | split_before_bitwise_operator=True
293 |
294 | # Split before the closing bracket if a list or dict literal doesn't fit on
295 | # a single line.
296 | split_before_closing_bracket=True
297 |
298 | # Split before a dictionary or set generator (comp_for). For example, note
299 | # the split before the 'for':
300 | #
301 | # foo = {
302 | # variable: 'Hello world, have a nice day!'
303 | # for variable in bar if variable != 42
304 | # }
305 | split_before_dict_set_generator=True
306 |
307 | # Split before the '.' if we need to split a longer expression:
308 | #
309 | # foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d))
310 | #
311 | # would reformat to something like:
312 | #
313 | # foo = ('This is a really long string: {}, {}, {}, {}'
314 | # .format(a, b, c, d))
315 | split_before_dot=False
316 |
317 | # Split after the opening paren which surrounds an expression if it doesn't
318 | # fit on a single line.
319 | split_before_expression_after_opening_paren=False
320 |
321 | # If an argument / parameter list is going to be split, then split before
322 | # the first argument.
323 | split_before_first_argument=False
324 |
325 | # Set to True to prefer splitting before 'and' or 'or' rather than
326 | # after.
327 | split_before_logical_operator=True
328 |
329 | # Split named assignments onto individual lines.
330 | split_before_named_assigns=True
331 |
332 | # Set to True to split list comprehensions and generators that have
333 | # non-trivial expressions and multiple clauses before each of these
334 | # clauses. For example:
335 | #
336 | # result = [
337 | # a_long_var + 100 for a_long_var in xrange(1000)
338 | # if a_long_var % 10]
339 | #
340 | # would reformat to something like:
341 | #
342 | # result = [
343 | # a_long_var + 100
344 | # for a_long_var in xrange(1000)
345 | # if a_long_var % 10]
346 | split_complex_comprehension=False
347 |
348 | # The penalty for splitting right after the opening bracket.
349 | split_penalty_after_opening_bracket=300
350 |
351 | # The penalty for splitting the line after a unary operator.
352 | split_penalty_after_unary_operator=10000
353 |
354 | # The penalty of splitting the line around the '+', '-', '*', '/', '//',
355 | # ``%``, and '@' operators.
356 | split_penalty_arithmetic_operator=300
357 |
358 | # The penalty for splitting right before an if expression.
359 | split_penalty_before_if_expr=0
360 |
361 | # The penalty of splitting the line around the '&', '|', and '^'
362 | # operators.
363 | split_penalty_bitwise_operator=300
364 |
365 | # The penalty for splitting a list comprehension or generator
366 | # expression.
367 | split_penalty_comprehension=80
368 |
369 | # The penalty for characters over the column limit.
370 | split_penalty_excess_character=7000
371 |
372 | # The penalty incurred by adding a line split to the unwrapped line. The
373 | # more line splits added the higher the penalty.
374 | split_penalty_for_added_line_split=30
375 |
376 | # The penalty of splitting a list of "import as" names. For example:
377 | #
378 | # from a_very_long_or_indented_module_name_yada_yad import (long_argument_1,
379 | # long_argument_2,
380 | # long_argument_3)
381 | #
382 | # would reformat to something like:
383 | #
384 | # from a_very_long_or_indented_module_name_yada_yad import (
385 | # long_argument_1, long_argument_2, long_argument_3)
386 | split_penalty_import_names=0
387 |
388 | # The penalty of splitting the line around the 'and' and 'or'
389 | # operators.
390 | split_penalty_logical_operator=300
391 |
392 | # Use the Tab character for indentation.
393 | use_tabs=False
394 |
395 |
--------------------------------------------------------------------------------
/deepcarskit/quick_start/quick_start.py:
--------------------------------------------------------------------------------
1 | # @Time : 2020/10/6
2 | # @Author : Shanlei Mu
3 | # @Email : slmu@ruc.edu.cn
4 |
5 |
6 | # UPDATE:
7 | # @Time : 2021/12
8 | # @Author : Yong Zheng
9 | # @Notes : made several changes to adapt it for CARS
10 |
11 | """
12 | deepcarskit.quick_start
13 | ########################
14 | """
15 | import logging
16 | from logging import getLogger
17 | import shutil
18 | import glob
19 | import os
20 |
21 | import torch
22 | import pickle
23 |
24 |
25 | # from past.builtins import raw_input
26 |
27 | from deepcarskit.config import CARSConfig
28 | from deepcarskit.data import create_dataset, data_preparation, save_split_dataloaders, load_split_dataloaders
29 | from deepcarskit.utils.utils import get_model, get_trainer
30 | from deepcarskit.utils import init_logger, init_seed, set_color
31 | from multiprocessing.dummy import Pool as ThreadPool
32 | from multiprocessing import Pool
33 | from recbole.utils import EvaluatorType
34 |
35 |
36 | def eval_folds(args_tuple):
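    # trains and validates a single cross-validation fold;
    # args_tuple = (train_data_fold, valid_data_fold, config, logger, fold_number)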
37 | train_data_fold = args_tuple[0]
38 | valid_data_fold = args_tuple[1]
39 |
40 | config = args_tuple[2]
41 | init_seed(config['seed'], config['reproducibility'])
42 |
43 | logger = args_tuple[3]
44 | fold = args_tuple[4]
45 |
46 | if config['save_dataloaders']:
47 | save_split_dataloaders(config, dataloaders=(train_data_fold, valid_data_fold))
48 |
49 | # model loading and initialization
50 | init_seed(config['seed'], config['reproducibility'])
51 | model = get_model(config['model'])(config, train_data_fold.dataset).to(config['device'])
52 |
53 | # trainer loading and initialization
54 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
55 | name = trainer.saved_model_file
56 | ind = name.rindex('.')
57 | lname = list(name)
58 | lname.insert(ind, '_f'+str(fold))
59 | trainer.saved_model_file = ''.join(lname)
60 |
61 | # model training
62 | best_valid_score_fold, best_valid_result_fold = trainer.fit(
63 | train_data_fold, valid_data_fold, saved=True, show_progress=config['show_progress']
64 | )
 65 |     msghead = 'Fold ' + str(fold) + ' completed'
66 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result_fold}')
67 |
68 | return best_valid_score_fold, best_valid_result_fold
69 |
70 |
71 | def run(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
72 | r""" A fast running api, which includes the complete process of
73 | training and testing a model on a specified dataset
74 |
75 | Args:
76 | model (str, optional): Model name. Defaults to ``None``.
77 | dataset (str, optional): Dataset name. Defaults to ``None``.
78 | config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``.
79 | config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``.
80 | saved (bool, optional): Whether to save the model. Defaults to ``True``.
81 | """
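    # usage sketch (hypothetical file name): run(config_file_list=['config.yaml'])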
82 | # configurations initialization
83 | config = CARSConfig(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict)
84 | init_seed(config['seed'], config['reproducibility'])
85 |
86 | # logger initialization
87 | log_handler, log_filepath = init_logger(config)
88 | logger = getLogger()
89 |
90 | logger.info(config)
91 |
92 | # dataset filtering
93 | dataset = create_dataset(config)
94 | if config['save_dataset']:
95 | dataset.save()
96 | logger.info(dataset)
97 |
98 | # dataset splitting
99 | # train_data, valid_data, test_data = data_preparation(config, dataset)
100 | train_data, valid_data = data_preparation(config, dataset)
101 |
102 |
103 | CV = False
104 | if isinstance(train_data, list):
105 | CV = True
106 | n_folds = len(train_data)
107 |
108 | if CV:
109 | list_train_test = []
110 | for i in range(n_folds):
111 | t = (train_data[i], valid_data[i], config, logger, (i+1))
112 | list_train_test.append(t)
113 |
114 | # pool = ThreadPool()
115 | # rsts = pool.map(eval_folds, list_train_test)
116 | # pool.close()
117 | # pool.join()
118 | # print('cpu count: ', os.cpu_count())
119 |
120 | num_processes = config['eval_args']['split']['num_processes']
121 | with Pool(processes=num_processes) as pool:
122 | rsts = pool.map(eval_folds, list_train_test)
123 |
124 |         best_valid_score = 0  # running sum of fold scores, averaged below
125 |         best_valid_result = {}  # running metric-wise sums, averaged below
126 |
127 | for rst_fold in rsts:
128 | valid_score_fold = rst_fold[0]
129 | valid_result_fold = rst_fold[1]
130 |
131 | best_valid_score += valid_score_fold
132 | if not best_valid_result:
133 | best_valid_result = valid_result_fold
134 | else:
135 | for key in best_valid_result.keys():
136 | best_valid_result[key] = best_valid_result[key] + valid_result_fold[key]
137 |
138 | best_valid_score = round(best_valid_score/n_folds, config['metric_decimal_place'])
139 | for key in best_valid_result:
140 | best_valid_result[key] = round(best_valid_result[key]/n_folds, config['metric_decimal_place'])
141 | msghead = 'Data: '+config['dataset']+', Results on '+str(n_folds)+' CV: best valid by '+config['model']
142 |         layers = [str(size) for size in config['mlp_hidden_size']]
143 | layers = ' '.join(layers)
144 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result}'+', lrate: '+str(config['learning_rate'])+', layers: ['+layers+']')
145 | log_handler.close()
146 | logger.removeHandler(log_handler)
147 | logger_name = log_filepath[:-4] + "_" + config['valid_metric'] + " = " + str(best_valid_score) + ".log"
148 | shutil.move(log_filepath, logger_name)
149 | update_best_log(config, logger_name, best_valid_result)
150 | else:
151 | if config['save_dataloaders']:
152 | save_split_dataloaders(config, dataloaders=(train_data, valid_data))
153 |
154 | # model loading and initialization
155 | init_seed(config['seed'], config['reproducibility'])
156 | model = get_model(config['model'])(config, train_data.dataset).to(config['device'])
157 | logger.info(model)
158 |
159 | # trainer loading and initialization
160 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
161 |
162 | # model training
163 | best_valid_score, best_valid_result = trainer.fit(
164 | train_data, valid_data, saved=saved, show_progress=config['show_progress']
165 | )
166 |
167 | # model evaluation
168 | # test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress'])
169 |
170 | msghead = 'Data: '+config['dataset']+', best valid by '+config['model']
171 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result}')
172 | # logger.info(set_color('test result', 'yellow') + f': {test_result}')
173 | log_handler.close()
174 | logger.removeHandler(log_handler)
175 | logger_name = log_filepath[:-4] + "_" + config['valid_metric'] + " = " + str(best_valid_score) + ".log"
176 | shutil.move(log_filepath, logger_name)
177 | update_best_log(config, logger_name, best_valid_result)
178 |
179 | '''
180 | # example of predictions by context recommender
181 | # note, raw value in the original data is expected to be transformed to inner ID
182 |
183 |     # raw id <---> inner id
184 | print("innerid: ", dataset._get_innderid_from_rawid("user_id", "1003"))
185 | print("rawid: ", dataset._get_rawid_from_innerid("user_id", 1))
186 |
187 | userid = dataset._get_innderid_from_rawid("user_id","1003")
188 | itemid = dataset._get_innderid_from_rawid("item_id","tt0120912")
189 | timeid = dataset._get_innderid_from_rawid("time","Weekday")
190 | locid = dataset._get_innderid_from_rawid("location","Cinema")
191 | cmpid = dataset._get_innderid_from_rawid("companion","Alone")
192 |
193 | user = torch.tensor([userid])
194 | item = torch.tensor([itemid])
195 | contexts = []
196 | contexts.append(torch.tensor([timeid]))
197 | contexts.append(torch.tensor([locid]))
198 | contexts.append(torch.tensor([cmpid]))
199 | print(userid, ', ', itemid, ', ', timeid, ', ', locid, ', ', cmpid)
200 | print("prediction: ",model.forward(user, item, contexts))
201 | exit()
202 | '''
203 |
204 | return {
205 | 'best_valid_score': best_valid_score,
206 | 'valid_score_bigger': config['valid_metric_bigger'],
207 | 'best_valid_result': best_valid_result,
208 | # 'test_result': test_result
209 | }
210 |
211 | def update_best_log(config, newlog, best_valid_result):
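    # keep only the best log per (dataset, model, metric) under log/best/,
    # replacing the previous best when the new validation result improves on it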
212 | dataset = config['dataset']
213 | # compare which log file is better
214 | ranking = False
215 | if config['eval_type'] == EvaluatorType.RANKING:
216 | ranking = True
217 | metric = config['ranking_valid_metric']
218 | else:
219 | metric = config['err_valid_metric']
220 |
221 | metric_value = best_valid_result[metric.lower()]
222 |
223 | end = newlog.rindex('.')
224 | s1 = newlog.index('-')
225 | s2 = newlog.index('-', s1 + 1, end)
226 | model = newlog[s1 + 1:s2]
227 |
228 | match = [dataset, model, metric]
229 |
230 |
231 | folder_best = './log/best/'
232 | existing_logs = glob.glob(folder_best+'/*.log')
233 |
234 | found = False
235 | oldlog = None
236 | for file in existing_logs:
237 | if all(x in file for x in match):
238 | oldlog = file
239 | found = True
240 | break
241 |
242 | newlog_filename = newlog[newlog.rindex('/')+1:]
243 |
244 | if not found:
245 | shutil.copyfile(newlog, folder_best+newlog_filename)
246 | else:
247 | newvalue = metric_value
248 | oldvalue = float(oldlog[oldlog.rindex('=') + 1: oldlog.rindex('.')])
249 |
250 | if ranking:
251 | if newvalue > oldvalue:
252 | shutil.copyfile(newlog, folder_best+newlog_filename)
253 | os.remove(oldlog)
254 | impro = (newvalue - oldvalue) / oldvalue
255 | print('Better results! improvement: {:.2%}'.format(impro) + ', best log saved in ' + folder_best + newlog_filename)
256 | else:
257 | if newvalue < oldvalue:
258 | shutil.copyfile(newlog, folder_best+newlog_filename)
259 | os.remove(oldlog)
260 | impro = (oldvalue - newvalue) / oldvalue
261 | print('Better results! improvement: {:.2%}'.format(impro) + ', best log saved in ' + folder_best + newlog_filename)
262 | return
263 |
264 |
265 |
266 | def objective_function(config_dict=None, config_file_list=None, saved=True):
267 | r""" The default objective_function used in HyperTuning
268 |
269 | Args:
270 | config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``.
271 | config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``.
272 | saved (bool, optional): Whether to save the model. Defaults to ``True``.
273 | """
274 |
275 | config = CARSConfig(config_dict=config_dict, config_file_list=config_file_list)
276 | init_seed(config['seed'], config['reproducibility'])
277 | logging.basicConfig(level=logging.ERROR)
278 | dataset = create_dataset(config)
279 | train_data, valid_data, test_data = data_preparation(config, dataset)
280 | init_seed(config['seed'], config['reproducibility'])
281 | model = get_model(config['model'])(config, train_data.dataset).to(config['device'])
282 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
283 | best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, verbose=False, saved=saved)
284 | # test_result = trainer.evaluate(test_data, load_best_model=saved)
285 |
286 | return {
287 | 'best_valid_score': best_valid_score,
288 | 'valid_score_bigger': config['valid_metric_bigger'],
289 | 'best_valid_result': best_valid_result,
290 | # 'test_result': test_result
291 | }
292 |
293 |
294 | def load_data_and_model(model_file, dataset_file=None, dataloader_file=None):
295 | r"""Load filtered dataset, split dataloaders and saved model.
296 |
297 | Args:
298 | model_file (str): The path of saved model file.
299 | dataset_file (str, optional): The path of filtered dataset. Defaults to ``None``.
300 | dataloader_file (str, optional): The path of split dataloaders. Defaults to ``None``.
301 |
302 | Note:
303 | The :attr:`dataset` will be loaded or created according to the following strategy:
304 | If :attr:`dataset_file` is not ``None``, the :attr:`dataset` will be loaded from :attr:`dataset_file`.
305 | If :attr:`dataset_file` is ``None`` and :attr:`dataloader_file` is ``None``,
306 | the :attr:`dataset` will be created according to :attr:`config`.
307 | If :attr:`dataset_file` is ``None`` and :attr:`dataloader_file` is not ``None``,
308 | the :attr:`dataset` will neither be loaded or created.
309 |
310 | The :attr:`dataloader` will be loaded or created according to the following strategy:
311 | If :attr:`dataloader_file` is not ``None``, the :attr:`dataloader` will be loaded from :attr:`dataloader_file`.
312 | If :attr:`dataloader_file` is ``None``, the :attr:`dataloader` will be created according to :attr:`config`.
313 |
314 | Returns:
315 | tuple:
316 | - config (Config): An instance object of Config, which record parameter information in :attr:`model_file`.
317 | - model (AbstractRecommender): The model load from :attr:`model_file`.
318 | - dataset (Dataset): The filtered dataset.
319 | - train_data (AbstractDataLoader): The dataloader for training.
320 | - valid_data (AbstractDataLoader): The dataloader for validation.
321 | - test_data (AbstractDataLoader): The dataloader for testing.
322 | """
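    # usage sketch (hypothetical path):
    #   config, model, dataset, train_data, valid_data, test_data = \
    #       load_data_and_model(model_file='saved/NeuCMFii-DePaulMovie.pth')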
323 | checkpoint = torch.load(model_file)
324 | config = checkpoint['config']
325 | init_seed(config['seed'], config['reproducibility'])
326 | init_logger(config)
327 |
328 | dataset = None
329 | if dataset_file:
330 | with open(dataset_file, 'rb') as f:
331 | dataset = pickle.load(f)
332 |
333 | if dataloader_file:
334 | train_data, valid_data, test_data = load_split_dataloaders(dataloader_file)
335 | else:
336 | if dataset is None:
337 | dataset = create_dataset(config)
338 | train_data, valid_data, test_data = data_preparation(config, dataset)
339 |
340 | init_seed(config['seed'], config['reproducibility'])
341 | model = get_model(config['model'])(config, train_data.dataset).to(config['device'])
342 | model.load_state_dict(checkpoint['state_dict'])
343 | model.load_other_parameter(checkpoint.get('other_parameter'))
344 |
345 | return config, model, dataset, train_data, valid_data, test_data
346 |
--------------------------------------------------------------------------------
/deepcarskit/data/utils.py:
--------------------------------------------------------------------------------
1 | # @Time : 2020/7/21
2 | # @Author : Yupeng Hou
3 | # @Email : houyupeng@ruc.edu.cn
4 |
5 | # UPDATE:
6 | # @Time : 2021/7/9, 2020/9/17, 2020/8/31, 2021/2/20, 2021/3/1
7 | # @Author : Yupeng Hou, Yushuo Chen, Kaiyuan Li, Haoran Cheng, Jiawei Guan
8 | # @Email : houyupeng@ruc.edu.cn, chenyushuo@ruc.edu.cn, tsotfsk@outlook.com, chenghaoran29@foxmail.com, guanjw@ruc.edu.cn
9 |
10 | # UPDATE:
11 | # @Time : 2021/12
12 | # @Author : Yong Zheng
13 | # @Notes : made several changes to adapt it for CARS
14 |
15 |
16 | """
17 | deepcarskit.data.utils
18 | ########################
19 | """
20 |
21 | import copy
22 | import importlib
23 | import os
24 | import pickle
25 |
26 | from deepcarskit.data.dataloader import *
27 | from recbole.data.dataloader import TrainDataLoader, NegSampleEvalDataLoader, KnowledgeBasedDataLoader, UserDataLoader
28 | from recbole.sampler import KGSampler, Sampler, RepeatableSampler
29 | from recbole.utils import ModelType, ensure_dir, get_local_time, set_color
30 | from recbole.utils import EvaluatorType
31 | from logging import getLogger
32 |
33 |
34 | def create_dataset(config):
35 | """Create dataset according to :attr:`config['model']` and :attr:`config['MODEL_TYPE']`.
36 |
37 | Args:
38 | config (Config): An instance object of Config, used to record parameter information.
39 | Returns:
40 | Dataset: Constructed dataset.
41 | """
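    # e.g., for config['model'] == 'NeuCMFii', a class named 'NeuCMFiiDataset' is used
    # if it exists in deepcarskit.data.dataset; otherwise the fallback below applies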
42 | # David Wang: import the model dynamically
43 | dataset_module = importlib.import_module('deepcarskit.data.dataset')
44 | if hasattr(dataset_module, config['model'] + 'Dataset'):
 45 |         """ David Wang:
 46 |         if a dataset class named '<model>Dataset' exists in the custom dataset module, instantiate and return it
 47 |         """
48 | return getattr(dataset_module, config['model'] + 'Dataset')(config)
49 | else:
50 | model_type = config['MODEL_TYPE']
51 | if model_type == ModelType.SEQUENTIAL:
52 | from .dataset import SequentialDataset
53 | return SequentialDataset(config)
54 | elif model_type == ModelType.KNOWLEDGE:
55 | from .dataset import KnowledgeBasedDataset
56 | return KnowledgeBasedDataset(config)
57 | elif model_type == ModelType.DECISIONTREE:
58 | from .dataset import DecisionTreeDataset
59 | return DecisionTreeDataset(config)
60 | else:
61 | from .dataset import Dataset
62 | return Dataset(config)
63 |
64 |
65 | def save_split_dataloaders(config, dataloaders):
66 | """Save split dataloaders.
67 |
68 | Args:
69 | config (Config): An instance object of Config, used to record parameter information.
70 | dataloaders (tuple of AbstractDataLoader): The split dataloaders.
71 | """
72 | save_path = config['checkpoint_dir']
73 | saved_dataloaders_file = f'{config["dataset"]}-for-{config["model"]}-dataloader.pth'
74 | file_path = os.path.join(save_path, saved_dataloaders_file)
75 | logger = getLogger()
76 | logger.info(set_color('Saved split dataloaders', 'blue') + f': {file_path}')
77 | with open(file_path, 'wb') as f:
78 | pickle.dump(dataloaders, f)
79 |
80 |
81 | def load_split_dataloaders(saved_dataloaders_file):
82 | """Load split dataloaders.
83 |
84 | Args:
85 | saved_dataloaders_file (str): The path of split dataloaders.
86 |
87 | Returns:
88 | dataloaders (tuple of AbstractDataLoader): The split dataloaders.
89 | """
90 | with open(saved_dataloaders_file, 'rb') as f:
91 | dataloaders = pickle.load(f)
92 | return dataloaders
93 |
94 |
95 | def data_preparation(config, dataset, save=False):
96 | """Split the dataset by :attr:`config['eval_args']` and create training, validation and test dataloader.
97 |
98 | Args:
99 | config (Config): An instance object of Config, used to record parameter information.
100 | dataset (Dataset): An instance object of Dataset, which contains all interaction records.
101 | save (bool, optional): If ``True``, it will call :func:`save_datasets` to save split dataset.
102 | Defaults to ``False``.
103 |
104 |     Returns:
105 |         tuple:
106 |             - train_data (AbstractDataLoader or list): The dataloader(s) for training.
107 |             - valid_data (AbstractDataLoader or list): The dataloader(s) for validation.
108 |               Lists with one entry per fold are returned when N-fold cross validation is configured.
109 |     """
110 | model_type = config['MODEL_TYPE']
111 | # David Wang: make a copy since dataset.build() will modify the .inter_feat attribute to Interaction object
112 | dataset = copy.copy(dataset)
113 |     # David Wang: read the data file and create pandas DataFrame data sets
114 |
115 |
116 |
117 |     CV = True
118 |     built_datasets = dataset.build()
119 |     if isinstance(built_datasets, list):  # a plain [train, valid] list means hold-out; a dict of folds means CV
120 |         CV = False
121 | logger = getLogger()
122 |
123 |     # under CV, built_datasets is a dict:
124 |     # key = fold number
125 |     # value = [train set, valid set]
126 |
127 | if CV:
128 | train = []
129 | valid = []
130 | for fold in built_datasets:
131 | train_dataset, valid_dataset = built_datasets[fold]
132 | train_sampler, valid_sampler = create_samplers(config, dataset, built_datasets[fold])
133 | used_ids = get_used_ids(config, dataset=train_dataset)
134 |
135 | if model_type != ModelType.KNOWLEDGE:
136 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, shuffle=True)
137 | else:
138 | kg_sampler = KGSampler(dataset, config['train_neg_sample_args']['distribution'])
139 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, kg_sampler, shuffle=True)
140 |
141 | if config['ranking']:
142 | valid_data_loader = get_dataloader(config, 'evaluation')
143 | valid_data = valid_data_loader(config, valid_dataset, valid_sampler, shuffle=False, used_ids=used_ids)
144 | else:
145 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False)
146 |
147 | logger.info(
148 | set_color('[Training]: ', 'pink') + set_color('train_batch_size', 'cyan') + ' = ' +
149 | set_color(f'[{config["train_batch_size"]}]', 'yellow') + set_color(' negative sampling', 'cyan') + ': ' +
150 | set_color(f'[{config["neg_sampling"]}]', 'yellow')
151 | )
152 | logger.info(
153 | set_color('[Evaluation]: ', 'pink') + set_color('eval_batch_size', 'cyan') + ' = ' +
154 | set_color(f'[{config["eval_batch_size"]}]', 'yellow') + set_color(' eval_args', 'cyan') + ': ' +
155 | set_color(f'[{config["eval_args"]}]', 'yellow')
156 | )
157 | train.append(train_data)
158 | valid.append(valid_data)
159 | # if save:
160 | # save_split_dataloaders(config, dataloaders=(train_data, valid_data))
161 |
162 | return train, valid
163 | else:
164 | train_dataset, valid_dataset = built_datasets
165 | train_sampler, valid_sampler = create_samplers(config, dataset, built_datasets)
166 | used_ids = get_used_ids(config, dataset=train_dataset)
167 |
168 | if model_type != ModelType.KNOWLEDGE:
169 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, shuffle=True)
170 | else:
171 | kg_sampler = KGSampler(dataset, config['train_neg_sample_args']['distribution'])
172 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, kg_sampler, shuffle=True)
173 |
174 | if config['ranking']:
175 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False, used_ids=used_ids)
176 | else:
177 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False)
178 |
179 | logger.info(
180 | set_color('[Training]: ', 'pink') + set_color('train_batch_size', 'cyan') + ' = ' +
181 | set_color(f'[{config["train_batch_size"]}]', 'yellow') + set_color(' negative sampling', 'cyan') + ': ' +
182 | set_color(f'[{config["neg_sampling"]}]', 'yellow')
183 | )
184 | logger.info(
185 | set_color('[Evaluation]: ', 'pink') + set_color('eval_batch_size', 'cyan') + ' = ' +
186 | set_color(f'[{config["eval_batch_size"]}]', 'yellow') + set_color(' eval_args', 'cyan') + ': ' +
187 | set_color(f'[{config["eval_args"]}]', 'yellow')
188 | )
189 | if save:
190 | save_split_dataloaders(config, dataloaders=(train_data, valid_data))
191 |
192 | return train_data, valid_data
193 |
194 |
195 | def get_dataloader(config, phase):
196 | """Return a dataloader class according to :attr:`config` and :attr:`phase`.
197 |
198 | Args:
199 | config (Config): An instance object of Config, used to record parameter information.
200 | phase (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'.
201 |
202 | Returns:
203 | type: The dataloader class that meets the requirements in :attr:`config` and :attr:`phase`.
204 | """
205 | register_table = {
206 | "MultiDAE": _get_AE_dataloader,
207 | "MultiVAE": _get_AE_dataloader,
208 | 'MacridVAE': _get_AE_dataloader,
209 | 'CDAE': _get_AE_dataloader,
210 | 'ENMF': _get_AE_dataloader,
211 | 'RaCT': _get_AE_dataloader,
212 | 'RecVAE': _get_AE_dataloader,
213 | }
214 |
215 | if config['model'] in register_table:
216 | return register_table[config['model']](config, phase)
217 |
218 | model_type = config['MODEL_TYPE']
219 | if phase == 'train':
220 | if model_type != ModelType.KNOWLEDGE:
221 | return TrainDataLoader
222 | else:
223 | return KnowledgeBasedDataLoader
224 | else:
225 | eval_strategy = config['eval_neg_sample_args']['strategy']
226 | if eval_strategy in {'none', 'by'}:
227 | if config['eval_type'] == EvaluatorType.RANKING:
228 | return LabledDataSortEvalDataLoader
229 | else:
230 | return NegSampleEvalDataLoader
231 | elif eval_strategy == 'full':
232 | return FullSortEvalDataLoader
233 |
234 | def get_used_ids(config, dataset):
235 | """
236 | Returns:
237 | dict: Used item_ids is the same as positive item_ids.
238 | Key is phase, and value is a numpy.ndarray which index is user_id, and element is a set of item_ids.
239 | """
240 |     uc_num = dataset.user_context_num
241 |     iid_field = dataset.iid_field
242 |     ucid_field = dataset.ucid_field
243 |     # build one set of used (rated) item ids per user-context (uc) inner id
244 |     used_item_id = np.array([set() for _ in range(uc_num)])
245 |     for ucid, iid in zip(dataset.inter_feat[ucid_field].numpy(),
246 |                          dataset.inter_feat[iid_field].numpy()):
247 |         used_item_id[ucid].add(iid)
248 |     # fail fast if any uc has rated every item (negative sampling would be impossible)
249 |
250 | for used_item_set in used_item_id:
251 |         if len(used_item_set) + 1 == dataset.item_num:  # [pad] counts as an item
252 | raise ValueError(
253 | 'Some users have interacted with all items, '
254 | 'which we can not sample negative items for them. '
255 | 'Please set `user_inter_num_interval` to filter those users.'
256 | )
257 | return used_item_id
258 |
259 | def _get_AE_dataloader(config, phase):
260 | """Customized function for VAE models to get correct dataloader class.
261 |
262 | Args:
263 | config (Config): An instance object of Config, used to record parameter information.
264 | phase (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'.
265 |
266 | Returns:
267 | type: The dataloader class that meets the requirements in :attr:`config` and :attr:`phase`.
268 | """
269 | if phase == 'train':
270 | return UserDataLoader
271 | else:
272 | eval_strategy = config['eval_neg_sample_args']['strategy']
273 | if eval_strategy in {'none', 'by'}:
274 | return NegSampleEvalDataLoader
275 | elif eval_strategy == 'full':
276 | return FullSortEvalDataLoader
277 |
278 |
279 | def create_samplers(config, dataset, built_datasets):
280 |     """Create samplers for training and validation.
281 |
282 |     Args:
283 |         config (Config): An instance object of Config, used to record parameter information.
284 |         dataset (Dataset): An instance object of Dataset, which contains all interaction records.
285 |         built_datasets (list of Dataset): A list of split Datasets, containing the datasets
286 |             for training and validation.
287 |
288 |     Returns:
289 |         tuple:
290 |             - train_sampler (AbstractSampler): The sampler for training.
291 |             - valid_sampler (AbstractSampler): The sampler for validation.
292 |
293 |     """
294 | phases = ['train', 'valid']
295 | train_neg_sample_args = config['train_neg_sample_args']
296 | eval_neg_sample_args = config['eval_neg_sample_args']
297 |
298 | sampler = None
299 | train_sampler, valid_sampler = None, None
300 |
301 | if train_neg_sample_args['strategy'] != 'none':
302 | if not config['repeatable']:
303 | sampler = Sampler(phases, built_datasets, train_neg_sample_args['distribution'])
304 | else:
305 | sampler = RepeatableSampler(phases, dataset, train_neg_sample_args['distribution'])
306 | train_sampler = sampler.set_phase('train')
307 |
308 | if eval_neg_sample_args['strategy'] != 'none':
309 | if sampler is None:
310 | if not config['repeatable']:
311 | sampler = Sampler(phases, built_datasets, eval_neg_sample_args['distribution'])
312 | else:
313 | sampler = RepeatableSampler(phases, dataset, eval_neg_sample_args['distribution'])
314 | else:
315 | sampler.set_distribution(eval_neg_sample_args['distribution'])
316 | valid_sampler = sampler.set_phase('valid')
317 |
318 | return train_sampler, valid_sampler
319 |
320 | # NOTE: The original RecBole version of create_samplers also handled a 'test' phase
321 | # and returned a third test_sampler; this module only builds train/valid samplers.
--------------------------------------------------------------------------------
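
A minimal sketch of how the helpers in this module compose, mirroring the calls
shown above. `config`, `dataset`, `built_datasets`, the per-split datasets, and
`kg_sampler` are assumed to come from the surrounding preparation code; the
variable names here are illustrative, not part of the package API:

    from deepcarskit.data.utils import get_dataloader, create_samplers

    # resolve the dataloader class for each phase
    TrainLoader = get_dataloader(config, 'train')       # TrainDataLoader or KnowledgeBasedDataLoader
    EvalLoader = get_dataloader(config, 'evaluation')   # depends on eval strategy and eval type

    # build train/valid samplers, then wrap the split datasets
    train_sampler, valid_sampler = create_samplers(config, dataset, built_datasets)
    train_data = TrainLoader(config, train_dataset, train_sampler, kg_sampler, shuffle=True)
    valid_data = EvalLoader(config, valid_dataset, valid_sampler, shuffle=False)
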
/deepcarskit/model/context_recommender.py:
--------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 |
4 | import numpy as np
5 | import torch
6 | import torch.nn as nn
7 |
8 | from recbole.model.abstract_recommender import AbstractRecommender
9 | from recbole.model.layers import FMEmbedding
10 | from recbole.utils import ModelType, InputType, FeatureSource, FeatureType, set_color, EvaluatorType
11 | from deepcarskit.model.layers import FMFirstOrderLinear
12 |
13 | class ContextRecommender(AbstractRecommender):
14 |     """This is an abstract context-aware recommender; all context-aware models should inherit from it.
15 |     The base class provides the basic embedding function for feature fields, and also
16 |     includes a first-order (linear) part over those fields.
17 | """
18 | type = ModelType.CONTEXT
19 | input_type = InputType.POINTWISE
20 |
21 | def __init__(self, config, dataset):
22 | super(ContextRecommender, self).__init__()
23 | self.config = config
24 |
25 | self.field_names = dataset.fields(
26 | source=[
27 | FeatureSource.INTERACTION,
28 | FeatureSource.USER,
29 | FeatureSource.USER_ID,
30 | FeatureSource.ITEM,
31 | FeatureSource.ITEM_ID,
32 | ]
33 | )
34 |
35 | self.USER_ID = config['USER_ID_FIELD']
36 | self.ITEM_ID = config['ITEM_ID_FIELD']
37 | self.CONTEXT_SITUATION_ID = config['CONTEXT_SITUATION_FIELD']
38 |
39 | self.actfun = nn.LeakyReLU()
40 | self.loss = nn.MSELoss()
41 | if config['ranking']:
42 | self.LABEL = config['LABEL_FIELD']
43 | if config['sigmoid']:
44 | self.actfun = nn.Sigmoid()
45 |             self.loss = nn.BCELoss()  # BCELoss expects predictions in [0, 1], i.e., after a sigmoid
46 | else:
47 | self.LABEL = config['RATING_FIELD']
48 |
49 |         self.CONTEXTS = []
50 |         # the first two fields are assumed to be user_id and item_id; the rest are candidate contexts
51 |         for i in range(2, len(self.field_names)):
52 |             if self.field_names[i] in (config['LABEL_FIELD'], config['USER_CONTEXT_FIELD'], config['RATING_FIELD']):
53 |                 continue
54 |             self.CONTEXTS.append(self.field_names[i])
55 |
56 | self.n_context_situation = 0
57 | if self.CONTEXT_SITUATION_ID in self.CONTEXTS:
58 | self.n_context_situation = dataset.num(self.CONTEXT_SITUATION_ID)
59 | self.CONTEXTS.remove(self.CONTEXT_SITUATION_ID)
60 |
61 | msghead = "Loaded context variables: "
62 | if self.n_context_situation == 0:
63 | msg = ' '.join(self.CONTEXTS) + ', without context situation ID: ' + self.CONTEXT_SITUATION_ID
64 | else:
65 | msg = ' '.join(self.CONTEXTS) + ', with context situation ID: ' + self.CONTEXT_SITUATION_ID
66 | self.logger.info(set_color(msghead, 'yellow') + msg)
67 |
68 | self.n_users = dataset.num(self.USER_ID)
69 | self.n_items = dataset.num(self.ITEM_ID)
70 | # number of context variables
71 | self.n_contexts_dim = len(self.CONTEXTS)
72 | # number of context conditions in each dimension
73 | self.n_contexts_conditions = []
74 |
75 |         for dim in self.CONTEXTS:
76 |             # number of distinct conditions in this context dimension
77 |             n_dim = dataset.num(dim)
78 |             self.n_contexts_conditions.append(n_dim)
79 |
80 | self.embedding_size = config['embedding_size']
81 | self.device = config['device']
82 | self.double_tower = config['double_tower']
83 | if self.double_tower is None:
84 | self.double_tower = False
85 | self.token_field_names = []
86 | self.token_field_dims = []
87 | self.float_field_names = []
88 | self.float_field_dims = []
89 | self.token_seq_field_names = []
90 | self.token_seq_field_dims = []
91 | self.num_feature_field = 0
92 |
93 | if self.double_tower:
94 | self.user_field_names = dataset.fields(source=[FeatureSource.USER, FeatureSource.USER_ID])
95 | self.item_field_names = dataset.fields(source=[FeatureSource.ITEM, FeatureSource.ITEM_ID])
96 | self.field_names = self.user_field_names + self.item_field_names
97 | self.user_token_field_num = 0
98 | self.user_float_field_num = 0
99 | self.user_token_seq_field_num = 0
100 | for field_name in self.user_field_names:
101 | if dataset.field2type[field_name] == FeatureType.TOKEN:
102 | self.user_token_field_num += 1
103 | elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ:
104 | self.user_token_seq_field_num += 1
105 | else:
106 | self.user_float_field_num += dataset.num(field_name)
107 | self.item_token_field_num = 0
108 | self.item_float_field_num = 0
109 | self.item_token_seq_field_num = 0
110 | for field_name in self.item_field_names:
111 | if dataset.field2type[field_name] == FeatureType.TOKEN:
112 | self.item_token_field_num += 1
113 | elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ:
114 | self.item_token_seq_field_num += 1
115 | else:
116 | self.item_float_field_num += dataset.num(field_name)
117 |
118 | for field_name in self.field_names:
119 |             if field_name in (self.config['RATING_FIELD'], self.config['LABEL_FIELD']):
120 | continue
121 | if dataset.field2type[field_name] == FeatureType.TOKEN:
122 | self.token_field_names.append(field_name)
123 | self.token_field_dims.append(dataset.num(field_name))
124 | elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ:
125 | self.token_seq_field_names.append(field_name)
126 | self.token_seq_field_dims.append(dataset.num(field_name))
127 | else:
128 | self.float_field_names.append(field_name)
129 | self.float_field_dims.append(dataset.num(field_name))
130 | self.num_feature_field += 1
131 | if len(self.token_field_dims) > 0:
132 |             self.token_field_offsets = np.array((0, *np.cumsum(self.token_field_dims)[:-1]), dtype=np.int64)  # np.long was removed in NumPy >= 1.24
133 | self.token_embedding_table = FMEmbedding(
134 | self.token_field_dims, self.token_field_offsets, self.embedding_size
135 | )
136 | if len(self.float_field_dims) > 0:
137 | self.float_embedding_table = nn.Embedding(
138 | np.sum(self.float_field_dims, dtype=np.int32), self.embedding_size
139 | )
140 | if len(self.token_seq_field_dims) > 0:
141 | self.token_seq_embedding_table = nn.ModuleList()
142 | for token_seq_field_dim in self.token_seq_field_dims:
143 | self.token_seq_embedding_table.append(nn.Embedding(token_seq_field_dim, self.embedding_size))
144 |
145 | self.first_order_linear = FMFirstOrderLinear(config, dataset)
146 |
147 | def embed_float_fields(self, float_fields, embed=True):
148 | """Embed the float feature columns
149 |
150 | Args:
151 | float_fields (torch.FloatTensor): The input dense tensor. shape of [batch_size, num_float_field]
152 | embed (bool): Return the embedding of columns or just the columns itself. Defaults to ``True``.
153 |
154 | Returns:
155 | torch.FloatTensor: The result embedding tensor of float columns.
156 | """
157 | # input Tensor shape : [batch_size, num_float_field]
158 | if not embed or float_fields is None:
159 | return float_fields
160 |
161 | num_float_field = float_fields.shape[1]
162 | # [batch_size, num_float_field]
163 | index = torch.arange(0, num_float_field).unsqueeze(0).expand_as(float_fields).long().to(self.device)
164 |
165 | # [batch_size, num_float_field, embed_dim]
166 | float_embedding = self.float_embedding_table(index)
167 | float_embedding = torch.mul(float_embedding, float_fields.unsqueeze(2))
168 |
169 | return float_embedding
170 |
171 | def getContextSituationList(self, interaction, context_dims):
172 | situation = []
173 | for dim in context_dims:
174 | situation.append(interaction[dim].tolist())
175 |         situation = torch.tensor(situation).to(self.device)  # shape: [num_context_dims, batch_size]
176 | return situation
177 |
178 | def getContextSituationDict(self, interaction, context_dims):
179 | situation = {}
180 | for dim in context_dims:
181 | situation[dim] = interaction[dim]
182 | return situation
183 |
184 | def embed_token_fields(self, token_fields):
185 | """Embed the token feature columns
186 |
187 | Args:
188 | token_fields (torch.LongTensor): The input tensor. shape of [batch_size, num_token_field]
189 |
190 | Returns:
191 | torch.FloatTensor: The result embedding tensor of token columns.
192 | """
193 | # input Tensor shape : [batch_size, num_token_field]
194 | if token_fields is None:
195 | return None
196 | # [batch_size, num_token_field, embed_dim]
197 | token_embedding = self.token_embedding_table(token_fields)
198 |
199 | return token_embedding
200 |
201 | def embed_token_seq_fields(self, token_seq_fields, mode='mean'):
202 |         """Embed the token sequence feature columns
203 |
204 |         Args:
205 |             token_seq_fields (list of torch.LongTensor): The input tensors, each of shape [batch_size, seq_len]
206 |             mode (str): How to aggregate the embeddings within each field: 'mean' (default), 'max', or 'sum'.
207 |
208 | Returns:
209 | torch.FloatTensor: The result embedding tensor of token sequence columns.
210 | """
211 | # input is a list of Tensor shape of [batch_size, seq_len]
212 | fields_result = []
213 | for i, token_seq_field in enumerate(token_seq_fields):
214 | embedding_table = self.token_seq_embedding_table[i]
215 | mask = token_seq_field != 0 # [batch_size, seq_len]
216 | mask = mask.float()
217 | value_cnt = torch.sum(mask, dim=1, keepdim=True) # [batch_size, 1]
218 |
219 | token_seq_embedding = embedding_table(token_seq_field) # [batch_size, seq_len, embed_dim]
220 |
221 | mask = mask.unsqueeze(2).expand_as(token_seq_embedding) # [batch_size, seq_len, embed_dim]
222 | if mode == 'max':
223 | masked_token_seq_embedding = token_seq_embedding - (1 - mask) * 1e9 # [batch_size, seq_len, embed_dim]
224 |                 result = torch.max(masked_token_seq_embedding, dim=1, keepdim=True).values  # [batch_size, 1, embed_dim]
225 | elif mode == 'sum':
226 | masked_token_seq_embedding = token_seq_embedding * mask.float()
227 | result = torch.sum(masked_token_seq_embedding, dim=1, keepdim=True) # [batch_size, 1, embed_dim]
228 | else:
229 | masked_token_seq_embedding = token_seq_embedding * mask.float()
230 | result = torch.sum(masked_token_seq_embedding, dim=1) # [batch_size, embed_dim]
231 | eps = torch.FloatTensor([1e-8]).to(self.device)
232 | result = torch.div(result, value_cnt + eps) # [batch_size, embed_dim]
233 | result = result.unsqueeze(1) # [batch_size, 1, embed_dim]
234 | fields_result.append(result)
235 | if len(fields_result) == 0:
236 | return None
237 | else:
238 | return torch.cat(fields_result, dim=1) # [batch_size, num_token_seq_field, embed_dim]
239 |
240 | def double_tower_embed_input_fields(self, interaction):
241 | """Embed the whole feature columns in a double tower way.
242 |
243 | Args:
244 | interaction (Interaction): The input data collection.
245 |
246 | Returns:
247 |             torch.FloatTensor: The sparse (token) embedding tensor of the first (user) tower.
248 |             torch.FloatTensor: The dense (float) embedding tensor of the first (user) tower.
249 |             torch.FloatTensor: The sparse (token) embedding tensor of the second (item) tower.
250 |             torch.FloatTensor: The dense (float) embedding tensor of the second (item) tower.
251 |
252 | """
253 | if not self.double_tower:
254 |             raise RuntimeError('Please check your model hyperparameters and set `double_tower` to True')
255 | sparse_embedding, dense_embedding = self.embed_input_fields(interaction)
256 | if dense_embedding is not None:
257 | first_dense_embedding, second_dense_embedding = \
258 | torch.split(dense_embedding, [self.user_float_field_num, self.item_float_field_num], dim=1)
259 | else:
260 | first_dense_embedding, second_dense_embedding = None, None
261 |
262 | if sparse_embedding is not None:
263 | sizes = [
264 | self.user_token_seq_field_num, self.item_token_seq_field_num, self.user_token_field_num,
265 | self.item_token_field_num
266 | ]
267 | first_token_seq_embedding, second_token_seq_embedding, first_token_embedding, second_token_embedding = \
268 | torch.split(sparse_embedding, sizes, dim=1)
269 | first_sparse_embedding = torch.cat([first_token_seq_embedding, first_token_embedding], dim=1)
270 | second_sparse_embedding = torch.cat([second_token_seq_embedding, second_token_embedding], dim=1)
271 | else:
272 | first_sparse_embedding, second_sparse_embedding = None, None
273 |
274 | return first_sparse_embedding, first_dense_embedding, second_sparse_embedding, second_dense_embedding
275 |
276 | def concat_embed_input_fields(self, interaction):
277 | sparse_embedding, dense_embedding = self.embed_input_fields(interaction)
278 | all_embeddings = []
279 | if sparse_embedding is not None:
280 | all_embeddings.append(sparse_embedding)
281 |         if dense_embedding is not None and len(dense_embedding.shape) == 3:  # only per-field (3-D) dense embeddings can be concatenated
282 | all_embeddings.append(dense_embedding)
283 | return torch.cat(all_embeddings, dim=1) # [batch_size, num_field, embed_dim]
284 |
285 | def embed_input_fields(self, interaction):
286 | """Embed the whole feature columns.
287 |
288 | Args:
289 | interaction (Interaction): The input data collection.
290 |
291 | Returns:
292 |             torch.FloatTensor: sparse_embedding, the embeddings of token and token-sequence columns.
293 |             torch.FloatTensor: dense_embedding, the embeddings (or raw values) of float columns.
294 | """
295 | float_fields = []
296 | for field_name in self.float_field_names:
297 | if len(interaction[field_name].shape) == 2:
298 | float_fields.append(interaction[field_name])
299 | else:
300 | float_fields.append(interaction[field_name].unsqueeze(1))
301 | if len(float_fields) > 0:
302 | float_fields = torch.cat(float_fields, dim=1) # [batch_size, num_float_field]
303 | else:
304 | float_fields = None
305 | # [batch_size, num_float_field] or [batch_size, num_float_field, embed_dim] or None
306 | float_fields_embedding = self.embed_float_fields(float_fields)
307 |
308 | token_fields = []
309 | for field_name in self.token_field_names:
310 | token_fields.append(interaction[field_name].unsqueeze(1))
311 | if len(token_fields) > 0:
312 | token_fields = torch.cat(token_fields, dim=1) # [batch_size, num_token_field]
313 | else:
314 | token_fields = None
315 | # [batch_size, num_token_field, embed_dim] or None
316 | token_fields_embedding = self.embed_token_fields(token_fields)
317 |
318 | token_seq_fields = []
319 | for field_name in self.token_seq_field_names:
320 | token_seq_fields.append(interaction[field_name])
321 | # [batch_size, num_token_seq_field, embed_dim] or None
322 | token_seq_fields_embedding = self.embed_token_seq_fields(token_seq_fields)
323 |
324 |         if token_fields_embedding is None:
325 |             sparse_embedding = token_seq_fields_embedding
326 |         elif token_seq_fields_embedding is None:
327 |             sparse_embedding = token_fields_embedding
328 |         else:
329 |             # both present: concatenate along the field dimension
330 |             sparse_embedding = torch.cat([token_fields_embedding, token_seq_fields_embedding], dim=1)
331 |
332 | dense_embedding = float_fields_embedding
333 |
334 | # sparse_embedding shape: [batch_size, num_token_seq_field+num_token_field, embed_dim] or None
335 | # dense_embedding shape: [batch_size, num_float_field] or [batch_size, num_float_field, embed_dim] or None
336 | return sparse_embedding, dense_embedding
337 |
--------------------------------------------------------------------------------
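
To make the base-class contract concrete, here is a hypothetical minimal subclass
(not part of the package) that consumes `concat_embed_input_fields` together with
the `actfun`, `loss`, and `LABEL` members set up in `__init__`; the RecBole-style
`calculate_loss`/`predict` hooks are assumed:

    import torch.nn as nn

    from deepcarskit.model.context_recommender import ContextRecommender


    class TinyContextModel(ContextRecommender):
        """Hypothetical toy model: mean-pool all field embeddings, score with a linear head."""

        def __init__(self, config, dataset):
            super(TinyContextModel, self).__init__(config, dataset)
            self.predict_layer = nn.Linear(self.embedding_size, 1)

        def forward(self, interaction):
            emb = self.concat_embed_input_fields(interaction)  # [batch_size, num_field, embed_dim]
            pooled = emb.mean(dim=1)                           # [batch_size, embed_dim]
            score = self.predict_layer(pooled).squeeze(-1)     # [batch_size]
            return self.actfun(score)

        def calculate_loss(self, interaction):
            label = interaction[self.LABEL].float()
            return self.loss(self.forward(interaction), label)

        def predict(self, interaction):
            return self.forward(interaction)
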