├── log └── best │ └── EMPTY ├── conda ├── build.sh ├── conda_release.sh └── meta.yaml ├── deepcarskit ├── properties │ ├── model │ │ ├── Pop.yaml │ │ ├── BPR.yaml │ │ ├── EASE.yaml │ │ ├── FM.yaml │ │ ├── LR.yaml │ │ ├── FPMC.yaml │ │ ├── ItemKNN.yaml │ │ ├── TransRec.yaml │ │ ├── STAMP.yaml │ │ ├── SRGNN.yaml │ │ ├── LightGCN.yaml │ │ ├── NPE.yaml │ │ ├── LINE.yaml │ │ ├── SpectralCF.yaml │ │ ├── CFKG.yaml │ │ ├── CKE.yaml │ │ ├── MultiDAE.yaml │ │ ├── AFM.yaml │ │ ├── FISM.yaml │ │ ├── FNN.yaml │ │ ├── NFM.yaml │ │ ├── WideDeep.yaml │ │ ├── DeepFM.yaml │ │ ├── ENMF.yaml │ │ ├── SLIMElastic.yaml │ │ ├── FOSSIL.yaml │ │ ├── HGN.yaml │ │ ├── RippleNet.yaml │ │ ├── SHAN.yaml │ │ ├── DSSM.yaml │ │ ├── Caser.yaml │ │ ├── KGCN.yaml │ │ ├── NARM.yaml │ │ ├── DIN.yaml │ │ ├── GRU4Rec.yaml │ │ ├── DGCF.yaml │ │ ├── RepeatNet.yaml │ │ ├── DCN.yaml │ │ ├── NGCF.yaml │ │ ├── FFM.yaml │ │ ├── GRU4RecKG.yaml │ │ ├── KGAT.yaml │ │ ├── KGNNLS.yaml │ │ ├── MultiVAE.yaml │ │ ├── KSR.yaml │ │ ├── NextItNet.yaml │ │ ├── PNN.yaml │ │ ├── DIEN.yaml │ │ ├── AutoInt.yaml │ │ ├── FwFM.yaml │ │ ├── HRM.yaml │ │ ├── GCMC.yaml │ │ ├── xDeepFM.yaml │ │ ├── GRU4RecF.yaml │ │ ├── NAIS.yaml │ │ ├── CDAE.yaml │ │ ├── KTUP.yaml │ │ ├── ConvNCF.yaml │ │ ├── MKR.yaml │ │ ├── RecVAE.yaml │ │ ├── MacridVAE.yaml │ │ ├── NeuMF.yaml │ │ ├── SASRec.yaml │ │ ├── BERT4Rec.yaml │ │ ├── GCSAN.yaml │ │ ├── RaCT.yaml │ │ ├── FDSA.yaml │ │ ├── SASRecF.yaml │ │ ├── DMF.yaml │ │ ├── NNCF.yaml │ │ ├── S3Rec.yaml │ │ ├── lightgbm.yaml │ │ └── xgboost.yaml │ ├── quick_start_config │ │ ├── sequential.yaml │ │ ├── knowledge_base.yaml │ │ ├── context-aware.yaml │ │ └── sequential_embedding_model.yaml │ └── overall.yaml ├── model │ ├── ae │ │ └── __init__.py │ ├── fms │ │ ├── __init__.py │ │ ├── fm.py │ │ └── deepfm.py │ ├── neucf │ │ ├── __init__.py │ │ ├── neucmf0w.py │ │ ├── neucmfw0.py │ │ ├── neucmf0i.py │ │ ├── neucmfww.py │ │ ├── neucmfi0.py │ │ └── neucmfii.py │ ├── __init__.py │ ├── layers.py │ └── context_recommender.py ├── data │ ├── dataset │ │ └── __init__.py │ ├── dataloader │ │ ├── __init__.py │ │ └── general_dataloader.py │ ├── __init__.py │ └── utils.py ├── config │ ├── __init__.py │ └── configurator.py ├── quick_start │ ├── __init__.py │ └── quick_start.py ├── __init__.py ├── trainer │ ├── __init__.py │ └── trainer.py ├── evaluator │ ├── __init__.py │ ├── evaluator.py │ ├── collector.py │ └── base_metric.py └── utils │ ├── __init__.py │ ├── utils.py │ └── logger.py ├── images ├── NeuCMF.png └── intro-img1.jpg ├── requirements.txt ├── MANIFEST.in ├── .gitignore ├── check_gpu.py ├── check_torch.py ├── .github └── FUNDING.yml ├── run.py ├── LICENSE ├── dataset ├── tripadvisor │ └── ReadMe.html └── depaulmovie │ └── ReadMe.html ├── setup.py ├── config.yaml ├── README.md └── style.cfg /log/best/EMPTY: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /conda/build.sh: -------------------------------------------------------------------------------- 1 | $PYTHON setup.py install -------------------------------------------------------------------------------- /deepcarskit/properties/model/Pop.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/BPR.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 
-------------------------------------------------------------------------------- /deepcarskit/properties/model/EASE.yaml: -------------------------------------------------------------------------------- 1 | reg_weight: 250.0 -------------------------------------------------------------------------------- /deepcarskit/properties/model/FM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 -------------------------------------------------------------------------------- /deepcarskit/properties/model/LR.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 -------------------------------------------------------------------------------- /deepcarskit/properties/model/FPMC.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/ItemKNN.yaml: -------------------------------------------------------------------------------- 1 | k: 100 2 | shrink: 0.0 -------------------------------------------------------------------------------- /deepcarskit/properties/model/TransRec.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 -------------------------------------------------------------------------------- /deepcarskit/model/ae/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.model.ae import * -------------------------------------------------------------------------------- /deepcarskit/model/fms/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.model.fms import * -------------------------------------------------------------------------------- /deepcarskit/model/neucf/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.model.neucf import * -------------------------------------------------------------------------------- /deepcarskit/properties/model/STAMP.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: 'CE' 3 | -------------------------------------------------------------------------------- /deepcarskit/data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.data.dataset.dataset import Dataset -------------------------------------------------------------------------------- /deepcarskit/config/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.config.configurator import CARSConfig 2 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/SRGNN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | step: 1 3 | loss_type: 'CE' -------------------------------------------------------------------------------- /images/NeuCMF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irecsys/DeepCARSKit/HEAD/images/NeuCMF.png -------------------------------------------------------------------------------- /deepcarskit/properties/model/LightGCN.yaml: 
-------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_layers: 2 3 | reg_weight: 1e-05 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NPE.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: "CE" 3 | dropout_prob: 0.3 -------------------------------------------------------------------------------- /images/intro-img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irecsys/DeepCARSKit/HEAD/images/intro-img1.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | recbole==1.0.1 2 | scipy==1.6.0 3 | numpy==1.20.0 4 | xgboost 5 | torch_geometric -------------------------------------------------------------------------------- /deepcarskit/data/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.data.dataloader.general_dataloader import * -------------------------------------------------------------------------------- /deepcarskit/properties/model/LINE.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | order: 2 3 | second_order_loss_weight: 1 -------------------------------------------------------------------------------- /deepcarskit/properties/model/SpectralCF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_layers: 4 3 | reg_weight: 1e-03 -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include deepcarskit/properties * 2 | recursive-include deepcarskit/dataset_example * 3 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/CFKG.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_function: 'inner_product' 3 | margin: 1.0 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/CKE.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | kg_embedding_size: 64 3 | reg_weights: [1e-2,1e-2] -------------------------------------------------------------------------------- /deepcarskit/properties/model/MultiDAE.yaml: -------------------------------------------------------------------------------- 1 | mlp_hidden_size: [600] 2 | latent_dimension: 64 3 | dropout_prob: 0.5 -------------------------------------------------------------------------------- /deepcarskit/properties/model/AFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | attention_size: 25 3 | dropout_prob: 0.3 4 | reg_weight: 2 -------------------------------------------------------------------------------- /deepcarskit/properties/model/FISM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | split_to: 0 3 | reg_weights: [1e-2, 1e-2] 4 | alpha: 0 -------------------------------------------------------------------------------- 
/deepcarskit/properties/model/FNN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256, 256, 256] 3 | dropout_prob: 0.2 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [64, 64, 64] 3 | dropout_prob: 0.0 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/WideDeep.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [32, 16, 8] 3 | dropout_prob: 0.1 -------------------------------------------------------------------------------- /deepcarskit/properties/model/DeepFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [128, 128, 128] 3 | dropout_prob: 0.2 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/ENMF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | dropout_prob: 0.7 3 | reg_weight: 0.0 4 | negative_weight: 0.5 -------------------------------------------------------------------------------- /deepcarskit/properties/model/SLIMElastic.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | l1_ratio: 0.02 3 | positive_only: True 4 | hide_item: True 5 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/FOSSIL.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: "CE" 3 | reg_weight: 0.00 4 | order_len: 3 5 | alpha: 0.6 -------------------------------------------------------------------------------- /deepcarskit/properties/model/HGN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: 'BPR' 3 | pooling_type: "average" 4 | reg_weight: [0.00,0.00] -------------------------------------------------------------------------------- /deepcarskit/properties/model/RippleNet.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_hop: 2 3 | n_memory: 16 4 | kg_weight: 0.01 5 | reg_weight: 1e-7 -------------------------------------------------------------------------------- /deepcarskit/properties/model/SHAN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | short_item_length: 2 3 | loss_type: "CE" 4 | reg_weight: [0.01,0.0001] -------------------------------------------------------------------------------- /deepcarskit/properties/model/DSSM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256, 256, 256] 3 | dropout_prob: 0.3 4 | double_tower: True -------------------------------------------------------------------------------- /deepcarskit/quick_start/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.quick_start.quick_start import run, objective_function, load_data_and_model 2 | 3 | -------------------------------------------------------------------------------- 
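Note: the quick_start package above re-exports run, objective_function and load_data_and_model. As a minimal, hypothetical usage sketch (mirroring run.py further down in this listing, and assuming the repository's sample config.yaml sits in the working directory):

from deepcarskit.quick_start import run

# All model, data and evaluation settings are read from the YAML file,
# exactly as run.py does; nothing here is specific to a particular model.
run(config_file_list=['config.yaml'])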
/deepcarskit/properties/model/Caser.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | dropout_prob: 0.4 3 | reg_weight: 1e-4 4 | nv: 8 5 | nh: 16 6 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/KGCN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_iter: 1 3 | aggregator: "sum" 4 | reg_weight: 1e-7 5 | neighbor_sample_size: 4 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NARM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | n_layers: 1 4 | dropout_probs: [0.25,0.5] 5 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/DIN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256,256,256] 3 | dropout_prob: 0 4 | pooling_mode: 'mean' 5 | 6 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/GRU4Rec.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | num_layers: 1 4 | dropout_prob: 0.3 5 | loss_type: 'CE' 6 | -------------------------------------------------------------------------------- /deepcarskit/model/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/DGCF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_factors: 4 3 | n_iterations: 2 4 | n_layers: 1 5 | reg_weight: 1e-3 6 | cor_weight: 0.01 -------------------------------------------------------------------------------- /deepcarskit/properties/model/RepeatNet.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | loss_type: "CE" 3 | hidden_size: 64 4 | joint_train: False 5 | dropout_prob: 0.5 6 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/DCN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256, 256, 256] 3 | cross_layer_num: 6 4 | reg_weight: 2 5 | dropout_prob: 0.2 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NGCF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size_list: [64,64,64] 3 | node_dropout: 0.0 4 | message_dropout: 0.1 5 | reg_weight: 1e-5 -------------------------------------------------------------------------------- /deepcarskit/properties/model/FFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | # define fields: key: field's id, value: features in this field. can be ignored. 
3 | fields: ~ 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/GRU4RecKG.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | num_layers: 1 4 | dropout_prob: 0.1 5 | freeze_kg: True 6 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/KGAT.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | kg_embedding_size: 64 3 | layers: [64] 4 | mess_dropout: 0.1 5 | reg_weight: 1e-5 6 | aggregator_type: 'bi' -------------------------------------------------------------------------------- /deepcarskit/properties/model/KGNNLS.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | n_iter: 1 3 | aggregator: "sum" 4 | reg_weight: 1e-7 5 | neighbor_sample_size: 4 6 | 7 | ls_weight: 0.5 -------------------------------------------------------------------------------- /deepcarskit/properties/model/MultiVAE.yaml: -------------------------------------------------------------------------------- 1 | mlp_hidden_size: [600] 2 | latent_dimension: 128 3 | dropout_prob: 0.5 4 | anneal_cap: 0.2 5 | total_anneal_steps: 200000 6 | -------------------------------------------------------------------------------- /deepcarskit/properties/quick_start_config/sequential.yaml: -------------------------------------------------------------------------------- 1 | eval_args: 2 | split: {'LS': 'valid_and_test'} 3 | order: TO 4 | mode: full 5 | repeatable: True 6 | -------------------------------------------------------------------------------- /deepcarskit/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | __version__ = '1.0.1' -------------------------------------------------------------------------------- /deepcarskit/properties/model/KSR.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | num_layers: 1 4 | dropout_prob: 0.1 5 | loss_type: 'CE' 6 | freeze_kg: False 7 | gamma: 10 -------------------------------------------------------------------------------- /deepcarskit/properties/model/NextItNet.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | kernel_size: 3 3 | block_num: 5 4 | dilations: [1,4] 5 | reg_weight: 1e-5 6 | loss_type: 'CE' 7 | 8 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/PNN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [128, 256, 128] 3 | dropout_prob: 0.0 4 | reg_weight: 0 5 | use_inner: True 6 | use_outer: False -------------------------------------------------------------------------------- /deepcarskit/properties/model/DIEN.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [256,256,256] 3 | dropout_prob: 0 4 | pooling_mode: 'mean' 5 | gru_type: 'AUGRU' 6 | alpha: 1 7 | -------------------------------------------------------------------------------- /deepcarskit/data/__init__.py: 
-------------------------------------------------------------------------------- 1 | from deepcarskit.data.utils import * 2 | 3 | __all__ = ['create_dataset', 'data_preparation', 'save_split_dataloaders', 'load_split_dataloaders'] 4 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/AutoInt.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | attention_size: 16 3 | n_layers: 3 4 | num_heads: 2 5 | dropout_probs: [0.2,0.2,0.2] 6 | mlp_hidden_size: [128,128] -------------------------------------------------------------------------------- /deepcarskit/properties/model/FwFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | dropout_prob: 0.0 3 | # define fields: key: field's id, value: features in this field. can be ignored. 4 | fields: ~ 5 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/HRM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | high_order: 2 3 | loss_type: "CE" 4 | dropout_prob: 0.2 5 | pooling_type_layer_1: "max" 6 | pooling_type_layer_2: "max" -------------------------------------------------------------------------------- /deepcarskit/properties/model/GCMC.yaml: -------------------------------------------------------------------------------- 1 | accum: "stack" 2 | gcn_output_dim: 500 3 | embedding_size: 64 4 | dropout_prob: 0.3 5 | sparse_feature: True 6 | class_num: 2 7 | num_basis_functions: 2 -------------------------------------------------------------------------------- /deepcarskit/properties/model/xDeepFM.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 10 2 | mlp_hidden_size: [128,128,128] 3 | reg_weight: 5e-4 4 | dropout_prob: 0.2 5 | direct: False 6 | cin_layer_size: [100,100,100] -------------------------------------------------------------------------------- /deepcarskit/properties/model/GRU4RecF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | hidden_size: 128 3 | num_layers: 1 4 | dropout_prob: 0.3 5 | selected_features: ['class'] 6 | pooling_mode: 'sum' 7 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/NAIS.yaml: -------------------------------------------------------------------------------- 1 | algorithm: prod 2 | embedding_size: 64 3 | weight_size: 64 4 | split_to: 0 5 | reg_weights: [1e-7, 1e-7, 1e-5] 6 | alpha: 0 7 | beta: 0.5 8 | pretrain_path: ~ -------------------------------------------------------------------------------- /deepcarskit/properties/model/CDAE.yaml: -------------------------------------------------------------------------------- 1 | loss_type: BCE 2 | hid_activation: relu 3 | out_activation: sigmoid 4 | corruption_ratio: 0.5 5 | embedding_size: 64 6 | reg_weight_1: 0. 
7 | reg_weight_2: 0.01 8 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/KTUP.yaml: -------------------------------------------------------------------------------- 1 | train_rec_step: 5 2 | train_kg_step: 5 3 | embedding_size: 64 4 | use_st_gumbel: True 5 | L1_flag: False 6 | margin: 1.0 7 | kg_weight: 1.0 8 | align_weight: 1.0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.bak 2 | *.log 3 | *.pth 4 | *.pyc 5 | *.zip 6 | *.ttf 7 | *.xml 8 | *.iml 9 | events.out.* 10 | saved/ 11 | log/ 12 | log_tensorboard/ 13 | doc/ 14 | **/__pycache__/ 15 | .idea/ 16 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/ConvNCF.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | cnn_channels: [1, 32, 32, 32, 32] 3 | cnn_kernels: [4, 4, 2, 2] 4 | cnn_strides: [4, 4, 2, 2] 5 | dropout_prob: 0.2 6 | reg_weights: [0.1, 0.1] -------------------------------------------------------------------------------- /deepcarskit/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.trainer.trainer import CARSTrainer 2 | from recbole.trainer import * 3 | 4 | __all__ = ['Trainer', 'KGTrainer', 'KGATTrainer', 'S3RecTrainer', 'CARSTrainer'] 5 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/MKR.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | kg_embedding_size: 64 3 | low_layers_num: 1 4 | high_layers_num: 1 5 | reg_weight: 1e-6 6 | use_inner_product: True 7 | kge_interval: 3 8 | dropout_prob: 0.0 -------------------------------------------------------------------------------- /deepcarskit/properties/quick_start_config/knowledge_base.yaml: -------------------------------------------------------------------------------- 1 | load_col: 2 | inter: ['user_id', 'item_id', 'rating', 'timestamp'] 3 | kg: ['head_id', 'relation_id', 'tail_id'] 4 | link: ['item_id', 'entity_id'] -------------------------------------------------------------------------------- /deepcarskit/properties/model/RecVAE.yaml: -------------------------------------------------------------------------------- 1 | hidden_dimension: 600 2 | latent_dimension: 200 3 | dropout_prob: 0.5 4 | beta: 0.2 5 | mixture_weights: [0.15, 0.75, 0.1] 6 | gamma: 0.005 7 | n_enc_epochs: 3 8 | n_dec_epochs: 1 9 | -------------------------------------------------------------------------------- /deepcarskit/properties/quick_start_config/context-aware.yaml: -------------------------------------------------------------------------------- 1 | eval_args: 2 | split: {'RS':[0.8,0.1,0.1]} 3 | order: RO 4 | group_by: ~ 5 | mode: labeled 6 | neg_sampling: ~ 7 | metrics: ['AUC', 'LogLoss'] 8 | valid_metric: AUC -------------------------------------------------------------------------------- /deepcarskit/properties/quick_start_config/sequential_embedding_model.yaml: -------------------------------------------------------------------------------- 1 | load_col: 2 | inter: ['user_id', 'item_id', 'rating', 'timestamp'] 3 | ent: ['ent_id', 'ent_emb'] 4 | additional_feat_suffix: ent 5 | repeatable: True 6 | -------------------------------------------------------------------------------- 
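Note: the trainer package above exposes CARSTrainer next to RecBole's built-in trainers. Below is a small, hypothetical sketch of how deepcarskit.utils.utils.get_model and get_trainer (defined later in this listing) resolve those classes by name; ModelType comes from RecBole (CONTEXT is its context-aware model type), and NeuCMFii is the model name used in the sample config.yaml:

from recbole.utils.enum_type import ModelType
from deepcarskit.utils.utils import get_model, get_trainer

# 'NeuCMFii' is searched in the deepcarskit.model.(ae|fms|neucf) submodules first,
# which maps to deepcarskit/model/neucf/neucmfii.py.
model_class = get_model('NeuCMFii')

# No dedicated 'NeuCMFiiTrainer' exists, so get_trainer falls back to CARSTrainer.
trainer_class = get_trainer(ModelType.CONTEXT, 'NeuCMFii')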
/deepcarskit/properties/model/MacridVAE.yaml: -------------------------------------------------------------------------------- 1 | embedding_size: 64 2 | drop_out: 0.5 3 | kfac: 10 4 | nogb: False 5 | std: 0.01 6 | encoder_hidden_size: [600] 7 | tau: 0.1 8 | anneal_cap: 0.2 9 | total_anneal_steps: 200000 10 | reg_weights: [0, 0] -------------------------------------------------------------------------------- /deepcarskit/properties/model/NeuMF.yaml: -------------------------------------------------------------------------------- 1 | mf_embedding_size: 64 2 | mlp_embedding_size: 64 3 | mlp_hidden_size: [128,64] 4 | dropout_prob: 0.1 5 | mf_train: True 6 | mlp_train: True 7 | 8 | use_pretrain: False 9 | mf_pretrain_path: ~ 10 | mlp_pretrain_path: ~ -------------------------------------------------------------------------------- /deepcarskit/properties/model/SASRec.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | loss_type: 'CE' -------------------------------------------------------------------------------- /conda/conda_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | conda-build --python 3.7 . 4 | printf "python 3.7 version is released \n" 5 | conda-build --python 3.8 . 6 | printf "python 3.8 version is released \n" 7 | conda-build --python 3.9 . 8 | printf "python 3.9 version is released \n" 9 | -------------------------------------------------------------------------------- /deepcarskit/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.evaluator.base_metric import * 2 | from recbole.evaluator.metrics import * 3 | from deepcarskit.evaluator.evaluator import * 4 | from recbole.evaluator.register import * 5 | from deepcarskit.evaluator.collector import * 6 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/BERT4Rec.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | mask_ratio: 0.2 11 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/GCSAN.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 1 2 | n_heads: 1 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.2 6 | attn_dropout_prob: 0.2 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | step: 1 11 | weight: 0.6 12 | reg_weight: 5e-5 13 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/RaCT.yaml: -------------------------------------------------------------------------------- 1 | mlp_hidden_size: [600] 2 | latent_dimension: 256 3 | dropout_prob: 0.5 4 | anneal_cap: 0.2 5 | total_anneal_steps: 200000 6 | critic_layers: [100,100,10] 7 | metrics_k: 100 8 | train_stage: 'actor_pretrain' 9 | pretrain_epochs: 150 10 | save_step: 10 11 | pre_model_path: '' 
-------------------------------------------------------------------------------- /deepcarskit/properties/model/FDSA.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | selected_features: ['class'] 11 | pooling_mode: 'mean' 12 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/SASRecF.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | selected_features: ['class'] 11 | pooling_mode: 'sum' 12 | loss_type: 'CE' -------------------------------------------------------------------------------- /deepcarskit/properties/model/DMF.yaml: -------------------------------------------------------------------------------- 1 | # WARNING: 2 | # 1. if you set inter_matrix_type='rating', you must set `unused_col: ~` in your data config files. 3 | # 2. The dimensions of the last layer of users and items must be the same 4 | 5 | inter_matrix_type: '01' 6 | user_embedding_size: 64 7 | item_embedding_size: 64 8 | user_hidden_size_list: [64, 64] 9 | item_hidden_size_list: [64, 64] -------------------------------------------------------------------------------- /check_gpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # Check if CUDA (GPU support) is available 4 | gpu_available = torch.cuda.is_available() 5 | print(f"CUDA available: {gpu_available}") 6 | 7 | # If CUDA is available, get the name of the GPU 8 | if gpu_available: 9 | gpu_name = torch.cuda.get_device_name(0) 10 | print(f"GPU detected: {gpu_name}") 11 | else: 12 | print("No GPU detected.") 13 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/NNCF.yaml: -------------------------------------------------------------------------------- 1 | ui_embedding_size: 64 2 | neigh_embedding_size: 32 3 | num_conv_kernel: 128 4 | conv_kernel_size: 5 5 | pool_kernel_size: 5 6 | mlp_hidden_size: [128,64,32,16] 7 | neigh_num: 10 8 | dropout: 0.5 9 | 10 | # The method to use neighborhood information, you can choose random, knn or louvain algorithom 11 | # e.g. 
neigh_info_method: "knn" or neigh_info_method: "louvain" 12 | neigh_info_method: "knn" 13 | 14 | resolution: 1 15 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/S3Rec.yaml: -------------------------------------------------------------------------------- 1 | n_layers: 2 2 | n_heads: 2 3 | hidden_size: 64 4 | inner_size: 256 5 | hidden_dropout_prob: 0.5 6 | attn_dropout_prob: 0.5 7 | hidden_act: 'gelu' 8 | layer_norm_eps: 1e-12 9 | initializer_range: 0.02 10 | item_attribute: 'class' 11 | mask_ratio: 0.2 12 | aap_weight: 1.0 13 | mip_weight: 0.2 14 | map_weight: 1.0 15 | sp_weight: 0.5 16 | train_stage: 'pretrain' 17 | pretrain_epochs: 500 18 | save_step: 10 19 | pre_model_path: '' 20 | loss_type: 'CE' -------------------------------------------------------------------------------- /check_torch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # Get the PyTorch version 4 | torch_version = torch.__version__ 5 | print(f"PyTorch version: {torch_version}") 6 | 7 | # Check if CUDA is available (indicating GPU support) 8 | is_cuda_available = torch.cuda.is_available() 9 | print(f"CUDA available: {is_cuda_available}") 10 | 11 | # Determine the type of PyTorch version 12 | if is_cuda_available: 13 | print("This is the GPU version of PyTorch.") 14 | else: 15 | print("This is the CPU version of PyTorch.") 16 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/lightgbm.yaml: -------------------------------------------------------------------------------- 1 | convert_token_to_onehot: False 2 | token_num_threshold: 10000 3 | 4 | # Dataset 5 | lgb_silent: False 6 | 7 | # Train 8 | lgb_model: ~ 9 | lgb_params: 10 | boosting: gbdt 11 | num_leaves: 90 12 | min_data_in_leaf: 30 13 | max_depth: -1 14 | learning_rate: 0.1 15 | objective: binary 16 | lambda_l1: 0.1 17 | metric: ['auc', 'binary_logloss'] 18 | force_row_wise: True 19 | lgb_learning_rates: ~ 20 | lgb_num_boost_round: 300 21 | lgb_early_stopping_rounds: ~ 22 | lgb_verbose_eval: 100 23 | 24 | -------------------------------------------------------------------------------- /deepcarskit/properties/model/xgboost.yaml: -------------------------------------------------------------------------------- 1 | convert_token_to_onehot: False 2 | token_num_threshold: 10000 3 | 4 | # DMatrix 5 | xgb_silent: ~ 6 | xgb_nthread: ~ 7 | 8 | xgb_model: ~ 9 | xgb_params: 10 | booster: gbtree 11 | objective: binary:logistic 12 | eval_metric: ['auc','logloss'] 13 | # gamma: 0.1 14 | max_depth: 3 15 | # lambda: 1 16 | # subsample: 0.7 17 | # colsample_bytree: 0.7 18 | # min_child_weight: 3 19 | eta: 1 20 | seed: 2020 21 | # nthread: -1 22 | xgb_num_boost_round: 100 23 | xgb_early_stopping_rounds: ~ 24 | xgb_verbose_eval: 50 25 | 26 | -------------------------------------------------------------------------------- /deepcarskit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from deepcarskit.utils.logger import init_logger, set_color 2 | from recbole.utils.utils import get_local_time, ensure_dir, get_model, get_trainer, \ 3 | early_stopping, calculate_valid_score, dict2str, init_seed, get_tensorboard, get_gpu_usage 4 | from recbole.utils.enum_type import * 5 | from recbole.utils.argument_list import * 6 | 7 | __all__ = [ 8 | 'init_logger', 'get_local_time', 'ensure_dir', 'get_model', 'get_trainer', 'early_stopping', 9 | 
'calculate_valid_score', 'dict2str', 'Enum', 'ModelType', 'KGDataLoaderState', 'EvaluatorType', 'InputType', 10 | 'FeatureType', 'FeatureSource', 'init_seed', 'general_arguments', 'training_arguments', 'evaluation_arguments', 11 | 'dataset_arguments', 'get_tensorboard', 'set_color', 'get_gpu_usage' 12 | ] 13 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: deepcarskit 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /deepcarskit/properties/overall.yaml: -------------------------------------------------------------------------------- 1 | # general 2 | gpu_id: 0 3 | use_gpu: True 4 | seed: 2020 5 | state: INFO 6 | reproducibility: True 7 | data_path: 'dataset/' 8 | checkpoint_dir: 'saved' 9 | show_progress: True 10 | save_dataset: False 11 | save_dataloaders: False 12 | 13 | # training settings 14 | epochs: 300 15 | train_batch_size: 2048 16 | learner: adam 17 | learning_rate: 0.001 18 | neg_sampling: 19 | uniform: 1 20 | eval_step: 1 21 | stopping_step: 10 22 | clip_grad_norm: ~ 23 | # clip_grad_norm: {'max_norm': 5, 'norm_type': 2} 24 | weight_decay: 0.0 25 | 26 | # evaluation settings 27 | eval_args: 28 | split: {'RS':[0.8,0.1,0.1]} 29 | group_by: user 30 | order: RO 31 | mode: full 32 | repeatable: False 33 | metrics: ["Recall","MRR","NDCG","Hit","Precision"] 34 | topk: [10] 35 | valid_metric: MRR@10 36 | valid_metric_bigger: True 37 | eval_batch_size: 4096 38 | loss_decimal_place: 4 39 | metric_decimal_place: 4 40 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: deepcarskit 3 | version: 1.0.1 4 | 5 | source: 6 | path: ../ 7 | 8 | requirements: 9 | build: 10 | - python 11 | host: 12 | - python 13 | - recbole ==1.0.1 14 | - numpy >=1.17.2 15 | - scipy ==1.6.0 16 | - pandas >=1.0.5 17 | - tqdm >=4.48.2 18 | - pyyaml >=5.1.0 19 | - scikit-learn >=0.23.2 20 | - pytorch >=1.7.0 21 | - colorlog==4.7.2 22 | - colorama==0.4.4 23 | - tensorboard >=2.5.0 24 | run: 25 | - python 26 | - recbole ==1.0.1 27 | - numpy >=1.17.2 28 | - scipy ==1.6.0 29 | - pandas >=1.0.5 30 | - tqdm >=4.48.2 31 | - pyyaml >=5.1.0 32 | - scikit-learn >=0.23.2 33 | - pytorch >=1.7.0 34 | - colorlog==4.7.2 35 | - colorama==0.4.4 36 | - tensorboard >=2.5.0 37 | test: 38 | imports: 39 | - deepcarskit 40 | 41 | about: 42 | home: https://github.com/irecsys/DeepCARSKit 43 | license: MIT 44 | summary: "A Deep Learning Based Context-Aware Recommendation Library" 45 | 46 | 
-------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | 2 | # @Author : Yong Zheng 3 | 4 | 5 | import argparse 6 | import time 7 | import torch 8 | import multiprocessing as mcpu 9 | from deepcarskit.quick_start import run 10 | from logging import getLogger 11 | 12 | 13 | 14 | if __name__ == '__main__': 15 | print('GPU availability: ', torch.cuda.is_available()) 16 | 17 | n_gpu = torch.cuda.device_count() 18 | print('Num of GPU: ', n_gpu) 19 | 20 | if n_gpu>0: 21 | print(torch.cuda.get_device_name(0)) 22 | print('Current GPU index: ', torch.cuda.current_device()) 23 | 24 | logger = getLogger() 25 | t0 = time.time() 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--config_files', type=str, default='config.yaml', help='config files') 28 | 29 | args, _ = parser.parse_known_args() 30 | 31 | config_list = args.config_files.strip().split(' ') if args.config_files else None 32 | run(config_file_list=config_list) 33 | t1 = time.time() 34 | total = t1 - t0 35 | logger.info('time cost: '+ f': {total}s') -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 RUCAIBox 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dataset/tripadvisor/ReadMe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Data Name: TripAdvisor v2
5 | 6 | Data Descriptions:
7 | This data was scraped from online reviews on tripadvisor.com. There is only one context: trip type (Family, Couples, Business, Solo travel, Friends). Other features describing users and hotels are also available. The data set is sparse in both ratings and contexts: 14,175 ratings from 2,731 users on 2,269 hotels. 8 |
9 | 10 | Citation Information: 11 | 12 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /dataset/depaulmovie/ReadMe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Data Name: DePaulMovie
5 | 6 | Data Descriptions:
7 | This data was collected from surveys: students were asked to rate movies at different times, in different locations, and with different companions.
8 | 9 | Citation Information: 10 | 11 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import os 6 | 7 | from setuptools import setup, find_packages 8 | 9 | install_requires = ['numpy>=1.17.2', 'torch>=1.7.0', 'scipy==1.6.0', 'pandas>=1.0.5', 'tqdm>=4.48.2', 10 | 'colorlog==4.7.2','colorama==0.4.4', 'numpy==1.20.0', 11 | 'scikit_learn>=0.23.2', 'pyyaml>=5.1.0', 'tensorboard>=2.5.0', 'recbole==1.0.1'] 12 | 13 | setup_requires = [] 14 | 15 | extras_require = { 16 | 'hyperopt': ['hyperopt>=0.2.4'] 17 | } 18 | 19 | classifiers = ["License :: OSI Approved :: MIT License"] 20 | 21 | # Readthedocs requires Sphinx extensions to be specified as part of 22 | # install_requires in order to build properly. 23 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 24 | if on_rtd: 25 | install_requires.extend(setup_requires) 26 | 27 | setup( 28 | name='deepcarskit', 29 | version= 30 | '1.0.1', # please remember to edit deepcarskit/__init__.py in response, once updating the version 31 | description='A Deep Learning Based Context-Aware Recommendation Library', 32 | long_description_content_type="text/markdown", 33 | url='https://github.com/irecsys/DeepCARSKit', 34 | author='Yong Zheng', 35 | author_email='DeepCARSKit@Gmail.com', 36 | packages=[ 37 | package for package in find_packages() 38 | if package.startswith('deepcarskit') 39 | ], 40 | include_package_data=True, 41 | install_requires=install_requires, 42 | setup_requires=setup_requires, 43 | extras_require=extras_require, 44 | zip_safe=False, 45 | classifiers=classifiers, 46 | ) 47 | -------------------------------------------------------------------------------- /deepcarskit/model/fms/fm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/7/8 10:09 3 | # @Author : Shanlei Mu 4 | # @Email : slmu@ruc.edu.cn 5 | # @File : fms.py 6 | 7 | # UPDATE: 8 | # @Time : 2020/8/13, 9 | # @Author : Zihan Lin 10 | # @Email : linzihan.super@foxmain.com 11 | 12 | # UPDATE: 13 | # @Time : 2021/12 14 | # @Author : Yong Zheng 15 | # @Notes : made changes to adapt it for CARS 16 | 17 | r""" 18 | FM 19 | ################################################ 20 | References 21 | ----- 22 | Steffen Rendle et al. "Factorization Machines." in ICDM 2010. 23 | 24 | Notes 25 | ----- 26 | context variables are treated as individual dimensions 27 | """ 28 | 29 | import torch.nn as nn 30 | from torch.nn.init import xavier_normal_ 31 | 32 | from deepcarskit.model.context_recommender import ContextRecommender 33 | from recbole.model.layers import BaseFactorizationMachine 34 | from recbole.utils import EvaluatorType 35 | 36 | 37 | class FM(ContextRecommender): 38 | """Factorization Machine considers the second-order interaction with features to predict the final score. 
39 | 40 | """ 41 | 42 | def __init__(self, config, dataset): 43 | 44 | super(FM, self).__init__(config, dataset) 45 | 46 | # define layers and loss 47 | self.fm = BaseFactorizationMachine(reduce_sum=True) 48 | self.config = config 49 | 50 | if self.config['eval_type'] == EvaluatorType.RANKING: 51 | self.actfun = nn.Sigmoid() 52 | self.loss = nn.BCELoss() 53 | self.LABEL = self.config['LABEL_FIELD'] 54 | else: 55 | self.actfun = nn.LeakyReLU() 56 | self.loss = nn.MSELoss() 57 | self.LABEL = self.config['RATING_FIELD'] 58 | 59 | # parameters initialization 60 | self.apply(self._init_weights) 61 | 62 | def _init_weights(self, module): 63 | if isinstance(module, nn.Embedding): 64 | xavier_normal_(module.weight.data) 65 | 66 | def forward(self, interaction): 67 | fm_all_embeddings = self.concat_embed_input_fields(interaction) # [batch_size, num_field, embed_dim] 68 | y = self.actfun(self.first_order_linear(interaction) + self.fm(fm_all_embeddings)) 69 | return y.squeeze(-1) 70 | 71 | def calculate_loss(self, interaction): 72 | label = interaction[self.LABEL] 73 | 74 | output = self.forward(interaction) 75 | return self.loss(output, label) 76 | 77 | def predict(self, interaction): 78 | return self.forward(interaction) 79 | -------------------------------------------------------------------------------- /deepcarskit/utils/utils.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | 4 | """ 5 | deepcarskit.utils.utils 6 | ################################ 7 | """ 8 | 9 | 10 | import importlib 11 | 12 | from recbole.utils.enum_type import ModelType 13 | 14 | def get_model(model_name): 15 | r"""Automatically select model class based on model name 16 | 17 | Args: 18 | model_name (str): model name 19 | 20 | Returns: 21 | Recommender: model class 22 | """ 23 | 24 | model_submodule_recbole = [ 25 | 'general_recommender', 'sequential_recommender', 'knowledge_aware_recommender', 26 | 'exlib_recommender' 27 | ] 28 | 29 | model_submodule_deepcarskit = [ 30 | 'ae', 'fms', 'neucf' 31 | ] 32 | 33 | model_file_name = model_name.lower() 34 | model_module = None 35 | for submodule in model_submodule_deepcarskit: 36 | module_path = '.'.join(['deepcarskit.model', submodule, model_file_name]) 37 | if importlib.util.find_spec(module_path, __name__): 38 | model_module = importlib.import_module(module_path, __name__) 39 | break 40 | 41 | if model_module is None: 42 | for submodule in model_submodule_recbole: 43 | module_path = '.'.join(['recbole.model', submodule, model_file_name]) 44 | if importlib.util.find_spec(module_path, __name__): 45 | model_module = importlib.import_module(module_path, __name__) 46 | break 47 | 48 | if model_module is None: 49 | raise ValueError('`model_name` [{}] is not the name of an existing model.'.format(model_name)) 50 | model_class = getattr(model_module, model_name) 51 | return model_class 52 | 53 | 54 | def get_trainer(model_type, model_name): 55 | r"""Automatically select trainer class based on model type and model name 56 | 57 | Args: 58 | model_type (ModelType): model type 59 | model_name (str): model name 60 | 61 | Returns: 62 | Trainer: trainer class 63 | """ 64 | try: 65 | return getattr(importlib.import_module('deepcarskit.trainer'), model_name + 'Trainer') 66 | except AttributeError: 67 | if model_type == ModelType.KNOWLEDGE: 68 | return getattr(importlib.import_module('recbole.trainer'), 'KGTrainer') 69 | elif model_type == ModelType.TRADITIONAL: 70 | return 
getattr(importlib.import_module('recbole.trainer'), 'TraditionalTrainer') 71 | else: 72 | return getattr(importlib.import_module('deepcarskit.trainer'), 'CARSTrainer') 73 | -------------------------------------------------------------------------------- /deepcarskit/config/configurator.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | # @Notes : Inherit from recbole.config 4 | 5 | """ 6 | deepcarskit.config.configurator 7 | ################################ 8 | """ 9 | 10 | from deepcarskit.utils.utils import get_model 11 | from recbole.config import Config 12 | from recbole.utils import init_seed 13 | 14 | 15 | class CARSConfig(Config): 16 | 17 | def __init__(self, model=None, dataset=None, config_file_list=None, config_dict=None): 18 | super(CARSConfig, self).__init__(model, dataset, config_file_list, config_dict) 19 | 20 | def _get_model_and_dataset(self, model, dataset): 21 | 22 | if model is None: 23 | try: 24 | model = self.external_config_dict['model'] 25 | except KeyError: 26 | raise KeyError( 27 | 'model need to be specified in at least one of the these ways: ' 28 | '[model variable, config file, config dict, command line] ' 29 | ) 30 | if not isinstance(model, str): 31 | # if model is a class object 32 | final_model_class = model 33 | final_model = model.__name__ 34 | else: 35 | # if model is a name in string format 36 | final_model = model 37 | final_model_class = get_model(final_model) # need to get class object 38 | 39 | if dataset is None: 40 | try: 41 | final_dataset = self.external_config_dict['dataset'] 42 | except KeyError: 43 | raise KeyError( 44 | 'dataset need to be specified in at least one of the these ways: ' 45 | '[dataset variable, config file, config dict, command line] ' 46 | ) 47 | else: 48 | final_dataset = dataset 49 | 50 | return final_model, final_model_class, final_dataset 51 | 52 | def _get_final_config_dict(self): 53 | final_config_dict = dict() 54 | final_config_dict.update(self.internal_config_dict) 55 | final_config_dict.update(self.external_config_dict) 56 | # turn on corresponding metrics according to the recommendation task 57 | if final_config_dict['ranking']: 58 | final_config_dict['metrics'] = final_config_dict['ranking_metrics'] 59 | final_config_dict['valid_metric'] = final_config_dict['ranking_valid_metric'] 60 | else: 61 | final_config_dict['metrics'] = final_config_dict['err_metrics'] 62 | final_config_dict['valid_metric'] = final_config_dict['err_valid_metric'] 63 | return final_config_dict 64 | -------------------------------------------------------------------------------- /deepcarskit/evaluator/evaluator.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | # @Notes : added F1 metrics, if precision and recall defined in user requests 4 | 5 | """ 6 | deepcarskit.evaluator.evaluator 7 | ##################################### 8 | """ 9 | import numpy as np 10 | from recbole.evaluator.register import metrics_dict 11 | from recbole.evaluator.collector import DataStruct 12 | 13 | 14 | class Evaluator(object): 15 | """Evaluator is used to check parameter correctness, and summarize the results of all metrics. 
16 | """ 17 | 18 | def __init__(self, config): 19 | self.config = config 20 | self.metrics = [metric.lower() for metric in self.config['metrics']] 21 | self.metric_class = {} 22 | 23 | for metric in self.metrics: 24 | self.metric_class[metric] = metrics_dict[metric](self.config) 25 | 26 | def evaluate(self, dataobject: DataStruct): 27 | """calculate all the metrics. It is called at the end of each epoch 28 | 29 | Args: 30 | dataobject (DataStruct): It contains all the information needed for metrics. 31 | 32 | Returns: 33 | dict: such as ``{'hit@20': 0.3824, 'recall@20': 0.0527, 'hit@10': 0.3153, 'recall@10': 0.0329, 'gauc': 0.9236}`` 34 | 35 | """ 36 | result_dict = {} 37 | topk = [] 38 | metric_f1 = False 39 | if self.config['ranking']: 40 | topk = self.config['topk'] 41 | if 'precision' in self.metrics and 'recall' in self.metrics: 42 | metric_f1 = True 43 | 44 | for metric in self.metrics: 45 | # dataobject has two keys: rec.score, data.label 46 | metric_val = self.metric_class[metric].calculate_metric(dataobject) 47 | result_dict.update(metric_val) 48 | 49 | # adding F1 metric, if precision and recall were calculated 50 | if metric_f1: 51 | k = topk[0] 52 | keys = result_dict.keys() 53 | key1 = 'precision@'+str(k) 54 | key2 = 'recall@'+str(k) 55 | key = 'f1@'+str(k) 56 | if key1 in keys and key2 in keys and key not in keys: 57 | metric = {} 58 | for k in topk: 59 | key1 = 'precision@'+str(k) 60 | key2 = 'recall@'+str(k) 61 | key = 'f1@'+str(k) 62 | precision = result_dict[key1] 63 | recall = result_dict[key2] 64 | if (precision + recall) == 0: 65 | f1 = 0 66 | else: 67 | f1 = round(2*precision*recall/(precision + recall), self.config['metric_decimal_place']) 68 | metric[key] = f1 69 | result_dict.update(metric) 70 | return result_dict 71 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | field_separator: "," 2 | seq_separator: " " 3 | 4 | gpu_id: 0 5 | use_gpu: True 6 | show_progress: False 7 | save_dataset: False 8 | save_dataloaders: False 9 | 10 | ############### data setting ############### 11 | seed: 2022 12 | dataset: depaulmovie 13 | # define data_path as the parent directory of your data folder 14 | # data_path: d:\dataset\ 15 | 16 | USER_ID_FIELD: user_id 17 | ITEM_ID_FIELD: item_id 18 | RATING_FIELD: rating 19 | CONTEXT_SITUATION_FIELD: contexts 20 | USER_CONTEXT_FIELD: uc_id 21 | 22 | # note: you can use either load or unload, cannot use them both 23 | # load_col is used to load specific columns; unload_col is used to ignore selected columns 24 | # set "load_col: ~", if you want to load all cols 25 | # load_col: {'inter': ['user_id','item_id','rating','contexts','uc_id']} 26 | # unload_col: {'inter': ['contexts']} 27 | # by default, we load all cols, unless there are some special requirements 28 | load_col: ~ 29 | 30 | # used for topN ranking only 31 | LABEL_FIELD: label 32 | threshold: 33 | rating: 0 34 | # the current library does not support negative sampling 35 | neg_sampling: ~ 36 | 37 | ############### model setting ############### 38 | model: NeuCMFii 39 | 40 | # General model 41 | epochs: 50 42 | train_batch_size: 500 43 | eval_batch_size: 409600 44 | learner: adam 45 | # learner: adam, RMSprop 46 | 47 | stopping_step: 10 48 | clip_grad_norm: ~ 49 | # clip_grad_norm: {'max_norm': 5, 'norm_type': 2} 50 | weight_decay: 0.0 51 | 52 | # NeuCF models 53 | mf_embedding_size: 64 54 | mlp_embedding_size: 64 55 | mlp_hidden_size: [128,64,32] 56 | 
learning_rate: 0.01 57 | dropout_prob: 0.1 58 | 59 | #tf_train: True 60 | mf_train: True 61 | mlp_train: True 62 | 63 | # FM models 64 | embedding_size: 64 65 | #mlp_hidden_size: [128,64,32] 66 | #learning_rate: 0.01 67 | #dropout_prob: 0.3 68 | 69 | ############### Evaluation setting ############### 70 | eval_args: 71 |   # split: {'RS': [0.8, 0.2]} # hold-out evaluation 72 |   split: {'CV': 5, 'num_processes': 4} # N-fold cross validation by multiprocessing 73 |   group_by: user 74 |   mode: labeled # do not change it; DeepCARSKit only supports this mode 75 |   order: RO 76 | 77 | # indicate whether the task is ranking or rating prediction 78 | # evaluation metrics are automatically selected based on the True/False setting here 79 | ranking: False 80 | # indicate the activation function for the ranking task 81 | # LeakyReLU is the default activation function for both ranking and rating prediction 82 | sigmoid: False 83 | 84 | # define metrics for ranking and rating prediction tasks 85 | ranking_valid_metric: Recall@10 86 | ranking_metrics: ['Precision','Recall','NDCG','MRR','MAP'] 87 | topk: [10,20,30] 88 | 89 | err_valid_metric: MAE 90 | err_metrics: ['MAE','RMSE','AUC'] 91 | 92 | ############### Output setting ############### 93 | loss_decimal_place: 4 94 | metric_decimal_place: 4 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /deepcarskit/model/fms/deepfm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time   : 2020/7/8 3 | # @Author : Shanlei Mu 4 | # @Email  : slmu@ruc.edu.cn 5 | # @File   : deepfm.py 6 | 7 | # UPDATE: 8 | # @Time   : 2020/8/14 9 | # @Author : Zihan Lin 10 | # @Email  : linzihan.super@foxmain.com 11 | 12 | # UPDATE: 13 | # @Time   : 2021/12 14 | # @Author : Yong Zheng 15 | # @Notes  : made changes to adapt it for CARS 16 | 17 | r""" 18 | DeepFM 19 | ################################################ 20 | References 21 | ----- 22 | Huifeng Guo et al. "DeepFM: A Factorization-Machine based Neural Network for CTR Prediction." in IJCAI 2017. 23 | 24 | Notes 25 | ----- 26 | context variables are treated as individual dimensions 27 | """ 28 | 29 | import torch.nn as nn 30 | from torch.nn.init import xavier_normal_, constant_ 31 | 32 | from deepcarskit.model.context_recommender import ContextRecommender 33 | from recbole.model.layers import BaseFactorizationMachine, MLPLayers 34 | from recbole.utils import EvaluatorType 35 | 36 | 37 | class DeepFM(ContextRecommender): 38 |     """DeepFM is a DNN-enhanced FM which uses both a DNN and an FM to calculate feature interactions. 39 |     DeepFM can also be seen as a combination of FNN and FM.
40 | 41 | """ 42 | 43 | def __init__(self, config, dataset): 44 | super(DeepFM, self).__init__(config, dataset) 45 | 46 | # load parameters info 47 | self.config = config 48 | self.mlp_hidden_size = config['mlp_hidden_size'] 49 | self.dropout_prob = config['dropout_prob'] 50 | 51 | # define layers and loss 52 | self.fm = BaseFactorizationMachine(reduce_sum=True) 53 | size_list = [self.embedding_size * self.num_feature_field] + self.mlp_hidden_size 54 | self.mlp_layers = MLPLayers(size_list, self.dropout_prob) 55 | self.deep_predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) # Linear product to the final score 56 | 57 | # parameters initialization 58 | self.apply(self._init_weights) 59 | 60 | def _init_weights(self, module): 61 | if isinstance(module, nn.Embedding): 62 | xavier_normal_(module.weight.data) 63 | elif isinstance(module, nn.Linear): 64 | xavier_normal_(module.weight.data) 65 | if module.bias is not None: 66 | constant_(module.bias.data, 0) 67 | 68 | def forward(self, interaction): 69 | deepfm_all_embeddings = self.concat_embed_input_fields(interaction) # [batch_size, num_field, embed_dim] 70 | batch_size = deepfm_all_embeddings.shape[0] 71 | y_fm = self.first_order_linear(interaction) + self.fm(deepfm_all_embeddings) 72 | 73 | y_deep = self.deep_predict_layer(self.mlp_layers(deepfm_all_embeddings.view(batch_size, -1))) 74 | y = self.actfun(y_fm + y_deep) 75 | return y.squeeze(-1) 76 | 77 | def calculate_loss(self, interaction): 78 | label = interaction[self.LABEL] 79 | output = self.forward(interaction) 80 | return self.loss(output, label) 81 | 82 | def predict(self, interaction): 83 | return self.forward(interaction) 84 | -------------------------------------------------------------------------------- /deepcarskit/model/layers.py: -------------------------------------------------------------------------------- 1 | 2 | # @Time : 2021/12 3 | # @Author : Yong Zheng 4 | # @Notes : Inherit from recbole.model.layers.FMFirstOrderLinear 5 | 6 | 7 | 8 | """ 9 | deepcarskit.model.layers 10 | ############################# 11 | Common Layers in recommender system 12 | """ 13 | 14 | from recbole.model.layers import FMFirstOrderLinear 15 | from recbole.model.layers import FMEmbedding 16 | 17 | import numpy as np 18 | import torch 19 | import torch.nn as nn 20 | 21 | from recbole.utils import FeatureType, FeatureSource 22 | 23 | 24 | 25 | class FMFirstOrderLinear(FMFirstOrderLinear): 26 | """Calculate the first order score of the input features. 27 | This class is a member of ContextRecommender, you can call it easily when inherit ContextRecommender. 
28 | 29 | """ 30 | 31 | def __init__(self, config, dataset, output_dim=1): 32 | 33 | super(FMFirstOrderLinear, self).__init__(config, dataset, output_dim) 34 | self.field_names = dataset.fields( 35 | source=[ 36 | FeatureSource.INTERACTION, 37 | FeatureSource.USER, 38 | FeatureSource.USER_ID, 39 | FeatureSource.ITEM, 40 | FeatureSource.ITEM_ID, 41 | ] 42 | ) 43 | if config['ranking']: 44 | self.LABEL = config['LABEL_FIELD'] 45 | else: 46 | self.LABEL = config['RATING_FIELD'] 47 | self.device = config['device'] 48 | self.token_field_names = [] 49 | self.token_field_dims = [] 50 | self.float_field_names = [] 51 | self.float_field_dims = [] 52 | self.token_seq_field_names = [] 53 | self.token_seq_field_dims = [] 54 | for field_name in self.field_names: 55 | if field_name == config['RATING_FIELD'] or field_name == config['LABEL_FIELD']: 56 | continue 57 | if dataset.field2type[field_name] == FeatureType.TOKEN: 58 | self.token_field_names.append(field_name) 59 | self.token_field_dims.append(dataset.num(field_name)) 60 | elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ: 61 | self.token_seq_field_names.append(field_name) 62 | self.token_seq_field_dims.append(dataset.num(field_name)) 63 | else: 64 | self.float_field_names.append(field_name) 65 | self.float_field_dims.append(dataset.num(field_name)) 66 | if len(self.token_field_dims) > 0: 67 | self.token_field_offsets = np.array((0, *np.cumsum(self.token_field_dims)[:-1]), dtype=np.long) 68 | self.token_embedding_table = FMEmbedding(self.token_field_dims, self.token_field_offsets, output_dim) 69 | if len(self.float_field_dims) > 0: 70 | self.float_embedding_table = nn.Embedding(np.sum(self.float_field_dims, dtype=np.int32), output_dim) 71 | if len(self.token_seq_field_dims) > 0: 72 | self.token_seq_embedding_table = nn.ModuleList() 73 | for token_seq_field_dim in self.token_seq_field_dims: 74 | self.token_seq_embedding_table.append(nn.Embedding(token_seq_field_dim, output_dim)) 75 | 76 | self.bias = nn.Parameter(torch.zeros((output_dim,)), requires_grad=True) -------------------------------------------------------------------------------- /deepcarskit/utils/logger.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/8/7 3 | # @Author : Zihan Lin 4 | # @Email : linzihan.super@foxmail.com 5 | 6 | # UPDATE 7 | # @Time : 2021/3/7 8 | # @Author : Jiawei Guan 9 | # @Email : guanjw@ruc.edu.cn 10 | 11 | # UPDATE: 12 | # @Time : 2021/12 13 | # @Author : Yong Zheng 14 | # @Notes : made light changes to adapt it for CARS 15 | 16 | """ 17 | deepcarskit.utils.logger 18 | ############################### 19 | """ 20 | 21 | import logging 22 | import os 23 | import colorlog 24 | import re 25 | 26 | from recbole.utils.utils import get_local_time, ensure_dir 27 | from colorama import init 28 | 29 | log_colors_config = { 30 | 'DEBUG': 'cyan', 31 | 'WARNING': 'yellow', 32 | 'ERROR': 'red', 33 | 'CRITICAL': 'red', 34 | } 35 | 36 | 37 | class RemoveColorFilter(logging.Filter): 38 | 39 | def filter(self, record): 40 | if record: 41 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 42 | record.msg = ansi_escape.sub('', str(record.msg)) 43 | return True 44 | 45 | 46 | def set_color(log, color, highlight=True): 47 | color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white'] 48 | try: 49 | index = color_set.index(color) 50 | except: 51 | index = len(color_set) - 1 52 | prev_log = '\033[' 53 | if highlight: 54 | prev_log += '1;3' 55 | else: 56 | 
prev_log += '0;3' 57 | prev_log += str(index) + 'm' 58 | return prev_log + log + '\033[0m' 59 | 60 | 61 | def init_logger(config): 62 | """ 63 | A logger that can show a message on standard output and write it into the 64 | file named `filename` simultaneously. 65 | All the message that you want to log MUST be str. 66 | 67 | Args: 68 | config (Config): An instance object of Config, used to record parameter information. 69 | 70 | Example: 71 | >>> logger = logging.getLogger(config) 72 | >>> logger.debug(train_state) 73 | >>> logger.info(train_result) 74 | """ 75 | init(autoreset=True) 76 | LOGROOT = './log/' 77 | dir_name = os.path.dirname(LOGROOT) 78 | ensure_dir(dir_name) 79 | 80 | logfilename = '{}-{}-{}.log'.format(config['dataset'], config['model'], get_local_time()) 81 | 82 | logfilepath = os.path.join(LOGROOT, logfilename) 83 | 84 | filefmt = "%(asctime)-15s %(levelname)s %(message)s" 85 | filedatefmt = "%a %d %b %Y %H:%M:%S" 86 | fileformatter = logging.Formatter(filefmt, filedatefmt) 87 | 88 | sfmt = "%(log_color)s%(asctime)-15s %(levelname)s %(message)s" 89 | sdatefmt = "%d %b %H:%M" 90 | sformatter = colorlog.ColoredFormatter(sfmt, sdatefmt, log_colors=log_colors_config) 91 | if config['state'] is None or config['state'].lower() == 'info': 92 | level = logging.INFO 93 | elif config['state'].lower() == 'debug': 94 | level = logging.DEBUG 95 | elif config['state'].lower() == 'error': 96 | level = logging.ERROR 97 | elif config['state'].lower() == 'warning': 98 | level = logging.WARNING 99 | elif config['state'].lower() == 'critical': 100 | level = logging.CRITICAL 101 | else: 102 | level = logging.INFO 103 | 104 | fh = logging.FileHandler(logfilepath) 105 | fh.setLevel(level) 106 | fh.setFormatter(fileformatter) 107 | remove_color_filter = RemoveColorFilter() 108 | fh.addFilter(remove_color_filter) 109 | 110 | sh = logging.StreamHandler() 111 | sh.setLevel(level) 112 | sh.setFormatter(sformatter) 113 | 114 | logging.basicConfig(level=level, handlers=[sh, fh]) 115 | return fh, logfilepath 116 | -------------------------------------------------------------------------------- /deepcarskit/evaluator/collector.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | # @Notes : Inherit from recbole.evaluator.Collector 4 | 5 | """ 6 | recbole.evaluator.collector 7 | ################################################ 8 | """ 9 | 10 | from recbole.evaluator.register import Register 11 | from recbole.evaluator import Collector, DataStruct 12 | import torch 13 | 14 | 15 | class CARSCollector(Collector): 16 | """The collector is used to collect the resource for evaluator. 17 | As the evaluation metrics are various, the needed resource not only contain the recommended result 18 | but also other resource from data and model. They all can be collected by the collector during the training 19 | and evaluation process. 20 | 21 | This class is only used in Trainer. 22 | 23 | """ 24 | 25 | def __init__(self, config): 26 | self.config = config 27 | self.data_struct = DataStruct() 28 | self.register = Register(config) 29 | self.full = ('full' in config['eval_args']['mode']) 30 | self.topk = self.config['topk'] 31 | self.device = self.config['device'] 32 | 33 | def eval_batch_collect( 34 | self, scores_tensor: torch.Tensor, interaction, positive_u: torch.Tensor, positive_i: torch.Tensor 35 | ): 36 | """ Collect the evaluation resource from batched eval data and batched model output. 
37 | Args: 38 | scores_tensor (Torch.Tensor): the output tensor of model with the shape of `(N, )` 39 | interaction(Interaction): batched eval data. 40 | positive_u(Torch.Tensor): the row index of positive items for each user. 41 | positive_i(Torch.Tensor): the positive item id for each user. 42 | """ 43 | if self.register.need('rec.items'): 44 | 45 | # get topk 46 | _, topk_idx = torch.topk(scores_tensor, max(self.topk), dim=-1) # n_users x k 47 | self.data_struct.update_tensor('rec.items', topk_idx) 48 | 49 | if self.register.need('rec.topk'): 50 | 51 | _, topk_idx = torch.topk(scores_tensor, max(self.topk), dim=-1) # n_users x k 52 | pos_matrix = torch.zeros_like(scores_tensor, dtype=torch.int) 53 | pos_matrix[positive_u, positive_i] = 1 54 | pos_len_list = pos_matrix.sum(dim=1, keepdim=True) 55 | pos_idx = torch.gather(pos_matrix, dim=1, index=topk_idx) 56 | result = torch.cat((pos_idx, pos_len_list), dim=1) 57 | self.data_struct.update_tensor('rec.topk', result) 58 | 59 | if self.register.need('rec.meanrank'): 60 | 61 | desc_scores, desc_index = torch.sort(scores_tensor, dim=-1, descending=True) 62 | 63 | # get the index of positive items in the ranking list 64 | pos_matrix = torch.zeros_like(scores_tensor) 65 | pos_matrix[positive_u, positive_i] = 1 66 | pos_index = torch.gather(pos_matrix, dim=1, index=desc_index) 67 | 68 | avg_rank = self._average_rank(desc_scores) 69 | pos_rank_sum = torch.where(pos_index == 1, avg_rank, torch.zeros_like(avg_rank)).sum(dim=-1, keepdim=True) 70 | 71 | pos_len_list = pos_matrix.sum(dim=1, keepdim=True) 72 | user_len_list = desc_scores.argmin(dim=1, keepdim=True) 73 | result = torch.cat((pos_rank_sum, user_len_list, pos_len_list), dim=1) 74 | self.data_struct.update_tensor('rec.meanrank', result) 75 | 76 | if self.register.need('rec.score'): 77 | 78 | self.data_struct.update_tensor('rec.score', scores_tensor) 79 | 80 | if self.register.need('data.label'): 81 | self.label_field = self.config['LABEL_FIELD'] 82 | if self.config['ranking']: 83 | self.data_struct.update_tensor('data.label', interaction[self.label_field].to(self.device)) 84 | else: 85 | self.data_struct.update_tensor('data.label', interaction[self.config['RATING_FIELD']].to(self.device)) 86 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmf0w.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | 4 | 5 | r""" 6 | NeuCMF0w 7 | ################################################ 8 | References 9 | ----- 10 | Unger, M., Tuzhilin, A., & Livne, A. (2020). Context-aware recommendations based on deep learning frameworks. ACM Transactions on Management Information Systems (TMIS), 11(2), 1-15. 11 | 12 | Notes 13 | ----- 14 | 1). NeuCMF0w has 2 towers (MLP and MF), and it fuses contexts into MLP tower only. 15 | 16 | 2). 
NeuCMF0w utilizes context situation as a whole/a single dimension to be embedded 17 | """ 18 | 19 | import torch 20 | import torch.nn as nn 21 | from torch.nn.init import normal_ 22 | 23 | from deepcarskit.model.context_recommender import ContextRecommender 24 | from recbole.model.layers import MLPLayers 25 | from recbole.utils import InputType, EvaluatorType 26 | 27 | 28 | class NeuCMF0w(ContextRecommender): 29 | 30 | input_type = InputType.POINTWISE 31 | 32 | def __init__(self, config, dataset): 33 | super(NeuCMF0w, self).__init__(config, dataset) 34 | 35 | # load parameters info 36 | self.mf_embedding_size = config['mf_embedding_size'] 37 | self.mlp_embedding_size = config['mlp_embedding_size'] 38 | self.mlp_hidden_size = config['mlp_hidden_size'] 39 | self.dropout_prob = config['dropout_prob'] 40 | self.mf_train = config['mf_train'] 41 | self.mlp_train = config['mlp_train'] 42 | self.use_pretrain = config['use_pretrain'] 43 | self.mf_pretrain_path = config['mf_pretrain_path'] 44 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 45 | 46 | # define layers and loss 47 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size) 48 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size) 49 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 50 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 51 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size) 52 | 53 | # mlp layers = user, item, context_situation 54 | self.mlp_layers = MLPLayers([3 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 55 | self.mlp_layers.logger = None # remove logger to use torch.save() 56 | if self.mf_train and self.mlp_train: 57 | self.predict_layer = nn.Linear(self.mf_embedding_size + self.mlp_hidden_size[-1], 1) 58 | elif self.mf_train: 59 | self.predict_layer = nn.Linear(self.mf_embedding_size, 1) 60 | elif self.mlp_train: 61 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 62 | 63 | # parameters initialization 64 | if self.use_pretrain: 65 | self.load_pretrain() 66 | else: 67 | self.apply(self._init_weights) 68 | 69 | def _init_weights(self, module): 70 | if isinstance(module, nn.Embedding): 71 | normal_(module.weight.data, mean=0.0, std=0.01) 72 | 73 | def forward(self, user, item, context_situation): 74 | user_mf_e = self.user_mf_embedding(user) 75 | item_mf_e = self.item_mf_embedding(item) 76 | user_mlp_e = self.user_mlp_embedding(user) 77 | item_mlp_e = self.item_mlp_embedding(item) 78 | context_situation_e = self.context_situation_mlp_embedding(context_situation) 79 | if self.mf_train: 80 | mf_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 81 | if self.mlp_train: 82 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]] 83 | 84 | if self.mf_train and self.mlp_train: 85 | output = self.actfun(self.predict_layer(torch.cat((mf_output, mlp_output), -1))) 86 | elif self.mf_train: 87 | output = self.actfun(self.predict_layer(mf_output)) 88 | elif self.mlp_train: 89 | output = self.actfun(self.predict_layer(mlp_output)) 90 | else: 91 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 92 | return output.squeeze(-1) 93 | 94 | def calculate_loss(self, interaction): 95 | user = interaction[self.USER_ID] 96 | item = interaction[self.ITEM_ID] 97 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 98 | label = 
interaction[self.LABEL] 99 | 100 | output = self.forward(user, item, context_situation) 101 | return self.loss(output, label) 102 | 103 | def predict(self, interaction): 104 | user = interaction[self.USER_ID] 105 | item = interaction[self.ITEM_ID] 106 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 107 | return self.forward(user, item, context_situation) 108 | 109 | def dump_parameters(self): 110 | r"""A simple implementation of dumping model parameters for pretrain. 111 | 112 | """ 113 | if self.mf_train and not self.mlp_train: 114 | save_path = self.mf_pretrain_path 115 | torch.save(self, save_path) 116 | elif self.mlp_train and not self.mf_train: 117 | save_path = self.mlp_pretrain_path 118 | torch.save(self, save_path) 119 | -------------------------------------------------------------------------------- /deepcarskit/evaluator/base_metric.py: -------------------------------------------------------------------------------- 1 | # @Time : 2020/10/21 2 | # @Author : Kaiyuan Li 3 | # @email : tsotfsk@outlook.com 4 | 5 | # UPDATE 6 | # @Time : 2020/10/21, 2021/8/29 7 | # @Author : Kaiyuan Li, Zhichao Feng 8 | # @email : tsotfsk@outlook.com, fzcbupt@gmail.com 9 | 10 | # UPDATE: 11 | # @Time : 2021/12 12 | # @Author : Yong Zheng 13 | # @Notes : made light changes to adapt it for CARS 14 | 15 | """ 16 | deepcarskit.evaluator.abstract_metric 17 | ##################################### 18 | """ 19 | import numpy 20 | import torch 21 | from recbole.utils import EvaluatorType 22 | 23 | 24 | class AbstractMetric(object): 25 | """:class:`AbstractMetric` is the base object of all metrics. If you want to 26 | implement a metric, you should inherit this class. 27 | 28 | Args: 29 | config (Config): the config of evaluator. 30 | """ 31 | smaller = False 32 | 33 | def __init__(self, config): 34 | self.decimal_place = config['metric_decimal_place'] 35 | 36 | def calculate_metric(self, dataobject): 37 | """Get the dictionary of a metric. 38 | 39 | Args: 40 | dataobject(DataStruct): it contains all the information needed to calculate metrics. 41 | 42 | Returns: 43 | dict: such as ``{'metric@10': 3153, 'metric@20': 0.3824}`` 44 | """ 45 | raise NotImplementedError('Method [calculate_metric] should be implemented.') 46 | 47 | 48 | class TopkMetric(AbstractMetric): 49 | """:class:`TopkMetric` is a base object of top-k metrics. If you want to 50 | implement an top-k metric, you can inherit this class. 51 | 52 | Args: 53 | config (Config): The config of evaluator. 54 | """ 55 | metric_type = EvaluatorType.RANKING 56 | metric_need = ['uc', 'rec.topk'] 57 | 58 | def __init__(self, config): 59 | super().__init__(config) 60 | self.topk = config['topk'] 61 | 62 | def used_info(self, dataobject): 63 | """Get the bool matrix indicating whether the corresponding item is positive 64 | and number of positive items for each user. 65 | """ 66 | rec_mat = dataobject.get('rec.topk') 67 | topk_idx, pos_len_list = torch.split(rec_mat, [max(self.topk), 1], dim=1) 68 | return topk_idx.to(torch.bool).numpy(), pos_len_list.squeeze(-1).numpy() 69 | 70 | def topk_result(self, metric, value): 71 | """Match the metric value to the `k` and put them in `dictionary` form. 72 | 73 | Args: 74 | metric(str): the name of calculated metric. 75 | value(numpy.ndarray): metrics for each user, including values from `metric@1` to `metric@max(self.topk)`. 76 | 77 | Returns: 78 | dict: metric values required in the configuration. 
79 | """ 80 | 81 | metric_dict = {} 82 | avg_result = value.mean(axis=0) 83 | for k in self.topk: 84 | key = '{}@{}'.format(metric, k) 85 | metric_dict[key] = round(avg_result[k - 1], self.decimal_place) 86 | return metric_dict 87 | 88 | def metric_info(self, pos_index, pos_len=None): 89 | """Calculate the value of the metric. 90 | 91 | Args: 92 | pos_index(numpy.ndarray): a bool matrix, shape of ``n_users * max(topk)``. The item with the (j+1)-th \ 93 | highest score of i-th user is positive if ``pos_index[i][j] == True`` and negative otherwise. 94 | pos_len(numpy.ndarray): a vector representing the number of positive items per user, shape of ``(n_users,)``. 95 | 96 | Returns: 97 | numpy.ndarray: metrics for each user, including values from `metric@1` to `metric@max(self.topk)`. 98 | """ 99 | raise NotImplementedError('Method [metric_info] of top-k metric should be implemented.') 100 | 101 | 102 | class LossMetric(AbstractMetric): 103 | """:class:`LossMetric` is a base object of loss based metrics and AUC. If you want to 104 | implement an loss based metric, you can inherit this class. 105 | 106 | Args: 107 | config (Config): The config of evaluator. 108 | """ 109 | metric_type = EvaluatorType.VALUE 110 | metric_need = ['rec.score', 'data.label'] 111 | 112 | def __init__(self, config): 113 | super().__init__(config) 114 | self.config = config 115 | 116 | def used_info(self, dataobject): 117 | """Get scores that model predicted and the ground truth.""" 118 | preds = dataobject.get('rec.score') 119 | trues = dataobject.get('data.label') 120 | 121 | return preds.squeeze(-1).numpy(), trues.squeeze(-1).numpy() 122 | 123 | def output_metric(self, metric, dataobject): 124 | preds, trues = self.used_info(dataobject) 125 | result = self.metric_info(preds, trues) 126 | result=round(result, self.decimal_place) 127 | return {metric: result} 128 | 129 | def metric_info(self, preds, trues): 130 | """Calculate the value of the metric. 131 | 132 | Args: 133 | preds (numpy.ndarray): the scores predicted by model, a one-dimensional vector. 134 | trues (numpy.ndarray): the label of items, which has the same shape as ``preds``. 135 | 136 | Returns: 137 | float: The value of the metric. 138 | """ 139 | raise NotImplementedError('Method [metric_info] of loss-based metric should be implemented.') 140 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmfw0.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022 3 | # @Author : Yong Zheng 4 | 5 | 6 | 7 | r""" 8 | NeuCMFw0 9 | ################################################ 10 | References 11 | ----- 12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 13 | 14 | Notes 15 | ----- 16 | 1). NeuCMFw0 has 4 towers: MLP tower without contexts, MF tower with UI, MF with UC, MF with IC 17 | 18 | 2). 
w => we consider context situation as a whole/single dimension and create embedding for it, when we fuse them into the MF towers 19 | """ 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.nn.init import normal_ 24 | 25 | from deepcarskit.model.context_recommender import ContextRecommender 26 | from recbole.model.layers import MLPLayers 27 | from recbole.utils import InputType, EvaluatorType 28 | 29 | 30 | class NeuCMFw0(ContextRecommender): 31 | 32 | input_type = InputType.POINTWISE 33 | 34 | def __init__(self, config, dataset): 35 | super(NeuCMFw0, self).__init__(config, dataset) 36 | 37 | # load parameters info 38 | self.mf_embedding_size = config['mf_embedding_size'] 39 | self.mlp_embedding_size = config['mlp_embedding_size'] 40 | self.mlp_hidden_size = config['mlp_hidden_size'] 41 | self.dropout_prob = config['dropout_prob'] 42 | self.mf_train = config['mf_train'] 43 | self.mlp_train = config['mlp_train'] 44 | self.use_pretrain = config['use_pretrain'] 45 | self.mf_pretrain_path = config['mf_pretrain_path'] 46 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 47 | 48 | # define layers and loss 49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size) 50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size) 51 | self.context_situation_mf_embedding = nn.Embedding(self.n_context_situation, self.mf_embedding_size) 52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 54 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size) 55 | 56 | # mlp layers = user, item 57 | self.mlp_layers = MLPLayers([2 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 58 | self.mlp_layers.logger = None # remove logger to use torch.save() 59 | if self.mf_train and self.mlp_train: 60 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size + self.mlp_hidden_size[-1], 1) 61 | elif self.mf_train: 62 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size, 1) 63 | elif self.mlp_train: 64 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 65 | 66 | # parameters initialization 67 | if self.use_pretrain: 68 | self.load_pretrain() 69 | else: 70 | self.apply(self._init_weights) 71 | 72 | def _init_weights(self, module): 73 | if isinstance(module, nn.Embedding): 74 | normal_(module.weight.data, mean=0.0, std=0.01) 75 | 76 | def forward(self, user, item, context_situation): 77 | user_mf_e = self.user_mf_embedding(user) 78 | item_mf_e = self.item_mf_embedding(item) 79 | context_situation_mf_e = self.context_situation_mf_embedding(context_situation) 80 | user_mlp_e = self.user_mlp_embedding(user) 81 | item_mlp_e = self.item_mlp_embedding(item) 82 | if self.mf_train: 83 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 84 | mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 85 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 86 | if self.mlp_train: 87 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e), -1)) # [batch_size, layers[-1]] 88 | 89 | if self.mf_train and self.mlp_train: 90 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1))) 91 | elif self.mf_train: 92 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1))) 93 | elif 
self.mlp_train: 94 | output = self.actfun(self.predict_layer(mlp_output)) 95 | else: 96 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 97 | return output.squeeze(-1) 98 | 99 | def calculate_loss(self, interaction): 100 | user = interaction[self.USER_ID] 101 | item = interaction[self.ITEM_ID] 102 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 103 | label = interaction[self.LABEL] 104 | 105 | output = self.forward(user, item, context_situation) 106 | return self.loss(output, label) 107 | 108 | def predict(self, interaction): 109 | user = interaction[self.USER_ID] 110 | item = interaction[self.ITEM_ID] 111 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 112 | return self.forward(user, item, context_situation) 113 | 114 | def dump_parameters(self): 115 | r"""A simple implementation of dumping model parameters for pretrain. 116 | 117 | """ 118 | if self.mf_train and not self.mlp_train: 119 | save_path = self.mf_pretrain_path 120 | torch.save(self, save_path) 121 | elif self.mlp_train and not self.mf_train: 122 | save_path = self.mlp_pretrain_path 123 | torch.save(self, save_path) 124 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmf0i.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | 4 | r""" 5 | NeuCMF0i 6 | ################################################ 7 | References 8 | ----- 9 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 10 | 11 | Notes 12 | ----- 13 | 1). NeuCMF0i has 2 towers (MLP and MF), and it fuses contexts into MLP tower only. 14 | 15 | 2). NeuCMF0i creates embedding for each individual context conditions. 
16 | """ 17 | 18 | import torch 19 | import torch.nn as nn 20 | from torch.nn.init import normal_ 21 | 22 | from deepcarskit.model.context_recommender import ContextRecommender 23 | from recbole.model.layers import MLPLayers 24 | from recbole.utils import InputType, EvaluatorType 25 | 26 | 27 | class NeuCMF0i(ContextRecommender): 28 | 29 | input_type = InputType.POINTWISE 30 | 31 | def __init__(self, config, dataset): 32 | super(NeuCMF0i, self).__init__(config, dataset) 33 | 34 | # load parameters info 35 | self.mf_embedding_size = config['mf_embedding_size'] 36 | self.mlp_embedding_size = config['mlp_embedding_size'] 37 | self.mlp_hidden_size = config['mlp_hidden_size'] 38 | self.dropout_prob = config['dropout_prob'] 39 | self.mf_train = config['mf_train'] 40 | self.mlp_train = config['mlp_train'] 41 | self.use_pretrain = config['use_pretrain'] 42 | self.mf_pretrain_path = config['mf_pretrain_path'] 43 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 44 | 45 | # define layers and loss 46 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size) 47 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size) 48 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 49 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 50 | self.context_dimensions_mlp_embedding = [] 51 | for i in range(0, self.n_contexts_dim): 52 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device)) 53 | 54 | # mlp layers = user, item, context_situation 55 | self.mlp_layers = MLPLayers([(2 + self.n_contexts_dim) * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 56 | self.mlp_layers.logger = None # remove logger to use torch.save() 57 | if self.mf_train and self.mlp_train: 58 | self.predict_layer = nn.Linear(self.mf_embedding_size + self.mlp_hidden_size[-1], 1) 59 | elif self.mf_train: 60 | self.predict_layer = nn.Linear(self.mf_embedding_size, 1) 61 | elif self.mlp_train: 62 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 63 | 64 | # parameters initialization 65 | if self.use_pretrain: 66 | self.load_pretrain() 67 | else: 68 | self.apply(self._init_weights) 69 | 70 | def _init_weights(self, module): 71 | if isinstance(module, nn.Embedding): 72 | normal_(module.weight.data, mean=0.0, std=0.01) 73 | 74 | def forward(self, user, item, context_situation_list): 75 | user_mf_e = self.user_mf_embedding(user) 76 | item_mf_e = self.item_mf_embedding(item) 77 | user_mlp_e = self.user_mlp_embedding(user) 78 | item_mlp_e = self.item_mlp_embedding(item) 79 | context_situation_e = None 80 | for i in range(0, self.n_contexts_dim): 81 | condition = context_situation_list[i] 82 | embd = self.context_dimensions_mlp_embedding[i](condition) 83 | if context_situation_e is None: 84 | context_situation_e = embd 85 | else: 86 | context_situation_e = torch.cat((context_situation_e, embd), -1) 87 | if self.mf_train: 88 | mf_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 89 | if self.mlp_train: 90 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]] 91 | if self.mf_train and self.mlp_train: 92 | output = self.actfun(self.predict_layer(torch.cat((mf_output, mlp_output), -1))) 93 | elif self.mf_train: 94 | output = self.actfun(self.predict_layer(mf_output)) 95 | elif self.mlp_train: 96 | output = self.actfun(self.predict_layer(mlp_output)) 97 | else: 98 | 
raise RuntimeError('mf_train and mlp_train can not be False at the same time') 99 | return output.squeeze(-1) 100 | 101 | def calculate_loss(self, interaction): 102 | user = interaction[self.USER_ID] 103 | item = interaction[self.ITEM_ID] 104 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 105 | label = interaction[self.LABEL] 106 | 107 | output = self.forward(user, item, context_situation_list) 108 | return self.loss(output, label) 109 | 110 | def predict(self, interaction): 111 | user = interaction[self.USER_ID] 112 | item = interaction[self.ITEM_ID] 113 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 114 | return self.forward(user, item, context_situation_list) 115 | 116 | def dump_parameters(self): 117 | r"""A simple implementation of dumping model parameters for pretrain. 118 | 119 | """ 120 | if self.mf_train and not self.mlp_train: 121 | save_path = self.mf_pretrain_path 122 | torch.save(self, save_path) 123 | elif self.mlp_train and not self.mf_train: 124 | save_path = self.mlp_pretrain_path 125 | torch.save(self, save_path) -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmfww.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022 3 | # @Author : Yong Zheng 4 | 5 | 6 | 7 | r""" 8 | NeuCMFww 9 | ################################################ 10 | References 11 | ----- 12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 13 | 14 | Notes 15 | ----- 16 | 1). NeuCMFww has 4 towers: MLP tower with contexts, MF tower with UI, MF with UC, MF with IC 17 | 18 | 2). 
ww => we consider context situation as a whole/single dimension and create embedding for it, when we fuse contexts into the MLP and MF towers 19 | """ 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.nn.init import normal_ 24 | 25 | from deepcarskit.model.context_recommender import ContextRecommender 26 | from recbole.model.layers import MLPLayers 27 | from recbole.utils import InputType, EvaluatorType 28 | 29 | 30 | class NeuCMFww(ContextRecommender): 31 | 32 | input_type = InputType.POINTWISE 33 | 34 | def __init__(self, config, dataset): 35 | super(NeuCMFww, self).__init__(config, dataset) 36 | 37 | # load parameters info 38 | self.mf_embedding_size = config['mf_embedding_size'] 39 | self.mlp_embedding_size = config['mlp_embedding_size'] 40 | self.mlp_hidden_size = config['mlp_hidden_size'] 41 | self.dropout_prob = config['dropout_prob'] 42 | self.mf_train = config['mf_train'] 43 | self.mlp_train = config['mlp_train'] 44 | self.use_pretrain = config['use_pretrain'] 45 | self.mf_pretrain_path = config['mf_pretrain_path'] 46 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 47 | 48 | # define layers and loss 49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size) 50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size) 51 | self.context_situation_mf_embedding = nn.Embedding(self.n_context_situation, self.mf_embedding_size) 52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 54 | self.context_situation_mlp_embedding = nn.Embedding(self.n_context_situation, self.mlp_embedding_size) 55 | 56 | # mlp layers = user, item, context_situation 57 | self.mlp_layers = MLPLayers([3 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 58 | self.mlp_layers.logger = None # remove logger to use torch.save() 59 | if self.mf_train and self.mlp_train: 60 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size + self.mlp_hidden_size[-1], 1) 61 | elif self.mf_train: 62 | self.predict_layer = nn.Linear(3 * self.mf_embedding_size, 1) 63 | elif self.mlp_train: 64 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 65 | 66 | # parameters initialization 67 | if self.use_pretrain: 68 | self.load_pretrain() 69 | else: 70 | self.apply(self._init_weights) 71 | 72 | def _init_weights(self, module): 73 | if isinstance(module, nn.Embedding): 74 | normal_(module.weight.data, mean=0.0, std=0.01) 75 | 76 | def forward(self, user, item, context_situation): 77 | user_mf_e = self.user_mf_embedding(user) 78 | item_mf_e = self.item_mf_embedding(item) 79 | context_situation_mf_e = self.context_situation_mf_embedding(context_situation) 80 | user_mlp_e = self.user_mlp_embedding(user) 81 | item_mlp_e = self.item_mlp_embedding(item) 82 | context_situation_mlp_e = self.context_situation_mlp_embedding(context_situation) 83 | if self.mf_train: 84 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 85 | mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 86 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 87 | if self.mlp_train: 88 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_mlp_e), -1)) # [batch_size, layers[-1]] 89 | 90 | if self.mf_train and self.mlp_train: 91 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1))) 92 
| elif self.mf_train: 93 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1))) 94 | elif self.mlp_train: 95 | output = self.actfun(self.predict_layer(mlp_output)) 96 | else: 97 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 98 | return output.squeeze(-1) 99 | 100 | def calculate_loss(self, interaction): 101 | user = interaction[self.USER_ID] 102 | item = interaction[self.ITEM_ID] 103 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 104 | label = interaction[self.LABEL] 105 | 106 | output = self.forward(user, item, context_situation) 107 | return self.loss(output, label) 108 | 109 | def predict(self, interaction): 110 | user = interaction[self.USER_ID] 111 | item = interaction[self.ITEM_ID] 112 | context_situation = interaction[self.CONTEXT_SITUATION_ID] 113 | return self.forward(user, item, context_situation) 114 | 115 | def dump_parameters(self): 116 | r"""A simple implementation of dumping model parameters for pretrain. 117 | 118 | """ 119 | if self.mf_train and not self.mlp_train: 120 | save_path = self.mf_pretrain_path 121 | torch.save(self, save_path) 122 | elif self.mlp_train and not self.mf_train: 123 | save_path = self.mlp_pretrain_path 124 | torch.save(self, save_path) 125 | -------------------------------------------------------------------------------- /deepcarskit/trainer/trainer.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | # @Notes : Inherit from recbole.trainer.Trainer 4 | 5 | r""" 6 | recbole.trainer.trainer 7 | ################################ 8 | """ 9 | 10 | 11 | import numpy as np 12 | import torch 13 | 14 | from tqdm import tqdm 15 | 16 | from deepcarskit.data import LabledDataSortEvalDataLoader 17 | from deepcarskit.evaluator import CARSCollector 18 | from recbole.trainer import Trainer 19 | from recbole.data import FullSortEvalDataLoader 20 | from recbole.utils import EvaluatorType, set_color, get_gpu_usage 21 | from deepcarskit.evaluator import Evaluator 22 | 23 | class CARSTrainer(Trainer): 24 | r"""The basic Trainer for basic training and evaluation strategies in recommender systems. This class defines common 25 | functions for training and evaluation processes of most recommender system models, including fit(), evaluate(), 26 | resume_checkpoint() and some other features helpful for model training and evaluation. 27 | 28 | Generally speaking, this class can serve most recommender system models, If the training process of the model is to 29 | simply optimize a single loss without involving any complex training strategies, such as adversarial learning, 30 | pre-training and so on. 31 | 32 | Initializing the Trainer needs two parameters: `config` and `model`. `config` records the parameters information 33 | for controlling training and evaluation, such as `learning_rate`, `epochs`, `eval_step` and so on. 34 | `model` is the instantiated object of a Model Class. 
35 | 36 |     """ 37 | 38 |     def __init__(self, config, model): 39 |         super(CARSTrainer, self).__init__(config, model) 40 |         self.eval_collector = CARSCollector(config) 41 |         self.evaluator = Evaluator(config) 42 | 43 |     def _labled_data_sort_batch_eval(self, batched_data): 44 |         interaction, history_index, positive_u, positive_i = batched_data 45 |         try: 46 |             # Note: interaction without item ids 47 |             scores = self.model.full_sort_predict(interaction.to(self.device)) 48 |         except NotImplementedError: 49 |             inter_len = len(interaction) 50 |             new_inter = interaction.to(self.device).repeat_interleave(self.tot_item_num) 51 |             batch_size = len(new_inter) 52 |             new_inter.update(self.item_tensor.repeat(inter_len)) 53 |             if batch_size <= self.test_batch_size: 54 |                 scores = self.model.predict(new_inter) 55 |             else: 56 |                 scores = self._spilt_predict(new_inter, batch_size) 57 | 58 |         scores = scores.view(-1, self.tot_item_num) 59 |         scores[:, 0] = -np.inf 60 |         if history_index is not None: 61 |             scores[history_index] = -np.inf 62 |         return interaction, scores, positive_u, positive_i 63 | 64 | 65 |     @torch.no_grad() 66 |     def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progress=False): 67 |         r"""Evaluate the model based on the eval data. 68 | 69 |         Args: 70 |             eval_data (DataLoader): the eval data 71 |             load_best_model (bool, optional): whether to load the best model in the training process, default: True. 72 |                                               It should be set to True if users want to test the model after training. 73 |             model_file (str, optional): the saved model file, default: None. If users want to test the previously 74 |                                         trained model file, they can set this parameter. 75 |             show_progress (bool): Show the progress of the evaluate epoch. Defaults to ``False``. 76 | 77 |         Returns: 78 |             dict: eval result; key is the eval metric and value is the corresponding metric value.
79 | """ 80 | if not eval_data: 81 | return 82 | 83 | if load_best_model: 84 | if model_file: 85 | checkpoint_file = model_file 86 | else: 87 | checkpoint_file = self.saved_model_file 88 | checkpoint = torch.load(checkpoint_file) 89 | self.model.load_state_dict(checkpoint['state_dict']) 90 | self.model.load_other_parameter(checkpoint.get('other_parameter')) 91 | message_output = 'Loading model structure and parameters from {}'.format(checkpoint_file) 92 | self.logger.info(message_output) 93 | 94 | self.model.eval() 95 | 96 | if isinstance(eval_data, FullSortEvalDataLoader): 97 | eval_func = self._full_sort_batch_eval 98 | if self.item_tensor is None: 99 | self.item_tensor = eval_data.dataset.get_item_feature().to(self.device) 100 | elif isinstance(eval_data, LabledDataSortEvalDataLoader): 101 | eval_func = self._labled_data_sort_batch_eval 102 | if self.item_tensor is None: 103 | self.item_tensor = eval_data.dataset.get_item_feature().to(self.device) 104 | else: 105 | eval_func = self._neg_sample_batch_eval 106 | if self.config['eval_type'] == EvaluatorType.RANKING: 107 | self.tot_item_num = eval_data.dataset.item_num 108 | 109 | iter_data = ( 110 | tqdm( 111 | eval_data, 112 | total=len(eval_data), 113 | ncols=100, 114 | desc=set_color(f"Evaluate ", 'pink'), 115 | ) if show_progress else eval_data 116 | ) 117 | for batch_idx, batched_data in enumerate(iter_data): 118 | interaction, scores, positive_u, positive_i = eval_func(batched_data) 119 | if self.gpu_available and show_progress: 120 | iter_data.set_postfix_str(set_color('GPU RAM: ' + get_gpu_usage(self.device), 'yellow')) 121 | self.eval_collector.eval_batch_collect(scores, interaction, positive_u, positive_i) 122 | self.eval_collector.model_collect(self.model) 123 | struct = self.eval_collector.get_data_struct() 124 | result = self.evaluator.evaluate(struct) 125 | 126 | return result 127 | 128 | 129 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmfi0.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022 3 | # @Author : Yong Zheng 4 | 5 | 6 | 7 | r""" 8 | NeuCMFi0 9 | ################################################ 10 | References 11 | ----- 12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 13 | 14 | Notes 15 | ----- 16 | 1). NeuCMFi0 has 4 towers: MLP tower without contexts, MF tower with UI, MF with UC, MF with IC 17 | 18 | 2). 
i => we create embeddings for each individual context conditions when we fuse them into the MF towers 19 | """ 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.nn.init import normal_ 24 | 25 | from deepcarskit.model.context_recommender import ContextRecommender 26 | from recbole.model.layers import MLPLayers 27 | from recbole.utils import InputType, EvaluatorType 28 | 29 | 30 | class NeuCMFi0(ContextRecommender): 31 | 32 | input_type = InputType.POINTWISE 33 | 34 | def __init__(self, config, dataset): 35 | super(NeuCMFi0, self).__init__(config, dataset) 36 | 37 | # load parameters info 38 | self.mf_embedding_size = config['mf_embedding_size'] 39 | self.mlp_embedding_size = config['mlp_embedding_size'] 40 | self.mlp_hidden_size = config['mlp_hidden_size'] 41 | self.dropout_prob = config['dropout_prob'] 42 | self.mf_train = config['mf_train'] 43 | self.mlp_train = config['mlp_train'] 44 | self.use_pretrain = config['use_pretrain'] 45 | self.mf_pretrain_path = config['mf_pretrain_path'] 46 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 47 | 48 | # define layers and loss 49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size*self.n_contexts_dim) 50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size*self.n_contexts_dim) 51 | self.context_situation_mf_embedding = [] 52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 54 | self.context_dimensions_mlp_embedding = [] 55 | for i in range(0, self.n_contexts_dim): 56 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device)) 57 | self.context_situation_mf_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mf_embedding_size).to(self.device)) 58 | num_mf_towers = 3 59 | 60 | # mlp layers = user, item 61 | self.mlp_layers = MLPLayers([2 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 62 | self.mlp_layers.logger = None # remove logger to use torch.save() 63 | if self.mf_train and self.mlp_train: 64 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim + self.mlp_hidden_size[-1], 1) 65 | elif self.mf_train: 66 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim, 1) 67 | elif self.mlp_train: 68 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 69 | 70 | # parameters initialization 71 | if self.use_pretrain: 72 | self.load_pretrain() 73 | else: 74 | self.apply(self._init_weights) 75 | 76 | def _init_weights(self, module): 77 | if isinstance(module, nn.Embedding): 78 | normal_(module.weight.data, mean=0.0, std=0.01) 79 | 80 | def forward(self, user, item, context_situation_list): 81 | user_mf_e = self.user_mf_embedding(user) 82 | item_mf_e = self.item_mf_embedding(item) 83 | 84 | context_situation_mf_e = None 85 | for i in range(0, self.n_contexts_dim): 86 | condition = context_situation_list[i] 87 | embd = self.context_dimensions_mlp_embedding[i](condition) 88 | if context_situation_mf_e is None: 89 | context_situation_mf_e = embd 90 | else: 91 | context_situation_mf_e = torch.cat((context_situation_mf_e, embd), -1) 92 | 93 | user_mlp_e = self.user_mlp_embedding(user) 94 | item_mlp_e = self.item_mlp_embedding(item) 95 | if self.mf_train: 96 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 97 | mf_uc_output = torch.mul(user_mf_e, 
context_situation_mf_e) # [batch_size, embedding_size] 98 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 99 | if self.mlp_train: 100 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e), -1)) # [batch_size, layers[-1]] 101 | 102 | if self.mf_train and self.mlp_train: 103 | output = self.actfun( 104 | self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1))) 105 | elif self.mf_train: 106 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1))) 107 | elif self.mlp_train: 108 | output = self.actfun(self.predict_layer(mlp_output)) 109 | else: 110 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 111 | return output.squeeze(-1) 112 | 113 | def calculate_loss(self, interaction): 114 | user = interaction[self.USER_ID] 115 | item = interaction[self.ITEM_ID] 116 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 117 | label = interaction[self.LABEL] 118 | 119 | output = self.forward(user, item, context_situation_list) 120 | return self.loss(output, label) 121 | 122 | def predict(self, interaction): 123 | user = interaction[self.USER_ID] 124 | item = interaction[self.ITEM_ID] 125 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 126 | return self.forward(user, item, context_situation_list) 127 | 128 | def dump_parameters(self): 129 | r"""A simple implementation of dumping model parameters for pretrain. 130 | 131 | """ 132 | if self.mf_train and not self.mlp_train: 133 | save_path = self.mf_pretrain_path 134 | torch.save(self, save_path) 135 | elif self.mlp_train and not self.mf_train: 136 | save_path = self.mlp_pretrain_path 137 | torch.save(self, save_path) 138 | -------------------------------------------------------------------------------- /deepcarskit/model/neucf/neucmfii.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022 3 | # @Author : Yong Zheng 4 | 5 | 6 | 7 | r""" 8 | NeuCMFii 9 | ################################################ 10 | References 11 | ----- 12 | Yong Zheng, Gonzalo Florez Arias. "A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations", ACM UMAP, 2022 13 | 14 | Notes 15 | ----- 16 | 1). NeuCMFii has 4 towers: MLP tower with contexts, MF tower with UI, MF with UC, MF with IC 17 | 18 | 2). 
ii => we create embeddings for each individual context conditions when we fuse contexts into the MLP and MF towers 19 | """ 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.nn.init import normal_ 24 | 25 | from deepcarskit.model.context_recommender import ContextRecommender 26 | from recbole.model.layers import MLPLayers 27 | from recbole.utils import InputType, EvaluatorType 28 | 29 | 30 | class NeuCMFii(ContextRecommender): 31 | 32 | input_type = InputType.POINTWISE 33 | 34 | def __init__(self, config, dataset): 35 | super(NeuCMFii, self).__init__(config, dataset) 36 | 37 | # load parameters info 38 | self.mf_embedding_size = config['mf_embedding_size'] 39 | self.mlp_embedding_size = config['mlp_embedding_size'] 40 | self.mlp_hidden_size = config['mlp_hidden_size'] 41 | self.dropout_prob = config['dropout_prob'] 42 | self.mf_train = config['mf_train'] 43 | self.mlp_train = config['mlp_train'] 44 | self.use_pretrain = config['use_pretrain'] 45 | self.mf_pretrain_path = config['mf_pretrain_path'] 46 | self.mlp_pretrain_path = config['mlp_pretrain_path'] 47 | 48 | # define layers and loss 49 | self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size*self.n_contexts_dim) 50 | self.item_mf_embedding = nn.Embedding(self.n_items, self.mf_embedding_size*self.n_contexts_dim) 51 | self.context_situation_mf_embedding = [] 52 | self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size) 53 | self.item_mlp_embedding = nn.Embedding(self.n_items, self.mlp_embedding_size) 54 | self.context_dimensions_mlp_embedding = [] 55 | for i in range(0, self.n_contexts_dim): 56 | self.context_dimensions_mlp_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mlp_embedding_size).to(self.device)) 57 | self.context_situation_mf_embedding.append(nn.Embedding(self.n_contexts_conditions[i], self.mf_embedding_size).to(self.device)) 58 | num_mf_towers = 3 59 | 60 | # mlp layers = user, item, context_situation 61 | self.mlp_layers = MLPLayers([(2 + self.n_contexts_dim) * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout_prob) 62 | self.mlp_layers.logger = None # remove logger to use torch.save() 63 | if self.mf_train and self.mlp_train: 64 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim + self.mlp_hidden_size[-1], 1) 65 | elif self.mf_train: 66 | self.predict_layer = nn.Linear(num_mf_towers * self.mf_embedding_size * self.n_contexts_dim, 1) 67 | elif self.mlp_train: 68 | self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 69 | 70 | # parameters initialization 71 | if self.use_pretrain: 72 | self.load_pretrain() 73 | else: 74 | self.apply(self._init_weights) 75 | 76 | def _init_weights(self, module): 77 | if isinstance(module, nn.Embedding): 78 | normal_(module.weight.data, mean=0.0, std=0.01) 79 | 80 | def forward(self, user, item, context_situation_list): 81 | user_mf_e = self.user_mf_embedding(user) 82 | item_mf_e = self.item_mf_embedding(item) 83 | 84 | context_situation_mf_e = None 85 | for i in range(0, self.n_contexts_dim): 86 | condition = context_situation_list[i] 87 | embd = self.context_dimensions_mlp_embedding[i](condition) 88 | if context_situation_mf_e is None: 89 | context_situation_mf_e = embd 90 | else: 91 | context_situation_mf_e = torch.cat((context_situation_mf_e, embd), -1) 92 | 93 | user_mlp_e = self.user_mlp_embedding(user) 94 | item_mlp_e = self.item_mlp_embedding(item) 95 | context_situation_e = None 96 | for i in range(0, self.n_contexts_dim): 97 | condition = 
context_situation_list[i] 98 | embd = self.context_dimensions_mlp_embedding[i](condition) 99 | if context_situation_e is None: 100 | context_situation_e = embd 101 | else: 102 | context_situation_e = torch.cat((context_situation_e, embd), -1) 103 | if self.mf_train: 104 | mf_ui_output = torch.mul(user_mf_e, item_mf_e) # [batch_size, embedding_size] 105 | mf_uc_output = torch.mul(user_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 106 | mf_ic_output = torch.mul(item_mf_e, context_situation_mf_e) # [batch_size, embedding_size] 107 | if self.mlp_train: 108 | mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e, context_situation_e), -1)) # [batch_size, layers[-1]] 109 | 110 | if self.mf_train and self.mlp_train: 111 | output = self.actfun( 112 | self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output, mlp_output), -1))) 113 | elif self.mf_train: 114 | output = self.actfun(self.predict_layer(torch.cat((mf_ui_output, mf_uc_output, mf_ic_output), -1))) 115 | elif self.mlp_train: 116 | output = self.actfun(self.predict_layer(mlp_output)) 117 | else: 118 | raise RuntimeError('mf_train and mlp_train can not be False at the same time') 119 | return output.squeeze(-1) 120 | 121 | def calculate_loss(self, interaction): 122 | user = interaction[self.USER_ID] 123 | item = interaction[self.ITEM_ID] 124 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 125 | label = interaction[self.LABEL] 126 | 127 | output = self.forward(user, item, context_situation_list) 128 | return self.loss(output, label) 129 | 130 | def predict(self, interaction): 131 | user = interaction[self.USER_ID] 132 | item = interaction[self.ITEM_ID] 133 | context_situation_list = self.getContextSituationList(interaction, self.CONTEXTS) 134 | return self.forward(user, item, context_situation_list) 135 | 136 | def dump_parameters(self): 137 | r"""A simple implementation of dumping model parameters for pretrain. 138 | 139 | """ 140 | if self.mf_train and not self.mlp_train: 141 | save_path = self.mf_pretrain_path 142 | torch.save(self, save_path) 143 | elif self.mlp_train and not self.mf_train: 144 | save_path = self.mlp_pretrain_path 145 | torch.save(self, save_path) 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # DeepCARSKit 3 | 4 | *A Deep Learning Based Context-Aware Recommendation Library* 5 | 6 | [![License](https://img.shields.io/badge/License-MIT-orange.svg)](./LICENSE) 7 | [![Website carskit.github.io](https://img.shields.io/website-up-down-green-red/http/monip.org.svg)](https://carskit.github.io/) 8 | [![python](https://badges.aleen42.com/src/python.svg)](https://badges.aleen42.com/src/python.svg) 9 | [![Citation Badge](https://api.juleskreuer.eu/citation-badge.php?doi=10.1016/j.simpa.2022.100292)](https://scholar.google.com/citations?view_op=view_citation&hl=en&citation_for_view=0FENWMcAAAAJ:Bg7qf7VwUHIC) 10 | [![DOI:10.1007/978-3-319-76207-4_15](https://zenodo.org/badge/DOI/10.1016/j.simpa.2022.100292.svg)](https://doi.org/10.1016/j.simpa.2022.100292) 11 | 12 | [![CARSKit Website](images/intro-img1.jpg)](https://carskit.github.io/) 13 | 14 | 15 | ## History 16 | + **[CARSKit](https://github.com/irecsys/CARSKit)** was released in 2015, and it was the first open-source library for 17 | context-aware recommendations. There were no more significant updates in CARSKit since 2019. 
It was built in Java on top of [Librec](https://github.com/guoguibing/librec) v1.3.
18 | There is also a Python wrapper of CARSKit, [CARSKit-API](https://github.com/WagnoLeaoSergio/CARSKit_API).
19 | + Recommender systems based on deep learning have matured in recent years, and context-aware
20 | recommendation models based on traditional collaborative filtering (e.g., KNN-based CF, matrix factorization) have become
21 | outdated. Therefore, we developed and released [DeepCARSKit](https://github.com/irecsys/DeepCARSKit), which is built upon the [RecBole](https://recbole.io/) v1.0.0 recommendation library.
22 | DeepCARSKit is *a Deep Learning Based Context-Aware Recommendation Library* that runs on Python and [PyTorch](https://pytorch.org/).
23 | 
24 | 
25 | ## Feature
26 | + **Implemented Deep Context-Aware Recommendation Models.** Currently, we support CARS models built on factorization machines (FM) and
27 | Neural Collaborative Filtering (NeuCF and NeuMF). More algorithms will be added.
28 | 
29 | + **Multiple Data Splits & Evaluation Options.** We provide evaluations based on both hold-out and N-fold cross validation.
30 | 
31 | + **Extensive and Standard Evaluation Protocols.** We rewrote parts of RecBole's code to adapt its evaluations to context-aware recommendations.
32 | In particular, item recommendations can be produced for each unique combination of user and context situation. Relevance and ranking metrics,
33 | such as precision, recall, NDCG and MRR, can be calculated by taking context information into consideration.
34 | 
35 | + **Autosave Best Logs.** DeepCARSKit automatically saves the best log/configuration of the models you run in the 'log/best/' folder.
36 | 
37 | + **Other Features.** Other characteristics of DeepCARSKit, such as GPU acceleration, are inherited from RecBole.
38 | 
39 | 
40 | ## News & Updates
41 | **11/13/2024**: We release DeepCARSKit v1.0.1
42 | + Update requirements.txt
43 | + Address the randomness issue in N-fold cross validation by utilizing multiprocessing
44 | 
45 | **03/19/2022**: We release DeepCARSKit v1.0.0
46 | 
47 | ## Documents
48 | + [DeepCARSKit API](https://carskit.github.io/doc/DeepCARSKit/index.html)
49 | + [RecBole API](https://recbole.io/docs/)
50 | + Yong Zheng. "[DeepCARSKit: A Deep Learning Based Context-Aware Recommendation Library](https://doi.org/10.1016/j.simpa.2022.100292)", Software Impacts, Vol. 13, Elsevier, 2022
51 | + Yong Zheng. "[DeepCARSKit: A Demo and User Guide](https://doi.org/10.1145/3511047.3536417)", Adjunct Proceedings of the 30th ACM Conference on User Modeling, Adaptation and Personalization (ACM UMAP), Spain, July, 2022
52 | + Yong Zheng, Gonzalo Florez Arias. "[A Family of Neural Contextual Matrix Factorization Models for Context-Aware Recommendations](https://doi.org/10.1145/3511047.3536404)", Adjunct Proceedings of the 30th ACM Conference on User Modeling, Adaptation and Personalization (ACM UMAP), Spain, July, 2022
53 | 
54 | 
55 | 
56 | 
57 | ## Installation
58 | DeepCARSKit works with the following operating systems:
59 | 
60 | * Linux
61 | * Windows 10
62 | * macOS X
63 | 
64 | DeepCARSKit requires Python 3.7 or later, torch 1.7.0 or later, and RecBole 1.0.1.
65 | For more details, you can refer to the list of [requirements](https://github.com/irecsys/DeepCARSKit/blob/main/requirements.txt).
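As a quick sanity check before running anything, a minimal version-check sketch along the following lines can help. It is illustrative only: the repository also ships `check_torch.py` and `check_gpu.py` helpers whose names suggest a similar purpose, and the snippet below is not their actual contents.

```python
# Illustrative environment check for the stated requirements (not the bundled check scripts).
import sys

import torch
import recbole

print('python :', sys.version.split()[0])     # 3.7 or later is required
print('torch  :', torch.__version__)          # 1.7.0 or later is required
print('recbole:', recbole.__version__)        # 1.0.1 is expected
print('cuda   :', torch.cuda.is_available())  # True only with a CUDA-enabled torch build and a visible GPU
```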
66 | If you want to use DeepCARSKit with GPU,
67 | please ensure that your CUDA or cudatoolkit version is 9.2 or later.
68 | This requires NVIDIA driver version >= 396.26 (for Linux) or >= 397.44 (for Windows 10).
69 | 
70 | The DeepCARSKit library was successfully tested with the following environment:
71 | - `python==3.9.20`
72 | - `recbole==1.0.1`
73 | - `numpy==1.20.0`
74 | - `scipy==1.6.0`
75 | - `lightgbm==4.5.0`
76 | - `xgboost==2.1.1`
77 | 
78 | More info about installation from conda and pip will be released later.
79 | Currently, you can clone the source code from GitHub. We will publish the package to PyPI and conda in the next release.
80 | 
81 | ## Quick-Start
82 | With the source code, you can use the provided script to try the library:
83 | 
84 | ```bash
85 | python run.py
86 | ```
87 | 
88 | This script will run the NeuCMFi model on the DePaulMovie dataset (see the programmatic sketch near the end of this README).
89 | 
90 | ### Data Sets & Preparation
91 | A list of available data sets for research on context-aware recommender systems can be found [here](https://github.com/irecsys/CARSKit/tree/master/context-aware_data_sets).
92 | We provide two data sets (i.e., DePaulMovie and TripAdvisor) in the library. You can refer to their data format, e.g., [depaulmovie.inter](https://github.com/irecsys/DeepCARSKit/blob/main/dataset/depaulmovie/depaulmovie.inter).
93 | 
94 | More specifically, you need to prepare a data set that looks like this (use 'float' and 'token' to indicate numerical and nominal variables):
95 | 
96 | + user_id:token
97 | + item_id:token
98 | + rating:float
99 | + context variable 1:token
100 | + context variable 2:token
101 | + context variable N:token
102 | + contexts:token => a concatenation of context conditions
103 | + uc_id:token => a concatenation of user_id and contexts
104 | 
105 | ### Algorithms in NeuCMF Framework
106 | An extensive NeuCMF framework is included in the DeepCARSKit library. There are multiple variants of the NeuCMF models in this framework.
107 | 
108 | [![alt text](images/NeuCMF.png)](https://carskit.github.io/)
109 | 
110 | 
111 | ### Hyperparameter tuning
112 | You can tune the hyperparameters in the configuration file, config.yaml.
113 | 
114 | A more detailed user guide is on the way...
115 | 
116 | 
117 | ## Major Releases
118 | | Releases | Date       |
119 | |----------|------------|
120 | | v1.0.1   | 11/13/2024 |
121 | | v1.0.0   | 03/19/2022 |
122 | 
123 | 
124 | 
125 | ## Cite
126 | If you find DeepCARSKit useful for your research or development, please cite the following paper:
127 | 
128 | ```
129 | @article{deepcarskit,
130 |   title={DeepCARSKit: A Deep Learning Based Context-Aware Recommendation Library},
131 |   author={Zheng, Yong},
132 |   journal={Software Impacts},
133 |   volume={13},
134 |   pages={100292},
135 |   year={2022},
136 |   publisher={Elsevier}
137 | }
138 | ```
139 | ## Contributing
140 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
141 | Please make sure to update tests as appropriate.
142 | 
143 | We welcome collaborations and contributions to DeepCARSKit. Contributors' names will be listed here.
144 | 
145 | ## Sponsors
146 | The current project was supported by Google Cloud Platform. We are looking for more sponsors to support the development and distribution of this library.
147 | If you are interested in sponsorship, please let us know. Our official email is DeepCARSKit [at] gmail [dot] com.
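## Programmatic Quick-Start (Sketch)
For those who prefer to start the library from their own Python code rather than `python run.py`, here is a minimal sketch. It is illustrative only: it assumes that config.yaml already specifies the dataset, model and evaluation settings you want, and it simply calls the `run()` function defined in `deepcarskit/quick_start/quick_start.py`.

```python
# Illustrative sketch of a programmatic quick start, assuming config.yaml holds the desired settings.
from deepcarskit.quick_start.quick_start import run

if __name__ == '__main__':
    # The __main__ guard matters because run() may spawn worker processes for N-fold cross validation.
    result = run(config_file_list=['config.yaml'])
    # In hold-out mode, run() returns a dict with 'best_valid_score' and 'best_valid_result';
    # with N-fold cross validation, the averaged results are written to the log and the best
    # log file is copied into the 'log/best/' folder.
    print(result)
```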
148 | 
149 | ## License
150 | [MIT License](./LICENSE)
151 | 
-------------------------------------------------------------------------------- /deepcarskit/data/dataloader/general_dataloader.py: --------------------------------------------------------------------------------
1 | # @Time : 2021/12
2 | # @Author : Yong Zheng
3 | # @Notes : added LabledDataSortEvalDataLoader for context-aware ranking evaluations
4 | 
5 | """
6 | deepcarskit.data.dataloader.general_dataloader
7 | ################################################
8 | """
9 | 
10 | import numpy as np
11 | import torch
12 | 
13 | from recbole.data.dataloader.general_dataloader import FullSortEvalDataLoader
14 | from recbole.data.interaction import Interaction, cat_interactions
15 | from recbole.utils import InputType, ModelType
16 | from collections import defaultdict
17 | from logging import getLogger
18 | 
19 | class FullSortEvalDataLoader(FullSortEvalDataLoader):  # thin wrapper that accepts (and ignores) an extra used_ids argument
20 |     def __init__(self, config, dataset, sampler, shuffle=False, used_ids=None):
21 |         super().__init__(config, dataset, sampler, shuffle=shuffle)
22 | 
23 | 
24 | class LabledDataSortEvalDataLoader(FullSortEvalDataLoader):
25 |     """:class:`LabledDataSortEvalDataLoader` is a dataloader for context-aware ranking evaluation over labeled data. In order to speed up calculation,
26 |     this dataloader only returns the user/context part of interactions, positive items and used items.
27 |     It does not return negative items.
28 | 
29 |     Args:
30 |         config (Config): The config of dataloader.
31 |         dataset (Dataset): The dataset of dataloader.
32 |         sampler (Sampler): The sampler of dataloader.
33 |         shuffle (bool, optional): Whether the dataloader will be shuffled after a round. Defaults to ``False``.
34 | 
35 |     used_item = all items that users have interacted with in the training and evaluation sets.
36 |     positive_item = all items that users have interacted with in the evaluation set.
37 |     history_item = all items that users have interacted with in the training set.
38 |     """
39 | 
40 |     def __init__(self, config, dataset, sampler, shuffle=False, used_ids=None):
41 |         self.uid_field = dataset.uid_field
42 |         self.iid_field = dataset.iid_field
43 |         self.is_sequential = config['MODEL_TYPE'] == ModelType.SEQUENTIAL
44 | 
45 |         self.user_id = config['USER_ID_FIELD']
46 |         self.item_id = config['ITEM_ID_FIELD']
47 |         self.uc_id = config['USER_CONTEXT_FIELD']
48 |         self.LABEL = config['LABEL_FIELD']
49 | 
50 |         if not self.is_sequential:
51 |             multidict_uc_items = self._get_multidict(dataset)  # uc (user-context pair) and its rated items
52 | 
53 |             '''
54 |             uc_positive_item = all items that the uc has rated, and which must be positive, in the evaluation set.
55 |             uc_history_item = all items that the uc has rated in the training set.
56 | ''' 57 | self.ucid_list = multidict_uc_items.keys() 58 | self.uc_num = max(self.ucid_list)+1 59 | self.ucid2items_num = np.zeros(self.uc_num, dtype=np.int64) 60 | self.ucid2positive_item = np.array([None] * self.uc_num) 61 | self.ucid2history_item = np.array([None] * self.uc_num) 62 | self.ucid_condidates={} 63 | 64 | # rated items (positive AND negative) for each uc in the training set 65 | ucid2used_item = used_ids 66 | 67 | for ucid in self.ucid_list: 68 | 69 | uc_positve_itemlist = set(multidict_uc_items[ucid]) 70 | self.ucid2positive_item[ucid] = torch.tensor(list(uc_positve_itemlist), dtype=torch.int64) 71 | 72 | self.ucid2items_num[ucid] = len(uc_positve_itemlist) 73 | 74 | uc_history_itemlist = ucid2used_item[ucid] 75 | 76 | self.ucid2history_item[ucid] = torch.tensor(list(uc_history_itemlist), dtype=torch.int64) 77 | 78 | # get uid and context information from uc innerid 79 | context_fields = dataset._get_context_fields() 80 | uid_list = [] 81 | dict_context = {} 82 | for context in context_fields: 83 | dict_context[context]=[] 84 | 85 | for ucid in self.ucid_list: 86 | uid = dataset._get_uid_from_usercontexts(ucid) 87 | uid_list.append(uid) 88 | tuple_context = dataset._get_context_tuple_from_usercontexts(ucid) 89 | for i in range(0,len(context_fields)): 90 | context = context_fields[i] 91 | dict_context[context].append(tuple_context[i]) 92 | 93 | self.ucid_list = torch.tensor(list(self.ucid_list), dtype=torch.int64) 94 | uid_list = torch.tensor(list(uid_list), dtype=torch.int64) 95 | # add uc data into data for predictions 96 | self.uc_df = dataset.join(Interaction({self.uid_field: uid_list, self.uc_id: self.ucid_list})) 97 | for context in dict_context.keys(): 98 | new_inter = dataset.join(Interaction({context: torch.tensor(list(dict_context[context]), dtype=torch.int64)})) 99 | self.uc_df.update(new_inter) 100 | 101 | 102 | self.config = config 103 | self.logger = getLogger() 104 | self.dataset = dataset 105 | self.sampler = sampler 106 | self.batch_size = self.step = None 107 | self.shuffle = shuffle 108 | self.pr = 0 109 | self._init_batch_size_and_step() 110 | 111 | def _get_multidict(self, dataset): 112 | matrix_uc_item = dataset._create_sparse_matrix(dataset.inter_feat, self.uc_id, self.item_id, 'coo', 113 | self.LABEL) 114 | # multidict_u_uc = defaultdict(list) 115 | # key = userid, value = Dict (key = uc, value = decending ranked items with ratings) 116 | # will get a list of Dict, given a userid 117 | multidict_uc_items = defaultdict(list) 118 | multidict_uc_items_positives = defaultdict(list) 119 | 120 | 121 | rows, cols = matrix_uc_item.shape 122 | for uc_id in range(1, rows): 123 | # Index = 0 => [PAD] 124 | # uc_id == inner id for user_context 125 | uc_items = matrix_uc_item.getrow(uc_id) # csr_matrix 126 | items = uc_items.indices # a list of items 127 | rates = uc_items.data # a list of ratings 128 | num_rates = len(rates) 129 | 130 | if num_rates == 0: 131 | continue 132 | 133 | dict_item_rating = {} 134 | 135 | for i in range(0, num_rates): 136 | key = items[i] 137 | value = rates[i] 138 | dict_item_rating[key] = value 139 | # sort items based on ratings 140 | dict_item_rating_decending = sorted(dict_item_rating.items(), key=lambda x: x[1], reverse=True) 141 | # add these items into dict which uses uc as key 142 | for items in dict_item_rating_decending: 143 | multidict_uc_items[uc_id].append(items[0]) 144 | return multidict_uc_items 145 | 146 | @property 147 | def pr_end(self): 148 | if not self.is_sequential: 149 | return len(self.ucid_list) 150 | else: 
151 | return len(self.dataset) 152 | 153 | def _next_batch_data(self): 154 | if not self.is_sequential: 155 | uc_df = self.uc_df[self.pr:self.pr + self.step] 156 | ucid_list = list(uc_df[self.uc_id]) 157 | 158 | history_item = self.ucid2history_item[ucid_list] 159 | positive_item = self.ucid2positive_item[ucid_list] 160 | 161 | history_u = torch.cat([torch.full_like(hist_iid, i) for i, hist_iid in enumerate(history_item)]) 162 | history_i = torch.cat(list(history_item)) 163 | 164 | positive_u = torch.cat([torch.full_like(pos_iid, i) for i, pos_iid in enumerate(positive_item)]) 165 | positive_i = torch.cat(list(positive_item)) 166 | 167 | self.pr += self.step 168 | return uc_df, (history_u, history_i), positive_u, positive_i 169 | else: 170 | interaction = self.dataset[self.pr:self.pr + self.step] 171 | inter_num = len(interaction) 172 | positive_u = torch.arange(inter_num) 173 | positive_i = interaction[self.iid_field] 174 | 175 | self.pr += self.step 176 | return interaction, None, positive_u, positive_i 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /style.cfg: -------------------------------------------------------------------------------- 1 | [style] 2 | # Align closing bracket with visual indentation. 3 | align_closing_bracket_with_visual_indent=True 4 | 5 | # Allow dictionary keys to exist on multiple lines. For example: 6 | # 7 | # x = { 8 | # ('this is the first element of a tuple', 9 | # 'this is the second element of a tuple'): 10 | # value, 11 | # } 12 | allow_multiline_dictionary_keys=False 13 | 14 | # Allow lambdas to be formatted on more than one line. 15 | allow_multiline_lambdas=False 16 | 17 | # Allow splitting before a default / named assignment in an argument list. 18 | allow_split_before_default_or_named_assigns=True 19 | 20 | # Allow splits before the dictionary value. 21 | allow_split_before_dict_value=True 22 | 23 | # Let spacing indicate operator precedence. For example: 24 | # 25 | # a = 1 * 2 + 3 / 4 26 | # b = 1 / 2 - 3 * 4 27 | # c = (1 + 2) * (3 - 4) 28 | # d = (1 - 2) / (3 + 4) 29 | # e = 1 * 2 - 3 30 | # f = 1 + 2 + 3 + 4 31 | # 32 | # will be formatted as follows to indicate precedence: 33 | # 34 | # a = 1*2 + 3/4 35 | # b = 1/2 - 3*4 36 | # c = (1+2) * (3-4) 37 | # d = (1-2) / (3+4) 38 | # e = 1*2 - 3 39 | # f = 1 + 2 + 3 + 4 40 | # 41 | arithmetic_precedence_indication=False 42 | 43 | # Number of blank lines surrounding top-level function and class 44 | # definitions. 45 | blank_lines_around_top_level_definition=2 46 | 47 | # Insert a blank line before a class-level docstring. 48 | blank_line_before_class_docstring=False 49 | 50 | # Insert a blank line before a module docstring. 51 | blank_line_before_module_docstring=True 52 | 53 | # Insert a blank line before a 'def' or 'class' immediately nested 54 | # within another 'def' or 'class'. For example: 55 | # 56 | # class Foo: 57 | # # <------ this blank line 58 | # def method(): 59 | # ... 60 | blank_line_before_nested_class_or_def=True 61 | 62 | # Do not split consecutive brackets. Only relevant when 63 | # dedent_closing_brackets is set. For example: 64 | # 65 | # call_func_that_takes_a_dict( 66 | # { 67 | # 'key1': 'value1', 68 | # 'key2': 'value2', 69 | # } 70 | # ) 71 | # 72 | # would reformat to: 73 | # 74 | # call_func_that_takes_a_dict({ 75 | # 'key1': 'value1', 76 | # 'key2': 'value2', 77 | # }) 78 | coalesce_brackets=True 79 | 80 | # The column limit. 81 | column_limit=120 82 | 83 | # The style for continuation alignment. 
Possible values are: 84 | # 85 | # - SPACE: Use spaces for continuation alignment. This is default behavior. 86 | # - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns 87 | # (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs or 88 | # CONTINUATION_INDENT_WIDTH spaces) for continuation alignment. 89 | # - VALIGN-RIGHT: Vertically align continuation lines to multiple of 90 | # INDENT_WIDTH columns. Slightly right (one tab or a few spaces) if 91 | # cannot vertically align continuation lines with indent characters. 92 | continuation_align_style=SPACE 93 | 94 | # Indent width used for line continuations. 95 | continuation_indent_width=4 96 | 97 | # Put closing brackets on a separate line, dedented, if the bracketed 98 | # expression can't fit in a single line. Applies to all kinds of brackets, 99 | # including function definitions and calls. For example: 100 | # 101 | # config = { 102 | # 'key1': 'value1', 103 | # 'key2': 'value2', 104 | # } # <--- this bracket is dedented and on a separate line 105 | # 106 | # time_series = self.remote_client.query_entity_counters( 107 | # entity='dev3246.region1', 108 | # key='dns.query_latency_tcp', 109 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 110 | # start_ts=now()-timedelta(days=3), 111 | # end_ts=now(), 112 | # ) # <--- this bracket is dedented and on a separate line 113 | dedent_closing_brackets=True 114 | 115 | # Disable the heuristic which places each list element on a separate line 116 | # if the list is comma-terminated. 117 | disable_ending_comma_heuristic=False 118 | 119 | # Place each dictionary entry onto its own line. 120 | each_dict_entry_on_separate_line=True 121 | 122 | # Require multiline dictionary even if it would normally fit on one line. 123 | # For example: 124 | # 125 | # config = { 126 | # 'key1': 'value1' 127 | # } 128 | force_multiline_dict=False 129 | 130 | # The regex for an i18n comment. The presence of this comment stops 131 | # reformatting of that line, because the comments are required to be 132 | # next to the string they translate. 133 | i18n_comment= 134 | 135 | # The i18n function call names. The presence of this function stops 136 | # reformattting on that line, because the string it has cannot be moved 137 | # away from the i18n comment. 138 | i18n_function_call= 139 | 140 | # Indent blank lines. 141 | indent_blank_lines=False 142 | 143 | # Put closing brackets on a separate line, indented, if the bracketed 144 | # expression can't fit in a single line. Applies to all kinds of brackets, 145 | # including function definitions and calls. For example: 146 | # 147 | # config = { 148 | # 'key1': 'value1', 149 | # 'key2': 'value2', 150 | # } # <--- this bracket is indented and on a separate line 151 | # 152 | # time_series = self.remote_client.query_entity_counters( 153 | # entity='dev3246.region1', 154 | # key='dns.query_latency_tcp', 155 | # transform=Transformation.AVERAGE(window=timedelta(seconds=60)), 156 | # start_ts=now()-timedelta(days=3), 157 | # end_ts=now(), 158 | # ) # <--- this bracket is indented and on a separate line 159 | indent_closing_brackets=False 160 | 161 | # Indent the dictionary value if it cannot fit on the same line as the 162 | # dictionary key. For example: 163 | # 164 | # config = { 165 | # 'key1': 166 | # 'value1', 167 | # 'key2': value1 + 168 | # value2, 169 | # } 170 | indent_dictionary_value=False 171 | 172 | # The number of columns to use for indentation. 173 | indent_width=4 174 | 175 | # Join short lines into one line. 
E.g., single line 'if' statements. 176 | join_multiple_lines=True 177 | 178 | # Do not include spaces around selected binary operators. For example: 179 | # 180 | # 1 + 2 * 3 - 4 / 5 181 | # 182 | # will be formatted as follows when configured with "*,/": 183 | # 184 | # 1 + 2*3 - 4/5 185 | no_spaces_around_selected_binary_operators= 186 | 187 | # Use spaces around default or named assigns. 188 | spaces_around_default_or_named_assign=False 189 | 190 | # Adds a space after the opening '{' and before the ending '}' dict delimiters. 191 | # 192 | # {1: 2} 193 | # 194 | # will be formatted as: 195 | # 196 | # { 1: 2 } 197 | spaces_around_dict_delimiters=False 198 | 199 | # Adds a space after the opening '[' and before the ending ']' list delimiters. 200 | # 201 | # [1, 2] 202 | # 203 | # will be formatted as: 204 | # 205 | # [ 1, 2 ] 206 | spaces_around_list_delimiters=False 207 | 208 | # Use spaces around the power operator. 209 | spaces_around_power_operator=True 210 | 211 | # Use spaces around the subscript / slice operator. For example: 212 | # 213 | # my_list[1 : 10 : 2] 214 | spaces_around_subscript_colon=False 215 | 216 | # Adds a space after the opening '(' and before the ending ')' tuple delimiters. 217 | # 218 | # (1, 2, 3) 219 | # 220 | # will be formatted as: 221 | # 222 | # ( 1, 2, 3 ) 223 | spaces_around_tuple_delimiters=False 224 | 225 | # The number of spaces required before a trailing comment. 226 | # This can be a single value (representing the number of spaces 227 | # before each trailing comment) or list of values (representing 228 | # alignment column values; trailing comments within a block will 229 | # be aligned to the first column value that is greater than the maximum 230 | # line length within the block). For example: 231 | # 232 | # With spaces_before_comment=5: 233 | # 234 | # 1 + 1 # Adding values 235 | # 236 | # will be formatted as: 237 | # 238 | # 1 + 1 # Adding values <-- 5 spaces between the end of the statement and comment 239 | # 240 | # With spaces_before_comment=15, 20: 241 | # 242 | # 1 + 1 # Adding values 243 | # two + two # More adding 244 | # 245 | # longer_statement # This is a longer statement 246 | # short # This is a shorter statement 247 | # 248 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment 249 | # short # This is a shorter statement 250 | # 251 | # will be formatted as: 252 | # 253 | # 1 + 1 # Adding values <-- end of line comments in block aligned to col 15 254 | # two + two # More adding 255 | # 256 | # longer_statement # This is a longer statement <-- end of line comments in block aligned to col 20 257 | # short # This is a shorter statement 258 | # 259 | # a_very_long_statement_that_extends_beyond_the_final_column # Comment <-- the end of line comments are aligned based on the line length 260 | # short # This is a shorter statement 261 | # 262 | spaces_before_comment=2 263 | 264 | # Insert a space between the ending comma and closing bracket of a list, 265 | # etc. 266 | space_between_ending_comma_and_closing_bracket=False 267 | 268 | # Use spaces inside brackets, braces, and parentheses. For example: 269 | # 270 | # method_call( 1 ) 271 | # my_dict[ 3 ][ 1 ][ get_index( *args, **kwargs ) ] 272 | # my_set = { 1, 2, 3 } 273 | space_inside_brackets=False 274 | 275 | # Split before arguments 276 | split_all_comma_separated_values=False 277 | 278 | # Split before arguments, but do not split all subexpressions recursively 279 | # (unless needed). 
280 | split_all_top_level_comma_separated_values=False 281 | 282 | # Split before arguments if the argument list is terminated by a 283 | # comma. 284 | split_arguments_when_comma_terminated=False 285 | 286 | # Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@' 287 | # rather than after. 288 | split_before_arithmetic_operator=False 289 | 290 | # Set to True to prefer splitting before '&', '|' or '^' rather than 291 | # after. 292 | split_before_bitwise_operator=True 293 | 294 | # Split before the closing bracket if a list or dict literal doesn't fit on 295 | # a single line. 296 | split_before_closing_bracket=True 297 | 298 | # Split before a dictionary or set generator (comp_for). For example, note 299 | # the split before the 'for': 300 | # 301 | # foo = { 302 | # variable: 'Hello world, have a nice day!' 303 | # for variable in bar if variable != 42 304 | # } 305 | split_before_dict_set_generator=True 306 | 307 | # Split before the '.' if we need to split a longer expression: 308 | # 309 | # foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d)) 310 | # 311 | # would reformat to something like: 312 | # 313 | # foo = ('This is a really long string: {}, {}, {}, {}' 314 | # .format(a, b, c, d)) 315 | split_before_dot=False 316 | 317 | # Split after the opening paren which surrounds an expression if it doesn't 318 | # fit on a single line. 319 | split_before_expression_after_opening_paren=False 320 | 321 | # If an argument / parameter list is going to be split, then split before 322 | # the first argument. 323 | split_before_first_argument=False 324 | 325 | # Set to True to prefer splitting before 'and' or 'or' rather than 326 | # after. 327 | split_before_logical_operator=True 328 | 329 | # Split named assignments onto individual lines. 330 | split_before_named_assigns=True 331 | 332 | # Set to True to split list comprehensions and generators that have 333 | # non-trivial expressions and multiple clauses before each of these 334 | # clauses. For example: 335 | # 336 | # result = [ 337 | # a_long_var + 100 for a_long_var in xrange(1000) 338 | # if a_long_var % 10] 339 | # 340 | # would reformat to something like: 341 | # 342 | # result = [ 343 | # a_long_var + 100 344 | # for a_long_var in xrange(1000) 345 | # if a_long_var % 10] 346 | split_complex_comprehension=False 347 | 348 | # The penalty for splitting right after the opening bracket. 349 | split_penalty_after_opening_bracket=300 350 | 351 | # The penalty for splitting the line after a unary operator. 352 | split_penalty_after_unary_operator=10000 353 | 354 | # The penalty of splitting the line around the '+', '-', '*', '/', '//', 355 | # ``%``, and '@' operators. 356 | split_penalty_arithmetic_operator=300 357 | 358 | # The penalty for splitting right before an if expression. 359 | split_penalty_before_if_expr=0 360 | 361 | # The penalty of splitting the line around the '&', '|', and '^' 362 | # operators. 363 | split_penalty_bitwise_operator=300 364 | 365 | # The penalty for splitting a list comprehension or generator 366 | # expression. 367 | split_penalty_comprehension=80 368 | 369 | # The penalty for characters over the column limit. 370 | split_penalty_excess_character=7000 371 | 372 | # The penalty incurred by adding a line split to the unwrapped line. The 373 | # more line splits added the higher the penalty. 374 | split_penalty_for_added_line_split=30 375 | 376 | # The penalty of splitting a list of "import as" names. 
For example: 377 | # 378 | # from a_very_long_or_indented_module_name_yada_yad import (long_argument_1, 379 | # long_argument_2, 380 | # long_argument_3) 381 | # 382 | # would reformat to something like: 383 | # 384 | # from a_very_long_or_indented_module_name_yada_yad import ( 385 | # long_argument_1, long_argument_2, long_argument_3) 386 | split_penalty_import_names=0 387 | 388 | # The penalty of splitting the line around the 'and' and 'or' 389 | # operators. 390 | split_penalty_logical_operator=300 391 | 392 | # Use the Tab character for indentation. 393 | use_tabs=False 394 | 395 | -------------------------------------------------------------------------------- /deepcarskit/quick_start/quick_start.py: -------------------------------------------------------------------------------- 1 | # @Time : 2020/10/6 2 | # @Author : Shanlei Mu 3 | # @Email : slmu@ruc.edu.cn 4 | 5 | 6 | # UPDATE: 7 | # @Time : 2021/12 8 | # @Author : Yong Zheng 9 | # @Notes : made several changes to adapt it for CARS 10 | 11 | """ 12 | deepcarskit.quick_start 13 | ######################## 14 | """ 15 | import logging 16 | from logging import getLogger 17 | import shutil 18 | import glob 19 | import os 20 | 21 | import torch 22 | import pickle 23 | 24 | 25 | # from past.builtins import raw_input 26 | 27 | from deepcarskit.config import CARSConfig 28 | from deepcarskit.data import create_dataset, data_preparation, save_split_dataloaders, load_split_dataloaders 29 | from deepcarskit.utils.utils import get_model, get_trainer 30 | from deepcarskit.utils import init_logger, init_seed, set_color 31 | from multiprocessing.dummy import Pool as ThreadPool 32 | from multiprocessing import Pool 33 | from recbole.utils import EvaluatorType 34 | 35 | 36 | def eval_folds(args_tuple): 37 | train_data_fold = args_tuple[0] 38 | valid_data_fold = args_tuple[1] 39 | 40 | config = args_tuple[2] 41 | init_seed(config['seed'], config['reproducibility']) 42 | 43 | logger = args_tuple[3] 44 | fold = args_tuple[4] 45 | 46 | if config['save_dataloaders']: 47 | save_split_dataloaders(config, dataloaders=(train_data_fold, valid_data_fold)) 48 | 49 | # model loading and initialization 50 | init_seed(config['seed'], config['reproducibility']) 51 | model = get_model(config['model'])(config, train_data_fold.dataset).to(config['device']) 52 | 53 | # trainer loading and initialization 54 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model) 55 | name = trainer.saved_model_file 56 | ind = name.rindex('.') 57 | lname = list(name) 58 | lname.insert(ind, '_f'+str(fold)) 59 | trainer.saved_model_file = ''.join(lname) 60 | 61 | # model training 62 | best_valid_score_fold, best_valid_result_fold = trainer.fit( 63 | train_data_fold, valid_data_fold, saved=True, show_progress=config['show_progress'] 64 | ) 65 | msghead = 'Fold ' + str(fold) + ' completed: ' 66 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result_fold}') 67 | 68 | return best_valid_score_fold, best_valid_result_fold 69 | 70 | 71 | def run(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True): 72 | r""" A fast running api, which includes the complete process of 73 | training and testing a model on a specified dataset 74 | 75 | Args: 76 | model (str, optional): Model name. Defaults to ``None``. 77 | dataset (str, optional): Dataset name. Defaults to ``None``. 78 | config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``. 
79 | config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``. 80 | saved (bool, optional): Whether to save the model. Defaults to ``True``. 81 | """ 82 | # configurations initialization 83 | config = CARSConfig(model=model, dataset=dataset, config_file_list=config_file_list, config_dict=config_dict) 84 | init_seed(config['seed'], config['reproducibility']) 85 | 86 | # logger initialization 87 | log_handler, log_filepath = init_logger(config) 88 | logger = getLogger() 89 | 90 | logger.info(config) 91 | 92 | # dataset filtering 93 | dataset = create_dataset(config) 94 | if config['save_dataset']: 95 | dataset.save() 96 | logger.info(dataset) 97 | 98 | # dataset splitting 99 | # train_data, valid_data, test_data = data_preparation(config, dataset) 100 | train_data, valid_data = data_preparation(config, dataset) 101 | 102 | 103 | CV = False 104 | if isinstance(train_data, list): 105 | CV = True 106 | n_folds = len(train_data) 107 | 108 | if CV: 109 | list_train_test = [] 110 | for i in range(n_folds): 111 | t = (train_data[i], valid_data[i], config, logger, (i+1)) 112 | list_train_test.append(t) 113 | 114 | # pool = ThreadPool() 115 | # rsts = pool.map(eval_folds, list_train_test) 116 | # pool.close() 117 | # pool.join() 118 | # print('cpu count: ', os.cpu_count()) 119 | 120 | num_processes = config['eval_args']['split']['num_processes'] 121 | with Pool(processes=num_processes) as pool: 122 | rsts = pool.map(eval_folds, list_train_test) 123 | 124 | best_valid_score = 0 125 | best_valid_result = {} 126 | 127 | for rst_fold in rsts: 128 | valid_score_fold = rst_fold[0] 129 | valid_result_fold = rst_fold[1] 130 | 131 | best_valid_score += valid_score_fold 132 | if not best_valid_result: 133 | best_valid_result = valid_result_fold 134 | else: 135 | for key in best_valid_result.keys(): 136 | best_valid_result[key] = best_valid_result[key] + valid_result_fold[key] 137 | 138 | best_valid_score = round(best_valid_score/n_folds, config['metric_decimal_place']) 139 | for key in best_valid_result: 140 | best_valid_result[key] = round(best_valid_result[key]/n_folds, config['metric_decimal_place']) 141 | msghead = 'Data: '+config['dataset']+', Results on '+str(n_folds)+' CV: best valid by '+config['model'] 142 | layers = [str(int) for int in config['mlp_hidden_size']] 143 | layers = ' '.join(layers) 144 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result}'+', lrate: '+str(config['learning_rate'])+', layers: ['+layers+']') 145 | log_handler.close() 146 | logger.removeHandler(log_handler) 147 | logger_name = log_filepath[:-4] + "_" + config['valid_metric'] + " = " + str(best_valid_score) + ".log" 148 | shutil.move(log_filepath, logger_name) 149 | update_best_log(config, logger_name, best_valid_result) 150 | else: 151 | if config['save_dataloaders']: 152 | save_split_dataloaders(config, dataloaders=(train_data, valid_data)) 153 | 154 | # model loading and initialization 155 | init_seed(config['seed'], config['reproducibility']) 156 | model = get_model(config['model'])(config, train_data.dataset).to(config['device']) 157 | logger.info(model) 158 | 159 | # trainer loading and initialization 160 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model) 161 | 162 | # model training 163 | best_valid_score, best_valid_result = trainer.fit( 164 | train_data, valid_data, saved=saved, show_progress=config['show_progress'] 165 | ) 166 | 167 | # model evaluation 168 | # test_result = trainer.evaluate(test_data, 
load_best_model=saved, show_progress=config['show_progress']) 169 | 170 | msghead = 'Data: '+config['dataset']+', best valid by '+config['model'] 171 | logger.info(set_color(msghead, 'yellow') + f': {best_valid_result}') 172 | # logger.info(set_color('test result', 'yellow') + f': {test_result}') 173 | log_handler.close() 174 | logger.removeHandler(log_handler) 175 | logger_name = log_filepath[:-4] + "_" + config['valid_metric'] + " = " + str(best_valid_score) + ".log" 176 | shutil.move(log_filepath, logger_name) 177 | update_best_log(config, logger_name, best_valid_result) 178 | 179 | ''' 180 | # example of predictions by context recommender 181 | # note, raw value in the original data is expected to be transformed to inner ID 182 | 183 | # rawid <--->innderid 184 | print("innerid: ", dataset._get_innderid_from_rawid("user_id", "1003")) 185 | print("rawid: ", dataset._get_rawid_from_innerid("user_id", 1)) 186 | 187 | userid = dataset._get_innderid_from_rawid("user_id","1003") 188 | itemid = dataset._get_innderid_from_rawid("item_id","tt0120912") 189 | timeid = dataset._get_innderid_from_rawid("time","Weekday") 190 | locid = dataset._get_innderid_from_rawid("location","Cinema") 191 | cmpid = dataset._get_innderid_from_rawid("companion","Alone") 192 | 193 | user = torch.tensor([userid]) 194 | item = torch.tensor([itemid]) 195 | contexts = [] 196 | contexts.append(torch.tensor([timeid])) 197 | contexts.append(torch.tensor([locid])) 198 | contexts.append(torch.tensor([cmpid])) 199 | print(userid, ', ', itemid, ', ', timeid, ', ', locid, ', ', cmpid) 200 | print("prediction: ",model.forward(user, item, contexts)) 201 | exit() 202 | ''' 203 | 204 | return { 205 | 'best_valid_score': best_valid_score, 206 | 'valid_score_bigger': config['valid_metric_bigger'], 207 | 'best_valid_result': best_valid_result, 208 | # 'test_result': test_result 209 | } 210 | 211 | def update_best_log(config, newlog, best_valid_result): 212 | dataset = config['dataset'] 213 | # compare which log file is better 214 | ranking = False 215 | if config['eval_type'] == EvaluatorType.RANKING: 216 | ranking = True 217 | metric = config['ranking_valid_metric'] 218 | else: 219 | metric = config['err_valid_metric'] 220 | 221 | metric_value = best_valid_result[metric.lower()] 222 | 223 | end = newlog.rindex('.') 224 | s1 = newlog.index('-') 225 | s2 = newlog.index('-', s1 + 1, end) 226 | model = newlog[s1 + 1:s2] 227 | 228 | match = [dataset, model, metric] 229 | 230 | 231 | folder_best = './log/best/' 232 | existing_logs = glob.glob(folder_best+'/*.log') 233 | 234 | found = False 235 | oldlog = None 236 | for file in existing_logs: 237 | if all(x in file for x in match): 238 | oldlog = file 239 | found = True 240 | break 241 | 242 | newlog_filename = newlog[newlog.rindex('/')+1:] 243 | 244 | if not found: 245 | shutil.copyfile(newlog, folder_best+newlog_filename) 246 | else: 247 | newvalue = metric_value 248 | oldvalue = float(oldlog[oldlog.rindex('=') + 1: oldlog.rindex('.')]) 249 | 250 | if ranking: 251 | if newvalue > oldvalue: 252 | shutil.copyfile(newlog, folder_best+newlog_filename) 253 | os.remove(oldlog) 254 | impro = (newvalue - oldvalue) / oldvalue 255 | print('Better results! improvement: {:.2%}'.format(impro) + ', best log saved in ' + folder_best + newlog_filename) 256 | else: 257 | if newvalue < oldvalue: 258 | shutil.copyfile(newlog, folder_best+newlog_filename) 259 | os.remove(oldlog) 260 | impro = (oldvalue - newvalue) / oldvalue 261 | print('Better results! 
improvement: {:.2%}'.format(impro) + ', best log saved in ' + folder_best + newlog_filename) 262 | return 263 | 264 | 265 | 266 | def objective_function(config_dict=None, config_file_list=None, saved=True): 267 | r""" The default objective_function used in HyperTuning 268 | 269 | Args: 270 | config_dict (dict, optional): Parameters dictionary used to modify experiment parameters. Defaults to ``None``. 271 | config_file_list (list, optional): Config files used to modify experiment parameters. Defaults to ``None``. 272 | saved (bool, optional): Whether to save the model. Defaults to ``True``. 273 | """ 274 | 275 | config = CARSConfig(config_dict=config_dict, config_file_list=config_file_list) 276 | init_seed(config['seed'], config['reproducibility']) 277 | logging.basicConfig(level=logging.ERROR) 278 | dataset = create_dataset(config) 279 | train_data, valid_data, test_data = data_preparation(config, dataset) 280 | init_seed(config['seed'], config['reproducibility']) 281 | model = get_model(config['model'])(config, train_data.dataset).to(config['device']) 282 | trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model) 283 | best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, verbose=False, saved=saved) 284 | # test_result = trainer.evaluate(test_data, load_best_model=saved) 285 | 286 | return { 287 | 'best_valid_score': best_valid_score, 288 | 'valid_score_bigger': config['valid_metric_bigger'], 289 | 'best_valid_result': best_valid_result, 290 | # 'test_result': test_result 291 | } 292 | 293 | 294 | def load_data_and_model(model_file, dataset_file=None, dataloader_file=None): 295 | r"""Load filtered dataset, split dataloaders and saved model. 296 | 297 | Args: 298 | model_file (str): The path of saved model file. 299 | dataset_file (str, optional): The path of filtered dataset. Defaults to ``None``. 300 | dataloader_file (str, optional): The path of split dataloaders. Defaults to ``None``. 301 | 302 | Note: 303 | The :attr:`dataset` will be loaded or created according to the following strategy: 304 | If :attr:`dataset_file` is not ``None``, the :attr:`dataset` will be loaded from :attr:`dataset_file`. 305 | If :attr:`dataset_file` is ``None`` and :attr:`dataloader_file` is ``None``, 306 | the :attr:`dataset` will be created according to :attr:`config`. 307 | If :attr:`dataset_file` is ``None`` and :attr:`dataloader_file` is not ``None``, 308 | the :attr:`dataset` will neither be loaded or created. 309 | 310 | The :attr:`dataloader` will be loaded or created according to the following strategy: 311 | If :attr:`dataloader_file` is not ``None``, the :attr:`dataloader` will be loaded from :attr:`dataloader_file`. 312 | If :attr:`dataloader_file` is ``None``, the :attr:`dataloader` will be created according to :attr:`config`. 313 | 314 | Returns: 315 | tuple: 316 | - config (Config): An instance object of Config, which record parameter information in :attr:`model_file`. 317 | - model (AbstractRecommender): The model load from :attr:`model_file`. 318 | - dataset (Dataset): The filtered dataset. 319 | - train_data (AbstractDataLoader): The dataloader for training. 320 | - valid_data (AbstractDataLoader): The dataloader for validation. 321 | - test_data (AbstractDataLoader): The dataloader for testing. 
322 | """ 323 | checkpoint = torch.load(model_file) 324 | config = checkpoint['config'] 325 | init_seed(config['seed'], config['reproducibility']) 326 | init_logger(config) 327 | 328 | dataset = None 329 | if dataset_file: 330 | with open(dataset_file, 'rb') as f: 331 | dataset = pickle.load(f) 332 | 333 | if dataloader_file: 334 | train_data, valid_data, test_data = load_split_dataloaders(dataloader_file) 335 | else: 336 | if dataset is None: 337 | dataset = create_dataset(config) 338 | train_data, valid_data, test_data = data_preparation(config, dataset) 339 | 340 | init_seed(config['seed'], config['reproducibility']) 341 | model = get_model(config['model'])(config, train_data.dataset).to(config['device']) 342 | model.load_state_dict(checkpoint['state_dict']) 343 | model.load_other_parameter(checkpoint.get('other_parameter')) 344 | 345 | return config, model, dataset, train_data, valid_data, test_data 346 | -------------------------------------------------------------------------------- /deepcarskit/data/utils.py: -------------------------------------------------------------------------------- 1 | # @Time : 2020/7/21 2 | # @Author : Yupeng Hou 3 | # @Email : houyupeng@ruc.edu.cn 4 | 5 | # UPDATE: 6 | # @Time : 2021/7/9, 2020/9/17, 2020/8/31, 2021/2/20, 2021/3/1 7 | # @Author : Yupeng Hou, Yushuo Chen, Kaiyuan Li, Haoran Cheng, Jiawei Guan 8 | # @Email : houyupeng@ruc.edu.cn, chenyushuo@ruc.edu.cn, tsotfsk@outlook.com, chenghaoran29@foxmail.com, guanjw@ruc.edu.cn 9 | 10 | # UPDATE: 11 | # @Time : 2021/12 12 | # @Author : Yong Zheng 13 | # @Notes : made several changes to adapt it for CARS 14 | 15 | 16 | """ 17 | deepcarskit.data.utils 18 | ######################## 19 | """ 20 | 21 | import copy 22 | import importlib 23 | import os 24 | import pickle 25 | 26 | from deepcarskit.data.dataloader import * 27 | from recbole.data.dataloader import TrainDataLoader, NegSampleEvalDataLoader, KnowledgeBasedDataLoader, UserDataLoader 28 | from recbole.sampler import KGSampler, Sampler, RepeatableSampler 29 | from recbole.utils import ModelType, ensure_dir, get_local_time, set_color 30 | from recbole.utils import EvaluatorType 31 | from logging import getLogger 32 | 33 | 34 | def create_dataset(config): 35 | """Create dataset according to :attr:`config['model']` and :attr:`config['MODEL_TYPE']`. 36 | 37 | Args: 38 | config (Config): An instance object of Config, used to record parameter information. 39 | Returns: 40 | Dataset: Constructed dataset. 41 | """ 42 | # David Wang: import the model dynamically 43 | dataset_module = importlib.import_module('deepcarskit.data.dataset') 44 | if hasattr(dataset_module, config['model'] + 'Dataset'): 45 | """ David Wang: 46 | if a data set is name after Dataset in custom data set model, return the data set class object 47 | """ 48 | return getattr(dataset_module, config['model'] + 'Dataset')(config) 49 | else: 50 | model_type = config['MODEL_TYPE'] 51 | if model_type == ModelType.SEQUENTIAL: 52 | from .dataset import SequentialDataset 53 | return SequentialDataset(config) 54 | elif model_type == ModelType.KNOWLEDGE: 55 | from .dataset import KnowledgeBasedDataset 56 | return KnowledgeBasedDataset(config) 57 | elif model_type == ModelType.DECISIONTREE: 58 | from .dataset import DecisionTreeDataset 59 | return DecisionTreeDataset(config) 60 | else: 61 | from .dataset import Dataset 62 | return Dataset(config) 63 | 64 | 65 | def save_split_dataloaders(config, dataloaders): 66 | """Save split dataloaders. 
67 | 68 | Args: 69 | config (Config): An instance object of Config, used to record parameter information. 70 | dataloaders (tuple of AbstractDataLoader): The split dataloaders. 71 | """ 72 | save_path = config['checkpoint_dir'] 73 | saved_dataloaders_file = f'{config["dataset"]}-for-{config["model"]}-dataloader.pth' 74 | file_path = os.path.join(save_path, saved_dataloaders_file) 75 | logger = getLogger() 76 | logger.info(set_color('Saved split dataloaders', 'blue') + f': {file_path}') 77 | with open(file_path, 'wb') as f: 78 | pickle.dump(dataloaders, f) 79 | 80 | 81 | def load_split_dataloaders(saved_dataloaders_file): 82 | """Load split dataloaders. 83 | 84 | Args: 85 | saved_dataloaders_file (str): The path of split dataloaders. 86 | 87 | Returns: 88 | dataloaders (tuple of AbstractDataLoader): The split dataloaders. 89 | """ 90 | with open(saved_dataloaders_file, 'rb') as f: 91 | dataloaders = pickle.load(f) 92 | return dataloaders 93 | 94 | 95 | def data_preparation(config, dataset, save=False): 96 | """Split the dataset by :attr:`config['eval_args']` and create training, validation and test dataloader. 97 | 98 | Args: 99 | config (Config): An instance object of Config, used to record parameter information. 100 | dataset (Dataset): An instance object of Dataset, which contains all interaction records. 101 | save (bool, optional): If ``True``, it will call :func:`save_datasets` to save split dataset. 102 | Defaults to ``False``. 103 | 104 | Returns: 105 | tuple: 106 | - train_data (AbstractDataLoader): The dataloader for training. 107 | - valid_data (AbstractDataLoader): The dataloader for validation. 108 | - test_data (AbstractDataLoader): The dataloader for testing. 109 | """ 110 | model_type = config['MODEL_TYPE'] 111 | # David Wang: make a copy since dataset.build() will modify the .inter_feat attribute to Interaction object 112 | dataset = copy.copy(dataset) 113 | # David Wang: read data file and create 3 pandas DateFrame data sets 114 | 115 | 116 | 117 | CV = True 118 | built_datasets = dataset.build() 119 | if isinstance(built_datasets, list): 120 | CV = False 121 | logger = getLogger() 122 | 123 | # dict 124 | # key = number of fold 125 | # value = [train, valid set] 126 | 127 | if CV: 128 | train = [] 129 | valid = [] 130 | for fold in built_datasets: 131 | train_dataset, valid_dataset = built_datasets[fold] 132 | train_sampler, valid_sampler = create_samplers(config, dataset, built_datasets[fold]) 133 | used_ids = get_used_ids(config, dataset=train_dataset) 134 | 135 | if model_type != ModelType.KNOWLEDGE: 136 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, shuffle=True) 137 | else: 138 | kg_sampler = KGSampler(dataset, config['train_neg_sample_args']['distribution']) 139 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, kg_sampler, shuffle=True) 140 | 141 | if config['ranking']: 142 | valid_data_loader = get_dataloader(config, 'evaluation') 143 | valid_data = valid_data_loader(config, valid_dataset, valid_sampler, shuffle=False, used_ids=used_ids) 144 | else: 145 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False) 146 | 147 | logger.info( 148 | set_color('[Training]: ', 'pink') + set_color('train_batch_size', 'cyan') + ' = ' + 149 | set_color(f'[{config["train_batch_size"]}]', 'yellow') + set_color(' negative sampling', 'cyan') + ': ' + 150 | set_color(f'[{config["neg_sampling"]}]', 'yellow') 151 | ) 152 | logger.info( 153 | set_color('[Evaluation]: 
', 'pink') + set_color('eval_batch_size', 'cyan') + ' = ' + 154 | set_color(f'[{config["eval_batch_size"]}]', 'yellow') + set_color(' eval_args', 'cyan') + ': ' + 155 | set_color(f'[{config["eval_args"]}]', 'yellow') 156 | ) 157 | train.append(train_data) 158 | valid.append(valid_data) 159 | # if save: 160 | # save_split_dataloaders(config, dataloaders=(train_data, valid_data)) 161 | 162 | return train, valid 163 | else: 164 | train_dataset, valid_dataset = built_datasets 165 | train_sampler, valid_sampler = create_samplers(config, dataset, built_datasets) 166 | used_ids = get_used_ids(config, dataset=train_dataset) 167 | 168 | if model_type != ModelType.KNOWLEDGE: 169 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, shuffle=True) 170 | else: 171 | kg_sampler = KGSampler(dataset, config['train_neg_sample_args']['distribution']) 172 | train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, kg_sampler, shuffle=True) 173 | 174 | if config['ranking']: 175 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False, used_ids=used_ids) 176 | else: 177 | valid_data = get_dataloader(config, 'evaluation')(config, valid_dataset, valid_sampler, shuffle=False) 178 | 179 | logger.info( 180 | set_color('[Training]: ', 'pink') + set_color('train_batch_size', 'cyan') + ' = ' + 181 | set_color(f'[{config["train_batch_size"]}]', 'yellow') + set_color(' negative sampling', 'cyan') + ': ' + 182 | set_color(f'[{config["neg_sampling"]}]', 'yellow') 183 | ) 184 | logger.info( 185 | set_color('[Evaluation]: ', 'pink') + set_color('eval_batch_size', 'cyan') + ' = ' + 186 | set_color(f'[{config["eval_batch_size"]}]', 'yellow') + set_color(' eval_args', 'cyan') + ': ' + 187 | set_color(f'[{config["eval_args"]}]', 'yellow') 188 | ) 189 | if save: 190 | save_split_dataloaders(config, dataloaders=(train_data, valid_data)) 191 | 192 | return train_data, valid_data 193 | 194 | 195 | def get_dataloader(config, phase): 196 | """Return a dataloader class according to :attr:`config` and :attr:`phase`. 197 | 198 | Args: 199 | config (Config): An instance object of Config, used to record parameter information. 200 | phase (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. 201 | 202 | Returns: 203 | type: The dataloader class that meets the requirements in :attr:`config` and :attr:`phase`. 204 | """ 205 | register_table = { 206 | "MultiDAE": _get_AE_dataloader, 207 | "MultiVAE": _get_AE_dataloader, 208 | 'MacridVAE': _get_AE_dataloader, 209 | 'CDAE': _get_AE_dataloader, 210 | 'ENMF': _get_AE_dataloader, 211 | 'RaCT': _get_AE_dataloader, 212 | 'RecVAE': _get_AE_dataloader, 213 | } 214 | 215 | if config['model'] in register_table: 216 | return register_table[config['model']](config, phase) 217 | 218 | model_type = config['MODEL_TYPE'] 219 | if phase == 'train': 220 | if model_type != ModelType.KNOWLEDGE: 221 | return TrainDataLoader 222 | else: 223 | return KnowledgeBasedDataLoader 224 | else: 225 | eval_strategy = config['eval_neg_sample_args']['strategy'] 226 | if eval_strategy in {'none', 'by'}: 227 | if config['eval_type'] == EvaluatorType.RANKING: 228 | return LabledDataSortEvalDataLoader 229 | else: 230 | return NegSampleEvalDataLoader 231 | elif eval_strategy == 'full': 232 | return FullSortEvalDataLoader 233 | 234 | def get_used_ids(config, dataset): 235 | """ 236 | Returns: 237 | dict: Used item_ids is the same as positive item_ids. 
238 | Key is phase, and value is a numpy.ndarray which index is user_id, and element is a set of item_ids. 239 | """ 240 | used_item_id = None 241 | uc_num = dataset.user_context_num 242 | iid_field = dataset.iid_field 243 | ucid_field = dataset.ucid_field 244 | last = [set() for _ in range(uc_num)] 245 | cur = np.array([set(s) for s in last]) 246 | for ucid, iid in zip(dataset.inter_feat[ucid_field].numpy(), dataset.inter_feat[iid_field].numpy()): 247 | cur[ucid].add(iid) 248 | last = used_item_id = cur 249 | 250 | for used_item_set in used_item_id: 251 | if len(used_item_set) + 1 == dataset.item_num: # [pad] is a item. 252 | raise ValueError( 253 | 'Some users have interacted with all items, ' 254 | 'which we can not sample negative items for them. ' 255 | 'Please set `user_inter_num_interval` to filter those users.' 256 | ) 257 | return used_item_id 258 | 259 | def _get_AE_dataloader(config, phase): 260 | """Customized function for VAE models to get correct dataloader class. 261 | 262 | Args: 263 | config (Config): An instance object of Config, used to record parameter information. 264 | phase (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. 265 | 266 | Returns: 267 | type: The dataloader class that meets the requirements in :attr:`config` and :attr:`phase`. 268 | """ 269 | if phase == 'train': 270 | return UserDataLoader 271 | else: 272 | eval_strategy = config['eval_neg_sample_args']['strategy'] 273 | if eval_strategy in {'none', 'by'}: 274 | return NegSampleEvalDataLoader 275 | elif eval_strategy == 'full': 276 | return FullSortEvalDataLoader 277 | 278 | 279 | def create_samplers(config, dataset, built_datasets): 280 | """Create sampler for training, validation and testing. 281 | 282 | Args: 283 | config (Config): An instance object of Config, used to record parameter information. 284 | dataset (Dataset): An instance object of Dataset, which contains all interaction records. 285 | built_datasets (list of Dataset): A list of split Dataset, which contains dataset for 286 | training, validation and testing. 287 | 288 | Returns: 289 | tuple: 290 | - train_sampler (AbstractSampler): The sampler for training. 291 | - valid_sampler (AbstractSampler): The sampler for validation. 292 | - test_sampler (AbstractSampler): The sampler for testing. 293 | """ 294 | phases = ['train', 'valid'] 295 | train_neg_sample_args = config['train_neg_sample_args'] 296 | eval_neg_sample_args = config['eval_neg_sample_args'] 297 | 298 | sampler = None 299 | train_sampler, valid_sampler = None, None 300 | 301 | if train_neg_sample_args['strategy'] != 'none': 302 | if not config['repeatable']: 303 | sampler = Sampler(phases, built_datasets, train_neg_sample_args['distribution']) 304 | else: 305 | sampler = RepeatableSampler(phases, dataset, train_neg_sample_args['distribution']) 306 | train_sampler = sampler.set_phase('train') 307 | 308 | if eval_neg_sample_args['strategy'] != 'none': 309 | if sampler is None: 310 | if not config['repeatable']: 311 | sampler = Sampler(phases, built_datasets, eval_neg_sample_args['distribution']) 312 | else: 313 | sampler = RepeatableSampler(phases, dataset, eval_neg_sample_args['distribution']) 314 | else: 315 | sampler.set_distribution(eval_neg_sample_args['distribution']) 316 | valid_sampler = sampler.set_phase('valid') 317 | 318 | return train_sampler, valid_sampler 319 | 320 | ''' 321 | def create_samplers(config, dataset, built_datasets): 322 | """Create sampler for training, validation and testing. 
323 | 324 | Args: 325 | config (Config): An instance object of Config, used to record parameter information. 326 | dataset (Dataset): An instance object of Dataset, which contains all interaction records. 327 | built_datasets (list of Dataset): A list of split Dataset, which contains dataset for 328 | training, validation and testing. 329 | 330 | Returns: 331 | tuple: 332 | - train_sampler (AbstractSampler): The sampler for training. 333 | - valid_sampler (AbstractSampler): The sampler for validation. 334 | - test_sampler (AbstractSampler): The sampler for testing. 335 | """ 336 | phases = ['train', 'valid', 'test'] 337 | train_neg_sample_args = config['train_neg_sample_args'] 338 | eval_neg_sample_args = config['eval_neg_sample_args'] 339 | sampler = None 340 | train_sampler, valid_sampler, test_sampler = None, None, None 341 | 342 | if train_neg_sample_args['strategy'] != 'none': 343 | if not config['repeatable']: 344 | sampler = Sampler(phases, built_datasets, train_neg_sample_args['distribution']) 345 | else: 346 | sampler = RepeatableSampler(phases, dataset, train_neg_sample_args['distribution']) 347 | train_sampler = sampler.set_phase('train') 348 | 349 | if eval_neg_sample_args['strategy'] != 'none': 350 | if sampler is None: 351 | if not config['repeatable']: 352 | sampler = Sampler(phases, built_datasets, eval_neg_sample_args['distribution']) 353 | else: 354 | sampler = RepeatableSampler(phases, dataset, eval_neg_sample_args['distribution']) 355 | else: 356 | sampler.set_distribution(eval_neg_sample_args['distribution']) 357 | valid_sampler = sampler.set_phase('valid') 358 | test_sampler = sampler.set_phase('test') 359 | 360 | return train_sampler, valid_sampler, test_sampler 361 | ''' -------------------------------------------------------------------------------- /deepcarskit/model/context_recommender.py: -------------------------------------------------------------------------------- 1 | # @Time : 2021/12 2 | # @Author : Yong Zheng 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | 8 | from recbole.model.abstract_recommender import AbstractRecommender 9 | from recbole.model.layers import FMEmbedding 10 | from recbole.utils import ModelType, InputType, FeatureSource, FeatureType, set_color, EvaluatorType 11 | from deepcarskit.model.layers import FMFirstOrderLinear 12 | 13 | class ContextRecommender(AbstractRecommender): 14 | """This is a abstract context-aware recommender. All the context-aware model should implement this class. 15 | The base context-aware recommender class provide the basic embedding function of feature fields which also 16 | contains a first-order part of feature fields. 
17 | """ 18 | type = ModelType.CONTEXT 19 | input_type = InputType.POINTWISE 20 | 21 | def __init__(self, config, dataset): 22 | super(ContextRecommender, self).__init__() 23 | self.config = config 24 | 25 | self.field_names = dataset.fields( 26 | source=[ 27 | FeatureSource.INTERACTION, 28 | FeatureSource.USER, 29 | FeatureSource.USER_ID, 30 | FeatureSource.ITEM, 31 | FeatureSource.ITEM_ID, 32 | ] 33 | ) 34 | 35 | self.USER_ID = config['USER_ID_FIELD'] 36 | self.ITEM_ID = config['ITEM_ID_FIELD'] 37 | self.CONTEXT_SITUATION_ID = config['CONTEXT_SITUATION_FIELD'] 38 | 39 | self.actfun = nn.LeakyReLU() 40 | self.loss = nn.MSELoss() 41 | if config['ranking']: 42 | self.LABEL = config['LABEL_FIELD'] 43 | if config['sigmoid']: 44 | self.actfun = nn.Sigmoid() 45 | self.loss = nn.BCELoss() 46 | else: 47 | self.LABEL = config['RATING_FIELD'] 48 | 49 | self.CONTEXTS = [] 50 | for i in range(2, len(self.field_names)): 51 | if self.field_names[i] == config['LABEL_FIELD'] or self.field_names[i] == config['USER_CONTEXT_FIELD'] or self.field_names[i] == config['RATING_FIELD']: 52 | continue 53 | else: 54 | self.CONTEXTS.append(self.field_names[i]) 55 | 56 | self.n_context_situation = 0 57 | if self.CONTEXT_SITUATION_ID in self.CONTEXTS: 58 | self.n_context_situation = dataset.num(self.CONTEXT_SITUATION_ID) 59 | self.CONTEXTS.remove(self.CONTEXT_SITUATION_ID) 60 | 61 | msghead = "Loaded context variables: " 62 | if self.n_context_situation == 0: 63 | msg = ' '.join(self.CONTEXTS) + ', without context situation ID: ' + self.CONTEXT_SITUATION_ID 64 | else: 65 | msg = ' '.join(self.CONTEXTS) + ', with context situation ID: ' + self.CONTEXT_SITUATION_ID 66 | self.logger.info(set_color(msghead, 'yellow') + msg) 67 | 68 | self.n_users = dataset.num(self.USER_ID) 69 | self.n_items = dataset.num(self.ITEM_ID) 70 | # number of context variables 71 | self.n_contexts_dim = len(self.CONTEXTS) 72 | # number of context conditions in each dimension 73 | self.n_contexts_conditions = [] 74 | 75 | for i in range(self.n_contexts_dim): 76 | dim=self.CONTEXTS[i] 77 | n_dim = dataset.num(dim) 78 | self.n_contexts_conditions.append(n_dim) 79 | 80 | self.embedding_size = config['embedding_size'] 81 | self.device = config['device'] 82 | self.double_tower = config['double_tower'] 83 | if self.double_tower is None: 84 | self.double_tower = False 85 | self.token_field_names = [] 86 | self.token_field_dims = [] 87 | self.float_field_names = [] 88 | self.float_field_dims = [] 89 | self.token_seq_field_names = [] 90 | self.token_seq_field_dims = [] 91 | self.num_feature_field = 0 92 | 93 | if self.double_tower: 94 | self.user_field_names = dataset.fields(source=[FeatureSource.USER, FeatureSource.USER_ID]) 95 | self.item_field_names = dataset.fields(source=[FeatureSource.ITEM, FeatureSource.ITEM_ID]) 96 | self.field_names = self.user_field_names + self.item_field_names 97 | self.user_token_field_num = 0 98 | self.user_float_field_num = 0 99 | self.user_token_seq_field_num = 0 100 | for field_name in self.user_field_names: 101 | if dataset.field2type[field_name] == FeatureType.TOKEN: 102 | self.user_token_field_num += 1 103 | elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ: 104 | self.user_token_seq_field_num += 1 105 | else: 106 | self.user_float_field_num += dataset.num(field_name) 107 | self.item_token_field_num = 0 108 | self.item_float_field_num = 0 109 | self.item_token_seq_field_num = 0 110 | for field_name in self.item_field_names: 111 | if dataset.field2type[field_name] == FeatureType.TOKEN: 112 | 
                    self.item_token_field_num += 1
113 |                 elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ:
114 |                     self.item_token_seq_field_num += 1
115 |                 else:
116 |                     self.item_float_field_num += dataset.num(field_name)
117 | 
118 |         for field_name in self.field_names:
119 |             if field_name == self.config['RATING_FIELD'] or field_name == self.config['LABEL_FIELD']:
120 |                 continue
121 |             if dataset.field2type[field_name] == FeatureType.TOKEN:
122 |                 self.token_field_names.append(field_name)
123 |                 self.token_field_dims.append(dataset.num(field_name))
124 |             elif dataset.field2type[field_name] == FeatureType.TOKEN_SEQ:
125 |                 self.token_seq_field_names.append(field_name)
126 |                 self.token_seq_field_dims.append(dataset.num(field_name))
127 |             else:
128 |                 self.float_field_names.append(field_name)
129 |                 self.float_field_dims.append(dataset.num(field_name))
130 |             self.num_feature_field += 1
131 |         if len(self.token_field_dims) > 0:
132 |             self.token_field_offsets = np.array((0, *np.cumsum(self.token_field_dims)[:-1]), dtype=np.int64)  # np.int64: np.long is deprecated/removed in recent NumPy
133 |             self.token_embedding_table = FMEmbedding(
134 |                 self.token_field_dims, self.token_field_offsets, self.embedding_size
135 |             )
136 |         if len(self.float_field_dims) > 0:
137 |             self.float_embedding_table = nn.Embedding(
138 |                 np.sum(self.float_field_dims, dtype=np.int32), self.embedding_size
139 |             )
140 |         if len(self.token_seq_field_dims) > 0:
141 |             self.token_seq_embedding_table = nn.ModuleList()
142 |             for token_seq_field_dim in self.token_seq_field_dims:
143 |                 self.token_seq_embedding_table.append(nn.Embedding(token_seq_field_dim, self.embedding_size))
144 | 
145 |         self.first_order_linear = FMFirstOrderLinear(config, dataset)
146 | 
147 |     def embed_float_fields(self, float_fields, embed=True):
148 |         """Embed the float feature columns
149 | 
150 |         Args:
151 |             float_fields (torch.FloatTensor): The input dense tensor. shape of [batch_size, num_float_field]
152 |             embed (bool): Return the embeddings of the columns or just the raw columns themselves. Defaults to ``True``.
153 | 
154 |         Returns:
155 |             torch.FloatTensor: The result embedding tensor of float columns.
156 |         """
157 |         # input Tensor shape : [batch_size, num_float_field]
158 |         if not embed or float_fields is None:
159 |             return float_fields
160 | 
161 |         num_float_field = float_fields.shape[1]
162 |         # [batch_size, num_float_field]
163 |         index = torch.arange(0, num_float_field).unsqueeze(0).expand_as(float_fields).long().to(self.device)
164 | 
165 |         # [batch_size, num_float_field, embed_dim]
166 |         float_embedding = self.float_embedding_table(index)
167 |         float_embedding = torch.mul(float_embedding, float_fields.unsqueeze(2))
168 | 
169 |         return float_embedding
170 | 
171 |     def getContextSituationList(self, interaction, context_dims):  # context conditions as a [num_context_dims, batch_size] tensor
172 |         situation = []
173 |         for dim in context_dims:
174 |             situation.append(interaction[dim].tolist())
175 |         situation = torch.tensor(situation).to(self.device)
176 |         return situation
177 | 
178 |     def getContextSituationDict(self, interaction, context_dims):  # context conditions as a {dim_name: tensor} dict
179 |         situation = {}
180 |         for dim in context_dims:
181 |             situation[dim] = interaction[dim]
182 |         return situation
183 | 
184 |     def embed_token_fields(self, token_fields):
185 |         """Embed the token feature columns
186 | 
187 |         Args:
188 |             token_fields (torch.LongTensor): The input tensor. shape of [batch_size, num_token_field]
189 | 
190 |         Returns:
191 |             torch.FloatTensor: The result embedding tensor of token columns.
192 |         """
193 |         # input Tensor shape : [batch_size, num_token_field]
194 |         if token_fields is None:
195 |             return None
196 |         # [batch_size, num_token_field, embed_dim]
197 |         token_embedding = self.token_embedding_table(token_fields)
198 | 
199 |         return token_embedding
200 | 
201 |     def embed_token_seq_fields(self, token_seq_fields, mode='mean'):
202 |         """Embed the token sequence feature columns
203 | 
204 |         Args:
205 |             token_seq_fields (list of torch.LongTensor): A list of input tensors, each of shape [batch_size, seq_len]
206 |             mode (str): How to aggregate the embeddings within each field: 'mean', 'max' or 'sum'. Defaults to 'mean'.
207 | 
208 |         Returns:
209 |             torch.FloatTensor: The result embedding tensor of token sequence columns.
210 |         """
211 |         # input is a list of Tensor shape of [batch_size, seq_len]
212 |         fields_result = []
213 |         for i, token_seq_field in enumerate(token_seq_fields):
214 |             embedding_table = self.token_seq_embedding_table[i]
215 |             mask = token_seq_field != 0  # [batch_size, seq_len]
216 |             mask = mask.float()
217 |             value_cnt = torch.sum(mask, dim=1, keepdim=True)  # [batch_size, 1]
218 | 
219 |             token_seq_embedding = embedding_table(token_seq_field)  # [batch_size, seq_len, embed_dim]
220 | 
221 |             mask = mask.unsqueeze(2).expand_as(token_seq_embedding)  # [batch_size, seq_len, embed_dim]
222 |             if mode == 'max':
223 |                 masked_token_seq_embedding = token_seq_embedding - (1 - mask) * 1e9  # [batch_size, seq_len, embed_dim]
224 |                 result = torch.max(masked_token_seq_embedding, dim=1, keepdim=True).values  # [batch_size, 1, embed_dim]; torch.max returns (values, indices)
225 |             elif mode == 'sum':
226 |                 masked_token_seq_embedding = token_seq_embedding * mask.float()
227 |                 result = torch.sum(masked_token_seq_embedding, dim=1, keepdim=True)  # [batch_size, 1, embed_dim]
228 |             else:
229 |                 masked_token_seq_embedding = token_seq_embedding * mask.float()
230 |                 result = torch.sum(masked_token_seq_embedding, dim=1)  # [batch_size, embed_dim]
231 |                 eps = torch.FloatTensor([1e-8]).to(self.device)
232 |                 result = torch.div(result, value_cnt + eps)  # [batch_size, embed_dim]
233 |                 result = result.unsqueeze(1)  # [batch_size, 1, embed_dim]
234 |             fields_result.append(result)
235 |         if len(fields_result) == 0:
236 |             return None
237 |         else:
238 |             return torch.cat(fields_result, dim=1)  # [batch_size, num_token_seq_field, embed_dim]
239 | 
240 |     def double_tower_embed_input_fields(self, interaction):
241 |         """Embed the whole feature columns in a double tower way.
242 | 
243 |         Args:
244 |             interaction (Interaction): The input data collection.
245 | 
246 |         Returns:
247 |             torch.FloatTensor: The sparse (token and token_seq) embedding tensor of the first (user) tower.
248 |             torch.FloatTensor: The dense (float) embedding tensor of the first (user) tower.
249 |             torch.FloatTensor: The sparse (token and token_seq) embedding tensor of the second (item) tower.
250 |             torch.FloatTensor: The dense (float) embedding tensor of the second (item) tower.
251 | 252 | """ 253 | if not self.double_tower: 254 | raise RuntimeError('Please check your model hyper parameters and set \'double tower\' as True') 255 | sparse_embedding, dense_embedding = self.embed_input_fields(interaction) 256 | if dense_embedding is not None: 257 | first_dense_embedding, second_dense_embedding = \ 258 | torch.split(dense_embedding, [self.user_float_field_num, self.item_float_field_num], dim=1) 259 | else: 260 | first_dense_embedding, second_dense_embedding = None, None 261 | 262 | if sparse_embedding is not None: 263 | sizes = [ 264 | self.user_token_seq_field_num, self.item_token_seq_field_num, self.user_token_field_num, 265 | self.item_token_field_num 266 | ] 267 | first_token_seq_embedding, second_token_seq_embedding, first_token_embedding, second_token_embedding = \ 268 | torch.split(sparse_embedding, sizes, dim=1) 269 | first_sparse_embedding = torch.cat([first_token_seq_embedding, first_token_embedding], dim=1) 270 | second_sparse_embedding = torch.cat([second_token_seq_embedding, second_token_embedding], dim=1) 271 | else: 272 | first_sparse_embedding, second_sparse_embedding = None, None 273 | 274 | return first_sparse_embedding, first_dense_embedding, second_sparse_embedding, second_dense_embedding 275 | 276 | def concat_embed_input_fields(self, interaction): 277 | sparse_embedding, dense_embedding = self.embed_input_fields(interaction) 278 | all_embeddings = [] 279 | if sparse_embedding is not None: 280 | all_embeddings.append(sparse_embedding) 281 | if dense_embedding is not None and len(dense_embedding.shape) == 3: 282 | all_embeddings.append(dense_embedding) 283 | return torch.cat(all_embeddings, dim=1) # [batch_size, num_field, embed_dim] 284 | 285 | def embed_input_fields(self, interaction): 286 | """Embed the whole feature columns. 287 | 288 | Args: 289 | interaction (Interaction): The input data collection. 290 | 291 | Returns: 292 | torch.FloatTensor: The embedding tensor of token sequence columns. 293 | torch.FloatTensor: The embedding tensor of float sequence columns. 
294 | """ 295 | float_fields = [] 296 | for field_name in self.float_field_names: 297 | if len(interaction[field_name].shape) == 2: 298 | float_fields.append(interaction[field_name]) 299 | else: 300 | float_fields.append(interaction[field_name].unsqueeze(1)) 301 | if len(float_fields) > 0: 302 | float_fields = torch.cat(float_fields, dim=1) # [batch_size, num_float_field] 303 | else: 304 | float_fields = None 305 | # [batch_size, num_float_field] or [batch_size, num_float_field, embed_dim] or None 306 | float_fields_embedding = self.embed_float_fields(float_fields) 307 | 308 | token_fields = [] 309 | for field_name in self.token_field_names: 310 | token_fields.append(interaction[field_name].unsqueeze(1)) 311 | if len(token_fields) > 0: 312 | token_fields = torch.cat(token_fields, dim=1) # [batch_size, num_token_field] 313 | else: 314 | token_fields = None 315 | # [batch_size, num_token_field, embed_dim] or None 316 | token_fields_embedding = self.embed_token_fields(token_fields) 317 | 318 | token_seq_fields = [] 319 | for field_name in self.token_seq_field_names: 320 | token_seq_fields.append(interaction[field_name]) 321 | # [batch_size, num_token_seq_field, embed_dim] or None 322 | token_seq_fields_embedding = self.embed_token_seq_fields(token_seq_fields) 323 | 324 | if token_fields_embedding is None: 325 | sparse_embedding = token_seq_fields_embedding 326 | else: 327 | if token_seq_fields_embedding is None: 328 | sparse_embedding = token_fields_embedding 329 | else: 330 | sparse_embedding = torch.cat([token_fields_embedding, token_seq_fields_embedding], dim=1) 331 | 332 | dense_embedding = float_fields_embedding 333 | 334 | # sparse_embedding shape: [batch_size, num_token_seq_field+num_token_field, embed_dim] or None 335 | # dense_embedding shape: [batch_size, num_float_field] or [batch_size, num_float_field, embed_dim] or None 336 | return sparse_embedding, dense_embedding 337 | --------------------------------------------------------------------------------
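Note: the snippet below is not part of the repository. It is a minimal, hypothetical sketch (the class name ToyCARSModel, the MLP layers, and the 64-unit hidden size are illustrative assumptions, not DeepCARSKit code) of how a concrete CARS model would typically subclass the ContextRecommender defined above: it builds its input with concat_embed_input_fields(), scores it with a small MLP, and reuses self.actfun, self.loss, and self.LABEL, which ContextRecommender.__init__() configures from the ranking/sigmoid settings.

    import torch.nn as nn

    from deepcarskit.model.context_recommender import ContextRecommender


    class ToyCARSModel(ContextRecommender):
        """Hypothetical example: flatten all field embeddings and score them with an MLP."""

        def __init__(self, config, dataset):
            super(ToyCARSModel, self).__init__(config, dataset)
            # num_feature_field and embedding_size are set by ContextRecommender.__init__()
            input_dim = self.num_feature_field * self.embedding_size
            self.mlp = nn.Sequential(
                nn.Linear(input_dim, 64),
                nn.LeakyReLU(),
                nn.Linear(64, 1),
            )

        def forward(self, interaction):
            # [batch_size, num_field, embed_dim] -> [batch_size, num_field * embed_dim]
            emb = self.concat_embed_input_fields(interaction)
            score = self.mlp(emb.view(emb.shape[0], -1)).squeeze(-1)
            # actfun is Sigmoid for ranking with BCE loss, LeakyReLU for rating prediction with MSE loss
            return self.actfun(score)

        def calculate_loss(self, interaction):
            label = interaction[self.LABEL]
            return self.loss(self.forward(interaction), label)

        def predict(self, interaction):
            return self.forward(interaction)

The models shipped under deepcarskit/model/neucf and deepcarskit/model/fms follow the same calculate_loss/predict contract expected by the RecBole-style trainer.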