├── LICENSE ├── README.md ├── code ├── IDNet │ ├── bert4rec.yaml │ ├── dssm.yaml │ ├── fm.yaml │ ├── gru4rec.yaml │ ├── lightgcn.yaml │ ├── lightsans.yaml │ ├── mf.yaml │ ├── nextitnet.yaml │ ├── sasrec.yaml │ └── srgnn.yaml ├── PixelNet │ ├── bert4rec.yaml │ ├── dssm.yaml │ ├── fm.yaml │ ├── gru4rec.yaml │ ├── lightsans.yaml │ ├── mf.yaml │ ├── nextitnet.yaml │ ├── sasrec.yaml │ └── srgnn.yaml ├── REC │ ├── config │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── configurator.cpython-39.pyc │ │ └── configurator.py │ ├── data │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── data.cpython-39.pyc │ │ │ ├── dataload.cpython-39.pyc │ │ │ ├── utils.cpython-38.pyc │ │ │ └── utils.cpython-39.pyc │ │ ├── data.py │ │ ├── dataload.py │ │ ├── dataset │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── batchset.cpython-39.pyc │ │ │ │ ├── collate_fn.cpython-39.pyc │ │ │ │ ├── evalset.cpython-39.pyc │ │ │ │ ├── trainset.cpython-38.pyc │ │ │ │ └── trainset.cpython-39.pyc │ │ │ ├── batchset.py │ │ │ ├── collate_fn.py │ │ │ ├── evalset.py │ │ │ └── trainset.py │ │ └── utils.py │ ├── evaluator │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── base_metric.cpython-39.pyc │ │ │ ├── collector.cpython-39.pyc │ │ │ ├── evaluator.cpython-39.pyc │ │ │ ├── metrics.cpython-39.pyc │ │ │ ├── register.cpython-39.pyc │ │ │ └── utils.cpython-39.pyc │ │ ├── base_metric.py │ │ ├── collector.py │ │ ├── evaluator.py │ │ ├── metrics.py │ │ ├── register.py │ │ └── utils.py │ ├── model │ │ ├── FreezeModel │ │ │ ├── acf.py │ │ │ ├── curatornet.py │ │ │ ├── fsasrec.py │ │ │ ├── vbpr.py │ │ │ └── visrank.py │ │ ├── IDNet │ │ │ ├── __pycache__ │ │ │ │ ├── bert4rec.cpython-39.pyc │ │ │ │ ├── din.cpython-39.pyc │ │ │ │ ├── dssm.cpython-39.pyc │ │ │ │ ├── fm.cpython-39.pyc │ │ │ │ ├── gru4rec.cpython-39.pyc │ │ │ │ ├── lightgcn.cpython-39.pyc │ │ │ │ ├── lightsans.cpython-39.pyc │ │ │ │ ├── mf.cpython-39.pyc │ │ │ │ ├── nextitnet.cpython-39.pyc │ │ │ │ ├── sasrec.cpython-39.pyc │ │ │ │ ├── srgnn.cpython-39.pyc │ │ │ │ └── ytdnn.cpython-39.pyc │ │ │ ├── bert4rec.py │ │ │ ├── din.py │ │ │ ├── dssm.py │ │ │ ├── fm.py │ │ │ ├── gru4rec.py │ │ │ ├── lightgcn.py │ │ │ ├── lightsans.py │ │ │ ├── mf.py │ │ │ ├── nextitnet.py │ │ │ ├── sasrec.py │ │ │ ├── srgnn.py │ │ │ └── widedeep.py │ │ ├── IdModel │ │ │ ├── bert4rec.py │ │ │ ├── din.py │ │ │ ├── dssm.py │ │ │ ├── fm.py │ │ │ ├── gru4rec.py │ │ │ ├── lightgcn.py │ │ │ ├── lightsans.py │ │ │ ├── mf.py │ │ │ ├── nextitnet.py │ │ │ ├── sasrec.py │ │ │ ├── srgnn.py │ │ │ └── widedeep.py │ │ ├── PixelNet │ │ │ ├── dvbpr.py │ │ │ ├── mobert4rec.py │ │ │ ├── modin.py │ │ │ ├── modssm.py │ │ │ ├── mofm.py │ │ │ ├── mogru4rec.py │ │ │ ├── molightsans.py │ │ │ ├── momf.py │ │ │ ├── monextitnet.py │ │ │ ├── mosasrec.py │ │ │ └── mosrgnn.py │ │ ├── ViNet │ │ │ ├── __pycache__ │ │ │ │ ├── acf.cpython-39.pyc │ │ │ │ ├── curatornet.cpython-39.pyc │ │ │ │ ├── fsasrec.cpython-39.pyc │ │ │ │ ├── vbpr.cpython-39.pyc │ │ │ │ └── visrank.cpython-39.pyc │ │ │ ├── acf.py │ │ │ ├── curatornet.py │ │ │ ├── fsasrec.py │ │ │ ├── vbpr.py │ │ │ └── visrank.py │ │ ├── VisualModel │ │ │ ├── __pycache__ │ │ │ │ ├── mobert4rec.cpython-39.pyc │ │ │ │ ├── mobert4rec2.cpython-39.pyc │ │ │ │ ├── modin.cpython-39.pyc │ │ │ │ ├── modssm.cpython-39.pyc │ │ │ │ ├── mofm.cpython-39.pyc │ │ │ │ ├── mogru4rec.cpython-39.pyc │ 
│ │ │ ├── molightsans.cpython-39.pyc │ │ │ │ ├── momf.cpython-39.pyc │ │ │ │ ├── monextitnet.cpython-39.pyc │ │ │ │ ├── mosasrec.cpython-39.pyc │ │ │ │ └── mosrgnn.cpython-39.pyc │ │ │ ├── dvbpr.py │ │ │ ├── mobert4rec.py │ │ │ ├── modin.py │ │ │ ├── modssm.py │ │ │ ├── mofm.py │ │ │ ├── mogru4rec.py │ │ │ ├── molightsans.py │ │ │ ├── momf.py │ │ │ ├── monextitnet.py │ │ │ ├── mosasrec.py │ │ │ └── mosrgnn.py │ │ ├── basemodel.py │ │ ├── layers.py │ │ └── load.py │ ├── trainer │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── hyper_tuning.cpython-39.pyc │ │ │ └── trainer.cpython-39.pyc │ │ ├── hyper_tuning.py │ │ └── trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── argument_list.cpython-39.pyc │ │ ├── enum_type.cpython-39.pyc │ │ ├── logger.cpython-39.pyc │ │ ├── utils.cpython-39.pyc │ │ └── wandblogger.cpython-39.pyc │ │ ├── argument_list.py │ │ ├── enum_type.py │ │ ├── logger.py │ │ ├── utils.py │ │ └── wandblogger.py ├── ViNet │ ├── acf.yaml │ ├── sasrec_semantic_id.yaml │ ├── sasrec_v.yaml │ ├── sasrec_vid.yaml │ ├── vbpr.yaml │ └── visrank.yaml ├── generate_lmdb.py ├── main.py ├── overall │ ├── BEiT.yaml │ ├── ID.yaml │ ├── RN50.yaml │ ├── Swin-B.yaml │ ├── Swin-T.yaml │ └── ViT.yaml ├── readme.md └── run.py ├── dataset ├── LICENSE ├── overview.png ├── readme.md └── statistics │ ├── item_rank.pdf │ ├── item_rank.png │ └── session_length.pdf ├── doc ├── SDM_poster.pdf └── pre.pdf └── requirements.txt /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 westlake-repl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /code/IDNet/bert4rec.yaml: -------------------------------------------------------------------------------- 1 | model: BERT4Rec 2 | n_layers: 2 3 | n_heads: 4 4 | embedding_size: 512 5 | inner_size: 1 6 | hidden_dropout_prob: 0.1 7 | attn_dropout_prob: 0.1 8 | hidden_act: 'gelu' 9 | layer_norm_eps: 1e-12 10 | initializer_range: 0.02 11 | mask_ratio: 0.6 -------------------------------------------------------------------------------- /code/IDNet/dssm.yaml: -------------------------------------------------------------------------------- 1 | model: DSSM 2 | embedding_size: 4096 3 | dropout_prob: 0 4 | mlp_hidden_size: [] -------------------------------------------------------------------------------- /code/IDNet/fm.yaml: -------------------------------------------------------------------------------- 1 | model: FM 2 | embedding_size: 2048 3 | dropout_prob: 0 -------------------------------------------------------------------------------- /code/IDNet/gru4rec.yaml: -------------------------------------------------------------------------------- 1 | model: GRU4Rec 2 | embedding_size: 2048 3 | hidden_size: 1 4 | num_layers: 1 5 | dropout_prob: 0 -------------------------------------------------------------------------------- /code/IDNet/lightgcn.yaml: -------------------------------------------------------------------------------- 1 | model: LightGCN 2 | embedding_size: 256 3 | n_layers: 1 -------------------------------------------------------------------------------- /code/IDNet/lightsans.yaml: -------------------------------------------------------------------------------- 1 | model: LightSANs 2 | n_layers: 1 3 | n_heads: 4 4 | embedding_size: 512 5 | inner_size: 2 6 | k_interests: 3 7 | hidden_dropout_prob: 0.1 8 | attn_dropout_prob: 0.1 9 | hidden_act: 'gelu' 10 | layer_norm_eps: 1e-12 11 | initializer_range: 0.02 12 | -------------------------------------------------------------------------------- /code/IDNet/mf.yaml: -------------------------------------------------------------------------------- 1 | model: MF 2 | embedding_size: 4096 3 | dropout_prob: 0 4 | mlp_hidden_size: [] -------------------------------------------------------------------------------- /code/IDNet/nextitnet.yaml: -------------------------------------------------------------------------------- 1 | model: NextItNet 2 | embedding_size: 1024 3 | kernel_size: 3 4 | block_num: 3 5 | dilations: [1,4] 6 | final_layer: False -------------------------------------------------------------------------------- /code/IDNet/sasrec.yaml: -------------------------------------------------------------------------------- 1 | model: SASRec 2 | n_layers: 2 3 | n_heads: 4 4 | embedding_size: 512 5 | inner_size: 2 6 | hidden_dropout_prob: 0.1 7 | attn_dropout_prob: 0.1 8 | hidden_act: 'gelu' 9 | layer_norm_eps: 1e-12 10 | initializer_range: 0.02 -------------------------------------------------------------------------------- /code/IDNet/srgnn.yaml: -------------------------------------------------------------------------------- 1 | model: SRGNN 2 | embedding_size: 512 3 | step: 2 4 | 5 | -------------------------------------------------------------------------------- /code/PixelNet/bert4rec.yaml: -------------------------------------------------------------------------------- 1 | model: MOBERT4Rec 2 | n_layers: 2 3 | n_heads: 4 4 | embedding_size: 512 5 | inner_size: 1 6 | hidden_dropout_prob: 0.1 7 | attn_dropout_prob: 0.1 8 | hidden_act: 'gelu' 9 | 
layer_norm_eps: 1e-12 10 | initializer_range: 0.02 11 | mask_ratio: 0.6 -------------------------------------------------------------------------------- /code/PixelNet/dssm.yaml: -------------------------------------------------------------------------------- 1 | model: MODSSM 2 | embedding_size: 4096 3 | dropout_prob: 0 4 | mlp_hidden_size: [] -------------------------------------------------------------------------------- /code/PixelNet/fm.yaml: -------------------------------------------------------------------------------- 1 | model: MOFM 2 | embedding_size: 4096 3 | dropout_prob: 0 4 | -------------------------------------------------------------------------------- /code/PixelNet/gru4rec.yaml: -------------------------------------------------------------------------------- 1 | model: MOGRU4Rec 2 | num_layers: 1 3 | embedding_size: 512 4 | hidden_size: 1 5 | dropout_prob: 0 -------------------------------------------------------------------------------- /code/PixelNet/lightsans.yaml: -------------------------------------------------------------------------------- 1 | model: MOLightSANs 2 | n_layers: 1 3 | n_heads: 4 4 | embedding_size: 512 5 | inner_size: 2 6 | k_interests: 3 7 | hidden_dropout_prob: 0.1 8 | attn_dropout_prob: 0.1 9 | hidden_act: 'gelu' 10 | layer_norm_eps: 1e-12 11 | initializer_range: 0.02 -------------------------------------------------------------------------------- /code/PixelNet/mf.yaml: -------------------------------------------------------------------------------- 1 | model: MOMF 2 | embedding_size: 4096 3 | dropout_prob: 0 4 | mlp_hidden_size: [] -------------------------------------------------------------------------------- /code/PixelNet/nextitnet.yaml: -------------------------------------------------------------------------------- 1 | model: MONextItNet 2 | embedding_size: 1024 3 | kernel_size: 3 4 | block_num: 3 5 | dilations: [1,4] 6 | final_layer: False -------------------------------------------------------------------------------- /code/PixelNet/sasrec.yaml: -------------------------------------------------------------------------------- 1 | model: MOSASRec 2 | n_layers: 2 3 | n_heads: 4 4 | embedding_size: 512 5 | inner_size: 2 6 | hidden_dropout_prob: 0.1 7 | attn_dropout_prob: 0.1 8 | hidden_act: 'gelu' 9 | layer_norm_eps: 1e-12 10 | initializer_range: 0.02 -------------------------------------------------------------------------------- /code/PixelNet/srgnn.yaml: -------------------------------------------------------------------------------- 1 | model: MOSRGNN 2 | embedding_size: 512 3 | step: 2 -------------------------------------------------------------------------------- /code/REC/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .configurator import Config -------------------------------------------------------------------------------- /code/REC/config/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/config/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/config/__pycache__/configurator.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/config/__pycache__/configurator.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | 3 | 4 | __all__ = ['load_data', 'bulid_dataloader','LMDB_Image'] 5 | -------------------------------------------------------------------------------- /code/REC/data/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /code/REC/data/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/__pycache__/data.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/__pycache__/data.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/__pycache__/dataload.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/__pycache__/dataload.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /code/REC/data/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainset import * 2 | from .evalset import * 3 | from .batchset import * 4 | from .collate_fn import * -------------------------------------------------------------------------------- /code/REC/data/dataset/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/dataset/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /code/REC/data/dataset/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/dataset/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/dataset/__pycache__/batchset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/dataset/__pycache__/batchset.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/dataset/__pycache__/collate_fn.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/dataset/__pycache__/collate_fn.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/dataset/__pycache__/evalset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/dataset/__pycache__/evalset.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/dataset/__pycache__/trainset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/dataset/__pycache__/trainset.cpython-38.pyc -------------------------------------------------------------------------------- /code/REC/data/dataset/__pycache__/trainset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/data/dataset/__pycache__/trainset.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/data/dataset/batchset.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | 3 | import torch 4 | import numpy as np 5 | import pandas as pd 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | import torchvision 9 | import lmdb 10 | import pickle 11 | import random 12 | import math 13 | import os 14 | 15 | # Image_Mean = [0.4860599, 0.4426124, 0.43379018] 16 | # Image_Std = [0.31636897, 0.3010678, 0.30478135] 17 | 18 | Image_Mean = [0.5, 0.5, 0.5] 19 | Image_Std = [0.5, 0.5, 0.5] 20 | Resize = 224 21 | 22 | class BatchDataset(Dataset): 23 | def __init__(self,config,dataload): 24 | self.item_num = dataload.item_num 25 | self.item_list = dataload.id2token['item_id'] 26 | 27 | 28 | self.db_path = config['image_path'] 29 | 30 | if 'BERT4Rec' in config['model']: 31 | self.length = self.item_num+1 32 | else : 33 | self.length = self.item_num 34 | 35 | self.load_content() 36 | 37 | def __len__(self): 38 | return self.length # BERT4Rec adds 1 here (an extra slot for the mask token) 39 | 40 | def load_content(self): 41 | self.env = lmdb.open(self.db_path, subdir=os.path.isdir(self.db_path), 42 | readonly=True, lock=False, 43 | readahead=False, meminit=False) 44 | self.feature_extractor = transforms.Compose([ 45 | transforms.Resize((Resize,Resize)), 46 | transforms.ToTensor(), 47 | #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 48 | 
transforms.Normalize(mean=Image_Mean, std=Image_Std) 49 | 50 | ]) 51 | 52 | self.reserve_embedding = torch.zeros(3,Resize,Resize) 53 | self.mask_embedding = torch.ones(3,Resize,Resize) 54 | 55 | 56 | def __getitem__(self, index): 57 | item_i = index 58 | if index == 0 or index == self.item_num: 59 | if index == 0: 60 | item_i = self.reserve_embedding 61 | else: 62 | item_i = self.mask_embedding 63 | else : 64 | item_token_i = self.item_list[index] 65 | 66 | with self.env.begin() as txn: 67 | byteflow_i = txn.get(item_token_i.encode('ascii')) 68 | IMAGE_i = pickle.loads(byteflow_i) 69 | item_i = self.feature_extractor(Image.fromarray(IMAGE_i.get_image()).convert('RGB')) 70 | return item_i -------------------------------------------------------------------------------- /code/REC/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_metric import * 2 | from .metrics import * 3 | from .evaluator import * 4 | from .register import * 5 | from .collector import * -------------------------------------------------------------------------------- /code/REC/evaluator/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/evaluator/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/evaluator/__pycache__/base_metric.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/evaluator/__pycache__/base_metric.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/evaluator/__pycache__/collector.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/evaluator/__pycache__/collector.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/evaluator/__pycache__/evaluator.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/evaluator/__pycache__/evaluator.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/evaluator/__pycache__/metrics.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/evaluator/__pycache__/metrics.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/evaluator/__pycache__/register.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/evaluator/__pycache__/register.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/evaluator/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/evaluator/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/evaluator/base_metric.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from REC.utils import EvaluatorType 3 | 4 | 5 | class AbstractMetric(object): 6 | """:class:`AbstractMetric` is the base object of all metrics. If you want to 7 | implement a metric, you should inherit this class. 8 | 9 | Args: 10 | config (Config): the config of evaluator. 11 | """ 12 | smaller = False 13 | 14 | def __init__(self, config): 15 | self.decimal_place = config['metric_decimal_place'] +2 if config['metric_decimal_place'] else 7 16 | 17 | def calculate_metric(self, dataobject): 18 | """Get the dictionary of a metric. 19 | 20 | Args: 21 | dataobject(DataStruct): it contains all the information needed to calculate metrics. 22 | 23 | Returns: 24 | dict: such as ``{'metric@10': 3153, 'metric@20': 0.3824}`` 25 | """ 26 | raise NotImplementedError('Method [calculate_metric] should be implemented.') 27 | 28 | 29 | class TopkMetric(AbstractMetric): 30 | """:class:`TopkMetric` is a base object of top-k metrics. If you want to 31 | implement an top-k metric, you can inherit this class. 32 | 33 | Args: 34 | config (Config): The config of evaluator. 35 | """ 36 | metric_type = EvaluatorType.RANKING 37 | metric_need = ['rec.topk'] 38 | 39 | def __init__(self, config): 40 | super().__init__(config) 41 | self.topk = config['topk'] 42 | 43 | def used_info(self, dataobject): 44 | """Get the bool matrix indicating whether the corresponding item is positive 45 | and number of positive items for each user. 46 | """ 47 | rec_mat = dataobject.get('rec.topk') 48 | topk_idx, pos_len_list = torch.split(rec_mat, [max(self.topk), 1], dim=1) 49 | return topk_idx.to(torch.bool).numpy(), pos_len_list.squeeze(-1).numpy() 50 | 51 | def topk_result(self, metric, value): 52 | """Match the metric value to the `k` and put them in `dictionary` form. 53 | 54 | Args: 55 | metric(str): the name of calculated metric. 56 | value(numpy.ndarray): metrics for each user, including values from `metric@1` to `metric@max(self.topk)`. 57 | 58 | Returns: 59 | dict: metric values required in the configuration. 60 | """ 61 | metric_dict = {} 62 | avg_result = value.sum(axis=0) 63 | for k in self.topk: 64 | key = '{}@{}'.format(metric, k) 65 | #metric_dict[key] = round(avg_result[k - 1], self.decimal_place) 66 | metric_dict[key] = avg_result[k - 1] 67 | return metric_dict 68 | 69 | def metric_info(self, pos_index, pos_len=None): 70 | """Calculate the value of the metric. 71 | 72 | Args: 73 | pos_index(numpy.ndarray): a bool matrix, shape of ``n_users * max(topk)``. The item with the (j+1)-th \ 74 | highest score of i-th user is positive if ``pos_index[i][j] == True`` and negative otherwise. 75 | pos_len(numpy.ndarray): a vector representing the number of positive items per user, shape of ``(n_users,)``. 76 | 77 | Returns: 78 | numpy.ndarray: metrics for each user, including values from `metric@1` to `metric@max(self.topk)`. 79 | """ 80 | raise NotImplementedError('Method [metric_info] of top-k metric should be implemented.') 81 | 82 | 83 | class LossMetric(AbstractMetric): 84 | """:class:`LossMetric` is a base object of loss based metrics and AUC. If you want to 85 | implement an loss based metric, you can inherit this class. 
86 | 87 | Args: 88 | config (Config): The config of evaluator. 89 | """ 90 | metric_type = EvaluatorType.VALUE 91 | metric_need = ['rec.score', 'data.label'] 92 | 93 | def __init__(self, config): 94 | super().__init__(config) 95 | 96 | def used_info(self, dataobject): 97 | """Get scores that model predicted and the ground truth.""" 98 | preds = dataobject.get('rec.score') 99 | trues = dataobject.get('data.label') 100 | 101 | return preds.squeeze(-1).numpy(), trues.squeeze(-1).numpy() 102 | 103 | def output_metric(self, metric, dataobject): 104 | preds, trues = self.used_info(dataobject) 105 | result = self.metric_info(preds, trues) 106 | return {metric: round(result, self.decimal_place)} 107 | 108 | def metric_info(self, preds, trues): 109 | """Calculate the value of the metric. 110 | 111 | Args: 112 | preds (numpy.ndarray): the scores predicted by model, a one-dimensional vector. 113 | trues (numpy.ndarray): the label of items, which has the same shape as ``preds``. 114 | 115 | Returns: 116 | float: The value of the metric. 117 | """ 118 | raise NotImplementedError('Method [metric_info] of loss-based metric should be implemented.') 119 | -------------------------------------------------------------------------------- /code/REC/evaluator/evaluator.py: -------------------------------------------------------------------------------- 1 | from .register import metrics_dict 2 | from .collector import DataStruct 3 | from collections import OrderedDict 4 | 5 | 6 | class Evaluator(object): 7 | """Evaluator is used to check parameter correctness, and summarize the results of all metrics. 8 | """ 9 | 10 | def __init__(self, config): 11 | self.config = config 12 | self.metrics = [metric.lower() for metric in self.config['metrics']] 13 | self.metric_class = {} 14 | 15 | for metric in self.metrics: 16 | self.metric_class[metric] = metrics_dict[metric](self.config) 17 | 18 | def evaluate(self, dataobject: DataStruct): 19 | """calculate all the metrics. It is called at the end of each epoch 20 | 21 | Args: 22 | dataobject (DataStruct): It contains all the information needed for metrics. 23 | 24 | Returns: 25 | collections.OrderedDict: such as ``{'hit@20': 0.3824, 'recall@20': 0.0527, 'hit@10': 0.3153, 'recall@10': 0.0329, 'gauc': 0.9236}`` 26 | 27 | """ 28 | result_dict = OrderedDict() 29 | for metric in self.metrics: 30 | metric_val = self.metric_class[metric].calculate_metric(dataobject) 31 | result_dict.update(metric_val) 32 | return result_dict 33 | -------------------------------------------------------------------------------- /code/REC/evaluator/register.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import sys 3 | 4 | 5 | def cluster_info(module_name): 6 | """Collect information of all metrics, including: 7 | 8 | - ``metric_need``: Information needed to calculate this metric, the combination of ``rec.items, rec.topk, 9 | rec.meanrank, rec.score, data.num_items, data.num_users, data.count_items, data.count_users, data.label``. 10 | - ``metric_type``: Whether the scores required by metric are grouped by user, range in ``EvaluatorType.RANKING`` 11 | and ``EvaluatorType.VALUE``. 12 | - ``smaller``: Whether the smaller metric value represents better performance, 13 | range in ``True`` and ``False``, default to ``False``. 14 | 15 | Note: 16 | For ``metric_type``: in current RecBole, all the "grouped-score" metrics are ranking-based and all the 17 | "non-grouped-score" metrics are value-based. 
To keep with our paper, we adopted the more formal terms: 18 | ``RANKING`` and ``VALUE``. 19 | 20 | Args: 21 | module_name (str): the name of module ``recbole.evaluator.metrics``. 22 | 23 | Returns: 24 | dict: Three dictionaries containing the above information 25 | and a dictionary matching metric names to metric classes. 26 | """ 27 | smaller_m = [] 28 | m_dict, m_info, m_types = {}, {}, {} 29 | metric_class = inspect.getmembers( 30 | sys.modules[module_name], lambda x: inspect.isclass(x) and x.__module__ == module_name 31 | ) 32 | for name, metric_cls in metric_class: 33 | name = name.lower() 34 | m_dict[name] = metric_cls 35 | if hasattr(metric_cls, 'metric_need'): 36 | m_info[name] = metric_cls.metric_need 37 | else: 38 | raise AttributeError(f"Metric '{name}' has no attribute [metric_need].") 39 | if hasattr(metric_cls, 'metric_type'): 40 | m_types[name] = metric_cls.metric_type 41 | else: 42 | raise AttributeError(f"Metric '{name}' has no attribute [metric_type].") 43 | if metric_cls.smaller is True: 44 | smaller_m.append(name) 45 | return smaller_m, m_info, m_types, m_dict 46 | 47 | 48 | metric_module_name = 'REC.evaluator.metrics' 49 | smaller_metrics, metric_information, metric_types, metrics_dict = cluster_info(metric_module_name) 50 | 51 | 52 | class Register(object): 53 | """ Register module load the registry according to the metrics in config. 54 | It is a member of DataCollector. 55 | The DataCollector collect the resource that need for Evaluator under the guidance of Register 56 | """ 57 | 58 | def __init__(self, config): 59 | 60 | self.config = config 61 | self.metrics = [metric.lower() for metric in self.config['metrics']] 62 | self._build_register() 63 | 64 | def _build_register(self): 65 | for metric in self.metrics: 66 | metric_needs = metric_information[metric] 67 | for info in metric_needs: 68 | setattr(self, info, True) 69 | 70 | def has_metric(self, metric: str): 71 | if metric.lower() in self.metrics: 72 | return True 73 | else: 74 | return False 75 | 76 | def need(self, key: str): 77 | if hasattr(self, key): 78 | return getattr(self, key) 79 | return False 80 | -------------------------------------------------------------------------------- /code/REC/evaluator/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def pad_sequence(sequences, len_list, pad_to=None, padding_value=0): 8 | """pad sequences to a matrix 9 | 10 | Args: 11 | sequences (list): list of variable length sequences. 12 | len_list (list): the length of the tensors in the sequences 13 | pad_to (int, optional): if pad_to is not None, the sequences will pad to the length you set, 14 | else the sequence will pad to the max length of the sequences. 15 | padding_value (int, optional): value for padded elements. Default: 0. 
16 | 17 | Returns: 18 | torch.Tensor: [seq_num, max_len] or [seq_num, pad_to] 19 | 20 | """ 21 | max_len = np.max(len_list) if pad_to is None else pad_to 22 | min_len = np.min(len_list) 23 | device = sequences[0].device 24 | if max_len == min_len: 25 | result = torch.cat(sequences, dim=0).view(-1, max_len) 26 | else: 27 | extra_len_list = np.subtract(max_len, len_list).tolist() 28 | padding_nums = max_len * len(len_list) - np.sum(len_list) 29 | padding_tensor = torch.tensor([-np.inf], device=device).repeat(padding_nums) 30 | padding_list = torch.split(padding_tensor, extra_len_list) 31 | result = list(itertools.chain.from_iterable(zip(sequences, padding_list))) 32 | result = torch.cat(result) 33 | 34 | return result.view(-1, max_len) 35 | 36 | 37 | def trunc(scores, method): 38 | """Round the scores by using the given method 39 | 40 | Args: 41 | scores (numpy.ndarray): scores 42 | method (str): one of ['ceil', 'floor', 'around'] 43 | 44 | Raises: 45 | NotImplementedError: method error 46 | 47 | Returns: 48 | numpy.ndarray: processed scores 49 | """ 50 | 51 | try: 52 | cut_method = getattr(np, method) 53 | except NotImplementedError: 54 | raise NotImplementedError("module 'numpy' has no function named '{}'".format(method)) 55 | scores = cut_method(scores) 56 | return scores 57 | 58 | 59 | def cutoff(scores, threshold): 60 | """cut of the scores based on threshold 61 | 62 | Args: 63 | scores (numpy.ndarray): scores 64 | threshold (float): between 0 and 1 65 | 66 | Returns: 67 | numpy.ndarray: processed scores 68 | """ 69 | return np.where(scores > threshold, 1, 0) 70 | 71 | 72 | def _binary_clf_curve(trues, preds): 73 | """Calculate true and false positives per binary classification threshold 74 | 75 | Args: 76 | trues (numpy.ndarray): the true scores' list 77 | preds (numpy.ndarray): the predict scores' list 78 | 79 | Returns: 80 | fps (numpy.ndarray): A count of false positives, at index i being the number of negative 81 | samples assigned a score >= thresholds[i] 82 | preds (numpy.ndarray): An increasing count of true positives, at index i being the number 83 | of positive samples assigned a score >= thresholds[i]. 84 | 85 | Note: 86 | To improve efficiency, we referred to the source code(which is available at sklearn.metrics.roc_curve) 87 | in SkLearn and made some optimizations. 
88 | 89 | """ 90 | trues = (trues == 1) 91 | 92 | desc_idxs = np.argsort(preds)[::-1] 93 | preds = preds[desc_idxs] 94 | trues = trues[desc_idxs] 95 | 96 | unique_val_idxs = np.where(np.diff(preds))[0] 97 | threshold_idxs = np.r_[unique_val_idxs, trues.size - 1] 98 | 99 | tps = np.cumsum(trues)[threshold_idxs] 100 | fps = 1 + threshold_idxs - tps 101 | return fps, tps 102 | -------------------------------------------------------------------------------- /code/REC/model/FreezeModel/curatornet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.utils.enum_type import InputType 4 | from REC.model.basemodel import BaseModel 5 | import numpy as np 6 | from torch.nn.init import xavier_normal_, constant_ 7 | import torch.nn.functional as F 8 | 9 | class CuratorNet(BaseModel): 10 | input_type = InputType.SEQ 11 | def __init__(self, config, dataload): 12 | super(CuratorNet, self).__init__() 13 | self.embedding_size = config['embedding_size'] 14 | self.hidden_size = config['hidden_size']*self.embedding_size 15 | self.device = config['device'] 16 | 17 | self.v_feat_path = config['v_feat_path'] 18 | v_feat = np.load(self.v_feat_path, allow_pickle=True) 19 | 20 | v_feat = torch.tensor(v_feat,dtype=torch.float).to(self.device) 21 | v_feat[0].fill_(0) 22 | self.embedding = nn.Embedding.from_pretrained(v_feat, freeze=True) 23 | 24 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 25 | 26 | self.feature_dim = v_feat.shape[-1] 27 | 28 | # Common section 29 | self.selu_common1 = nn.Linear(self.feature_dim, self.embedding_size) 30 | self.selu_common2 = nn.Linear(self.embedding_size, self.embedding_size) 31 | 32 | # Profile section 33 | self.maxpool = nn.AdaptiveMaxPool2d((1, self.embedding_size)) 34 | self.avgpool = nn.AdaptiveAvgPool2d((1, self.embedding_size)) 35 | self.selu_pu1 = nn.Linear(self.embedding_size + self.embedding_size, self.hidden_size) 36 | self.selu_pu2 = nn.Linear(self.hidden_size, self.hidden_size) 37 | self.selu_pu3 = nn.Linear(self.hidden_size, self.embedding_size) 38 | 39 | # Random weight initialization 40 | self.reset_parameters() 41 | 42 | 43 | def reset_parameters(self): 44 | """Resets network weights. 45 | 46 | Restart network weights using a Xavier uniform distribution. 
47 | """ 48 | # Common section 49 | nn.init.xavier_uniform_(self.selu_common1.weight) 50 | nn.init.xavier_uniform_(self.selu_common2.weight) 51 | # Profile section 52 | nn.init.xavier_uniform_(self.selu_pu1.weight) 53 | nn.init.xavier_uniform_(self.selu_pu2.weight) 54 | nn.init.xavier_uniform_(self.selu_pu3.weight) 55 | 56 | 57 | def forward(self, inputs): #inputs: user_seq, pos, neg 58 | profile = inputs[:, :-2] 59 | pi = inputs[:, -2] 60 | ni = inputs[:, -1] 61 | # Load embedding data 62 | profile = self.embedding(profile) 63 | pi = self.embedding(pi) 64 | ni = self.embedding(ni) 65 | 66 | # Positive item 67 | pi = F.selu(self.selu_common1(pi)) 68 | pi = F.selu(self.selu_common2(pi)) 69 | 70 | # Negative item 71 | ni = F.selu(self.selu_common1(ni)) 72 | ni = F.selu(self.selu_common2(ni)) 73 | 74 | # User profile 75 | profile = F.selu(self.selu_common1(profile)) 76 | profile = F.selu(self.selu_common2(profile)) 77 | profile = torch.cat( 78 | (self.maxpool(profile), self.avgpool(profile)), dim=-1 79 | ) 80 | profile = F.selu(self.selu_pu1(profile)) 81 | profile = F.selu(self.selu_pu2(profile)) 82 | profile = F.selu(self.selu_pu3(profile)) 83 | 84 | # x_ui > x_uj 85 | profile = profile.squeeze(1) 86 | x_ui = (profile*pi).sum(-1) 87 | x_uj = (profile*ni).sum(-1) 88 | batch_loss = -torch.mean(torch.log(1e-8 + torch.sigmoid(x_ui - x_uj))) 89 | return batch_loss 90 | 91 | 92 | 93 | 94 | @torch.no_grad() 95 | def predict(self, user,item_feature): 96 | profile = item_feature[user] 97 | profile = torch.cat( 98 | (self.maxpool(profile), self.avgpool(profile)), dim=-1 99 | ) 100 | 101 | profile = F.selu(self.selu_pu1(profile)) 102 | profile = F.selu(self.selu_pu2(profile)) 103 | profile = F.selu(self.selu_pu3(profile)) 104 | profile = profile.squeeze(1) 105 | 106 | score = torch.matmul(profile,item_feature.t()) 107 | return score 108 | 109 | @torch.no_grad() # [num_item, 32] 110 | def compute_item_all(self): 111 | embed = self.embedding.weight 112 | embed = F.selu(self.selu_common1(embed)) 113 | embed = F.selu(self.selu_common2(embed)) 114 | return embed 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /code/REC/model/FreezeModel/vbpr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.utils.enum_type import InputType 4 | from REC.model.basemodel import BaseModel 5 | import numpy as np 6 | from torch.nn.init import xavier_normal_, constant_ 7 | 8 | class VBPR(BaseModel): 9 | input_type = InputType.PAIR 10 | def __init__(self, config, dataload): 11 | super(VBPR, self).__init__() 12 | self.mlp_hidden_size = config['mlp_hidden_size'] 13 | self.dropout_prob = config['dropout_prob'] 14 | self.embedding_size = config['embedding_size'] // 2 15 | 16 | self.device = config['device'] 17 | 18 | self.user_num = dataload.user_num 19 | self.item_num = dataload.item_num 20 | 21 | self.v_feat_path = config['v_feat_path'] 22 | v_feat = np.load(self.v_feat_path, allow_pickle=True) 23 | 24 | self.v_feat = torch.tensor(v_feat,dtype=torch.float).to(self.device) 25 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 26 | 27 | self.feature_dim = self.v_feat.shape[-1] 28 | 29 | # define layers and loss 30 | self.feature_projection = nn.Linear(self.feature_dim, self.embedding_size, bias=False) 31 | self.bias_projection = nn.Linear(self.feature_dim, 1, bias=False) 32 | self.user_id_embedding = nn.Embedding(self.user_num, self.embedding_size) 33 | self.item_id_embedding 
= nn.Embedding(self.item_num, self.embedding_size) 34 | 35 | self.user_modal_embedding = nn.Embedding(self.user_num, self.embedding_size) 36 | 37 | #self.visual_bias = nn.Parameter(torch.tensor(0.0)) 38 | #self.user_bias = nn.Parameter(torch.tensor(0.0)) 39 | #self.item_bias = nn.Parameter(torch.tensor(0.0)) 40 | #self.global_bias = nn.Parameter(torch.tensor(0.0)) 41 | #self.loss = BPRLoss() 42 | # parameters initialization 43 | self.apply(self._init_weights) 44 | 45 | 46 | def _init_weights(self, module): 47 | if isinstance(module, nn.Embedding): 48 | xavier_normal_(module.weight.data) 49 | elif isinstance(module, nn.Linear): 50 | xavier_normal_(module.weight.data) 51 | if module.bias is not None: 52 | constant_(module.bias.data, 0) 53 | 54 | 55 | def forward(self, inputs): 56 | user, item = inputs 57 | embed_id_user = self.user_id_embedding(user).unsqueeze(1) 58 | embed_id_item = self.item_id_embedding(item) 59 | 60 | embed_modal_user = self.user_modal_embedding(user).unsqueeze(1) 61 | embed_modal_item = self.feature_projection(self.v_feat[item]) 62 | 63 | 64 | score = (embed_id_user * embed_id_item).sum(-1) + (embed_modal_user * embed_modal_item).sum(-1) \ 65 | + self.bias_projection(self.v_feat[item]).squeeze(-1) 66 | #self.global_bias + self.user_bias + self.item_bias 67 | 68 | output = score.view(-1,2) 69 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 70 | return batch_loss 71 | 72 | 73 | 74 | 75 | @torch.no_grad() 76 | def predict(self, user,item_feature): 77 | embed_id_user = self.user_id_embedding(user) 78 | embed_id_item = self.item_id_embedding.weight 79 | 80 | embed_modal_user = self.user_modal_embedding(user) 81 | 82 | 83 | score = torch.matmul(embed_id_user,embed_id_item.t()) + \ 84 | torch.matmul(embed_modal_user,item_feature.t()) + \ 85 | self.total_visual_bias 86 | 87 | 88 | return score 89 | 90 | @torch.no_grad() 91 | def compute_item_all(self): 92 | embed = self.feature_projection(self.v_feat) 93 | self.total_visual_bias = self.bias_projection(self.v_feat).squeeze(-1) 94 | return embed 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /code/REC/model/FreezeModel/visrank.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.utils.enum_type import InputType 4 | from REC.model.basemodel import BaseModel 5 | import numpy as np 6 | 7 | 8 | class VISRANK(nn.Module): 9 | input_type = InputType.PAIR 10 | def __init__(self, config, dataload): 11 | super(VISRANK, self).__init__() 12 | 13 | 14 | self.method = config['method'] 15 | if self.method == 'average_top_k': 16 | self.k = config['top_num'] 17 | elif self.method == 'maximum': 18 | self.k = 1 19 | else: 20 | self.k = None 21 | v_feat_path = config['v_feat_path'] 22 | self.device = config['device'] 23 | v_feat = np.load(v_feat_path, allow_pickle=True) 24 | 25 | self.v_feat = torch.tensor(v_feat,dtype=torch.float).to(self.device) 26 | 27 | self.module = None 28 | self.placeholder = nn.Parameter(torch.zeros(0, requires_grad=True)) 29 | 30 | 31 | 32 | def forward(self, inputs): 33 | pass 34 | 35 | 36 | @torch.no_grad() #set batch=1 37 | def predict(self, user,item_feature): 38 | 39 | user = user[-50:] # due to the limited of GPU memory RN50:-50 resnet50:-30 40 | 41 | 42 | seq_feat = self.v_feat[user] 43 | possible_items = torch.cosine_similarity(seq_feat.unsqueeze(1),self.v_feat.unsqueeze(0),dim=-1) 44 | 45 | seq_len = len(user) 46 | if 
self.method == 'average_top_k': 47 | k = min(self.k, seq_len) 48 | elif self.method == 'maximum': 49 | k = 1 50 | else: 51 | k = seq_len 52 | values, _ = torch.topk(possible_items, k = k, dim=0) 53 | scores = values.mean(0) 54 | scores[0] = -np.inf 55 | return scores 56 | 57 | @torch.no_grad() 58 | def compute_item_all(self): 59 | return None 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/bert4rec.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/bert4rec.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/din.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/din.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/dssm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/dssm.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/fm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/fm.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/gru4rec.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/gru4rec.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/lightgcn.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/lightgcn.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/lightsans.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/lightsans.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/mf.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/mf.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/nextitnet.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/nextitnet.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/sasrec.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/sasrec.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/srgnn.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/srgnn.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/__pycache__/ytdnn.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/IDNet/__pycache__/ytdnn.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/IDNet/din.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers, SequenceAttLayer 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | class DIN(BaseModel): 9 | 10 | input_type = InputType.SEQ 11 | 12 | def __init__(self, config, dataload): 13 | super(DIN, self).__init__() 14 | 15 | # get field names and parameter value from config 16 | self.embedding_size = config['embedding_size'] 17 | self.mlp_hidden_size = config['mlp_hidden_size'] 18 | self.device = config['device'] 19 | self.dropout_prob = config['dropout_prob'] 20 | 21 | self.item_num = dataload.item_num 22 | 23 | self.dnn_list = [3 * self.embedding_size] + self.mlp_hidden_size 24 | self.att_list = [4 * self.embedding_size] + self.mlp_hidden_size 25 | 26 | self.attention = SequenceAttLayer( 27 | self.att_list, activation='Sigmoid', softmax_stag=False, return_seq_weight=False 28 | ) 29 | 30 | #self.dnn_mlp_layers = MLPLayers(self.dnn_list, activation='Dice', dropout=self.dropout_prob, bn=True) 31 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 32 | #self.dnn_predict_layers = nn.Linear(self.mlp_hidden_size[-1], 1) 33 | #self.criterion = nn.BCEWithLogitsLoss() 34 | # parameters initialization 35 | self.apply(self._init_weights) 36 | 37 | 38 | def _init_weights(self, module): 39 | if isinstance(module, nn.Embedding): 40 | xavier_normal_(module.weight.data) 41 | elif isinstance(module, nn.Linear): 42 | xavier_normal_(module.weight.data) 43 | if module.bias is not None: 44 | constant_(module.bias.data, 0) 45 | 46 | 47 | 48 | def get_scores(self, cand_embs, user_seq_emb, mask): 49 | user_emb = self.attention(cand_embs, user_seq_emb,mask).squeeze(1) 50 | # din_in = torch.cat([user_emb, cand_embs, user_emb * cand_embs], dim=-1) 51 | # din_out = self.dnn_mlp_layers(din_in) 52 | # scores = self.dnn_predict_layers(din_out).squeeze(1) 53 | scores = (user_emb*cand_embs).sum(-1) 54 | 
return scores 55 | 56 | 57 | def forward(self, items): 58 | #[batch,seq_len+2] 59 | item_emb = self.item_embedding(items) #[batch,seq_len+2,dim] 60 | user_seq_emb = item_emb[:, :-2] 61 | pos_cand_embs = item_emb[:, -2] 62 | neg_cand_embs = item_emb[:, -1] 63 | 64 | # attention 65 | mask = (items[:,:-2] == 0) 66 | # pos_user_emb = self.attention(pos_cand_embs, user_seq_emb,mask).squeeze(1) 67 | # neg_user_emb = self.attention(neg_cand_embs, user_seq_emb,mask).squeeze(1) 68 | 69 | # pos_score = (pos_user_emb * pos_cand_embs).sum(-1) #[batch] 70 | # neg_score = (neg_user_emb * neg_cand_embs).sum(-1) #[batch] 71 | pos_score = self.get_scores(pos_cand_embs, user_seq_emb, mask) 72 | neg_score = self.get_scores(neg_cand_embs, user_seq_emb, mask) 73 | 74 | # pos_labels, neg_labels = torch.ones(pos_score.shape).to(self.device), torch.zeros(neg_score.shape).to(self.device) 75 | 76 | # loss_1 = self.criterion(pos_score, pos_labels) 77 | # loss_2 = self.criterion(neg_score, neg_labels) 78 | # loss = loss_1 + loss_2 79 | MBAloss = 0.01 * torch.norm(item_emb, 2) / item_emb.shape[0] 80 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)).mean(-1) 81 | return loss + MBAloss 82 | 83 | 84 | 85 | 86 | @torch.no_grad() 87 | def predict(self, item_seq, item_feature): 88 | 89 | #[batch,item_num, seq_len+1] 90 | batch_size = item_seq.shape[0] 91 | item_seq = item_seq.flatten(0,1) #[batch*item_num, seq_len+1] 92 | item_emb = item_feature[item_seq] #[batch*item_num, seq_len+1,dim] 93 | user_seq_emb = item_emb[:, :-1] 94 | cand_emb = item_emb[:,-1] 95 | 96 | 97 | # attention 98 | mask = (item_seq[:,:-1] == 0) 99 | user_emb = self.attention(cand_emb, user_seq_emb, mask).squeeze(1) #[batch*item_num,dim] 100 | 101 | user_emb = user_emb.view(batch_size, self.item_num, self.embedding_size) #[batch,item_num,dim] 102 | scores = (user_emb*item_feature).sum(-1) # [B n_items] 103 | return scores 104 | 105 | @torch.no_grad() 106 | def compute_item_all(self): 107 | return self.item_embedding.weight 108 | -------------------------------------------------------------------------------- /code/REC/model/IDNet/dssm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | class DSSM(BaseModel): 9 | input_type = InputType.SEQ 10 | def __init__(self, config, dataload): 11 | super(DSSM, self).__init__() 12 | 13 | # load parameters info 14 | self.mlp_hidden_size = config['mlp_hidden_size'] 15 | self.dropout_prob = config['dropout_prob'] 16 | self.embedding_size = config['embedding_size'] 17 | self.out_size = self.mlp_hidden_size[-1] if len(self.mlp_hidden_size) else self.embedding_size 18 | 19 | self.device = config['device'] 20 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 21 | 22 | self.item_num = dataload.item_num 23 | #self.user_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 24 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 25 | self.user_embedding = self.item_embedding 26 | #size_list = [self.embedding_size] + self.mlp_hidden_size + [self.embedding_size] 27 | size_list = self.mlp_hidden_size 28 | self.mlp_layers = MLPLayers(size_list, self.dropout_prob) 29 | 30 | 31 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 32 | 33 | # parameters initialization 34 | 
self.apply(self._init_weights) 35 | 36 | def _init_weights(self, module): 37 | if isinstance(module, nn.Embedding): 38 | xavier_normal_(module.weight.data) 39 | elif isinstance(module, nn.Linear): 40 | xavier_normal_(module.weight.data) 41 | if module.bias is not None: 42 | constant_(module.bias.data, 0) 43 | 44 | 45 | 46 | def avg_emb(self, user_seq): 47 | mask = user_seq != 0 48 | mask = mask.float() 49 | value_cnt = torch.sum(mask, dim=1, keepdim=True) 50 | token_seq_embedding = self.user_embedding(user_seq) 51 | mask = mask.unsqueeze(2).expand_as(token_seq_embedding) 52 | masked_token_seq_embedding = token_seq_embedding * mask.float() 53 | result = torch.sum(masked_token_seq_embedding, dim=-2) 54 | user_embedding = torch.div(result, value_cnt + 1e-8) 55 | return user_embedding 56 | 57 | 58 | 59 | def forward(self, inputs): 60 | inputs = inputs[0].unsqueeze(0) 61 | user_seq = inputs[:, :-2] 62 | target_item = inputs[:, -2:] 63 | user_embedding = self.avg_emb(user_seq) 64 | user_embedding = self.mlp_layers(user_embedding).unsqueeze(1) 65 | item_embedding = self.item_embedding(target_item) 66 | score = (user_embedding * item_embedding).sum(-1) 67 | output = score.view(-1,2) 68 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 69 | return batch_loss 70 | 71 | 72 | # If the sequence embeddings were concatenated, the result would depend on item order; taking the mean (or sum) makes it order-independent 73 | @torch.no_grad() 74 | def predict(self,user_seq,item_feature): 75 | user_embedding = self.avg_emb(user_seq) 76 | user_embedding = self.mlp_layers(user_embedding) 77 | scores = torch.matmul(user_embedding,item_feature.t()) 78 | return scores 79 | 80 | @torch.no_grad() 81 | def compute_item_all(self): 82 | return self.item_embedding.weight 83 | 84 | #return torch.arange(0,self.n_items).to(self.device) 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /code/REC/model/IDNet/fm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers, BaseFactorizationMachine 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from logging import getLogger 8 | 9 | 10 | class FM(BaseModel): 11 | input_type = InputType.SEQ 12 | def __init__(self, config, dataload): 13 | super(FM, self).__init__() 14 | 15 | # load parameters info 16 | self.mlp_hidden_size = config['mlp_hidden_size'] 17 | self.dropout_prob = config['dropout_prob'] 18 | self.embedding_size = config['embedding_size'] 19 | self.out_size = self.mlp_hidden_size[-1] if (self.mlp_hidden_size and len(self.mlp_hidden_size)) else self.embedding_size 20 | 21 | self.device = config['device'] 22 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 23 | 24 | self.item_num = dataload.item_num 25 | #self.user_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 26 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 27 | 28 | #size_list = [self.embedding_size] + self.mlp_hidden_size + [self.embedding_size] 29 | #self.mlp_layers = MLPLayers(size_list, self.dropout_prob) 30 | self.fm = BaseFactorizationMachine(reduce_sum=True) 31 | 32 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 33 | 34 | # parameters initialization 35 | self.apply(self._init_weights) 36 | 37 | def _init_weights(self, module): 38 | if isinstance(module, nn.Embedding): 39 | xavier_normal_(module.weight.data) 40 | elif
isinstance(module, nn.Linear): 41 | xavier_normal_(module.weight.data) 42 | if module.bias is not None: 43 | constant_(module.bias.data, 0) 44 | 45 | 46 | 47 | def mask_emb(self, user_seq): 48 | mask = user_seq != 0 49 | mask = mask.float() 50 | 51 | token_seq_embedding = self.item_embedding(user_seq) 52 | mask = mask.unsqueeze(-1).expand_as(token_seq_embedding) 53 | masked_token_seq_embedding = token_seq_embedding * mask 54 | 55 | return masked_token_seq_embedding 56 | 57 | 58 | 59 | def forward(self, inputs): 60 | inputs = inputs[0].unsqueeze(0) 61 | 62 | inputs_embedding = self.mask_emb(inputs) 63 | scores = self.fm(inputs_embedding.flatten(0,1)) 64 | output = scores.view(-1,2) 65 | 66 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 67 | return batch_loss 68 | 69 | 70 | @torch.no_grad() 71 | def predict(self,user_seq,item_feature): 72 | user_embedding = self.mask_emb(user_seq) 73 | user_embedding = torch.sum(user_embedding, dim=1) 74 | scores = torch.matmul(user_embedding,item_feature.t()) 75 | return scores 76 | 77 | @torch.no_grad() 78 | def compute_item_all(self): 79 | return self.item_embedding.weight 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /code/REC/model/IDNet/gru4rec.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn.init import xavier_uniform_, xavier_normal_ 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | 9 | 10 | class GRU4Rec(BaseModel): 11 | input_type = InputType.SEQ 12 | def __init__(self, config, data): 13 | super(GRU4Rec, self).__init__() 14 | 15 | # load parameters info 16 | self.embedding_size = config['embedding_size'] 17 | self.hidden_size = config['hidden_size'] * config['embedding_size'] 18 | self.num_layers = config['num_layers'] 19 | self.dropout_prob = config['dropout_prob'] 20 | 21 | 22 | self.user_num = data.user_num 23 | self.item_num = data.item_num 24 | # define layers and loss 25 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 26 | self.emb_dropout = nn.Dropout(self.dropout_prob) 27 | self.gru_layers = nn.GRU( 28 | input_size=self.embedding_size, 29 | hidden_size=self.hidden_size, 30 | num_layers=self.num_layers, 31 | bias=False, 32 | batch_first=True, 33 | ) 34 | 35 | self.dense = nn.Linear(self.hidden_size, self.embedding_size) 36 | 37 | 38 | # parameters initialization 39 | self.apply(self._init_weights) 40 | 41 | def _init_weights(self, module): 42 | if isinstance(module, nn.Embedding): 43 | xavier_normal_(module.weight) 44 | elif isinstance(module, nn.GRU): 45 | xavier_uniform_(module.weight_hh_l0) 46 | xavier_uniform_(module.weight_ih_l0) 47 | 48 | def forward(self, inputs): 49 | items, masked_index = inputs 50 | 51 | item_emb = self.item_embedding(items) #[batch, 2, max_seq_len+1, dim] 52 | pos_items_embs = item_emb[:, 0, :] #[batch, max_seq_len+1, dim] 53 | neg_items_embs = item_emb[:, 1, :] #[batch, max_seq_len+1, dim] 54 | 55 | input_emb = pos_items_embs[:, :-1, :] #[batch, max_seq_len, dim] 56 | target_pos_embs = pos_items_embs[:, 1:, :] #[batch, max_seq_len, dim] 57 | target_neg_embs = neg_items_embs[:, 1:, :] #[batch, max_seq_len, dim] 58 | 59 | input_emb_dropout = self.emb_dropout(input_emb) 60 | gru_output, _ = self.gru_layers(input_emb_dropout) 61 | gru_output = self.dense(gru_output) 62 | 63 | pos_score = (gru_output * 
target_pos_embs).sum(-1) #[batch, max_seq_len-1] 64 | neg_score = (gru_output * target_neg_embs).sum(-1) #[batch, max_seq_len-1] 65 | 66 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)*masked_index).sum(-1) 67 | return loss.mean(-1) 68 | 69 | 70 | @torch.no_grad() 71 | def predict(self, item_seq, item_feature): 72 | 73 | item_emb = item_feature[item_seq] 74 | 75 | item_seq_emb_dropout = self.emb_dropout(item_emb) 76 | gru_output, _ = self.gru_layers(item_seq_emb_dropout) 77 | gru_output = self.dense(gru_output) 78 | hidden = gru_output[:, -1] 79 | scores = torch.matmul(hidden, item_feature.t()) # [B n_items] 80 | return scores 81 | 82 | @torch.no_grad() 83 | def compute_item_all(self): 84 | return self.item_embedding.weight 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /code/REC/model/IDNet/lightgcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from REC.model.layers import LightGCNConv 8 | 9 | 10 | class LightGCN(BaseModel): 11 | 12 | input_type = InputType.PAIR 13 | 14 | def __init__(self, config, data): 15 | super(LightGCN, self).__init__() 16 | self.latent_dim = config['embedding_size'] 17 | self.n_layers = config['n_layers'] 18 | 19 | self.device = config['device'] 20 | 21 | self.user_num = data.user_num 22 | self.item_num = data.item_num 23 | 24 | self.edge_index, self.edge_weight = data.get_norm_adj_mat() 25 | self.edge_index, self.edge_weight = self.edge_index.to(self.device), self.edge_weight.to(self.device) 26 | 27 | self.user_embedding = nn.Embedding(self.user_num, self.latent_dim) 28 | self.item_embedding = nn.Embedding(self.item_num, self.latent_dim) 29 | 30 | 31 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 32 | self.gcn_conv = LightGCNConv(dim=self.latent_dim) 33 | self.store_ufeatures = None 34 | self.store_ifeatures = None 35 | self.apply(self._init_weights) 36 | 37 | 38 | def _init_weights(self, module): 39 | if isinstance(module, nn.Embedding): 40 | xavier_normal_(module.weight.data) 41 | elif isinstance(module, nn.Linear): 42 | xavier_normal_(module.weight.data) 43 | if module.bias is not None: 44 | constant_(module.bias.data, 0) 45 | 46 | 47 | def get_ego_embeddings(self): 48 | r"""Get the embedding of users and items and combine to an embedding matrix. 49 | Returns: 50 | Tensor of the embedding matrix. 
Shape of [n_items+n_users, embedding_dim] 51 | """ 52 | user_embeddings = self.user_embedding.weight 53 | item_embeddings = self.item_embedding.weight 54 | ego_embeddings = torch.cat([user_embeddings, item_embeddings], dim=0) 55 | return ego_embeddings 56 | 57 | def computer(self): 58 | all_embeddings = self.get_ego_embeddings() 59 | embeddings_list = [all_embeddings] 60 | 61 | for layer_idx in range(self.n_layers): 62 | all_embeddings = self.gcn_conv(all_embeddings, self.edge_index, self.edge_weight) 63 | embeddings_list.append(all_embeddings) 64 | lightgcn_all_embeddings = torch.stack(embeddings_list, dim=1) 65 | lightgcn_all_embeddings = torch.mean(lightgcn_all_embeddings, dim=1) 66 | 67 | user_all_embeddings, item_all_embeddings = torch.split(lightgcn_all_embeddings, [self.user_num, self.item_num]) 68 | return user_all_embeddings, item_all_embeddings 69 | 70 | def forward(self, input): 71 | user, item = input 72 | user_all_embeddings, item_all_embeddings = self.computer() 73 | embed_user = user_all_embeddings[user].unsqueeze(1) 74 | embed_item = item_all_embeddings[item] 75 | score = (embed_user * embed_item).sum(-1) 76 | output = score.view(-1,2) 77 | batch_loss = -torch.mean(1e-8+torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 78 | return batch_loss 79 | 80 | 81 | @torch.no_grad() 82 | def predict(self, user,features_pad): 83 | embed_user = self.store_ufeatures[user] 84 | scores = torch.matmul(embed_user,self.store_ifeatures.t()) 85 | return scores 86 | 87 | @torch.no_grad() 88 | def compute_item_all(self): 89 | self.store_ufeatures, self.store_ifeatures= self.computer() 90 | return None 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /code/REC/model/IDNet/lightsans.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from REC.model.layers import LightTransformerEncoder 5 | from REC.utils.enum_type import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | 9 | class LightSANs(BaseModel): 10 | input_type = InputType.AUGSEQ 11 | 12 | def __init__(self, config, dataload): 13 | super(LightSANs, self).__init__() 14 | 15 | # load parameters info 16 | self.n_layers = config['n_layers'] 17 | self.n_heads = config['n_heads'] 18 | self.hidden_size = config['embedding_size'] 19 | self.inner_size = config['inner_size'] 20 | 21 | self.k_interests = config["k_interests"] 22 | self.inner_size *= self.hidden_size 23 | self.hidden_dropout_prob = config['hidden_dropout_prob'] 24 | self.attn_dropout_prob = config['attn_dropout_prob'] 25 | self.hidden_act = config['hidden_act'] 26 | self.layer_norm_eps = config['layer_norm_eps'] 27 | self.device = config['device'] 28 | 29 | self.initializer_range = config['initializer_range'] 30 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 31 | self.item_num = dataload.item_num 32 | # define layers and loss 33 | self.item_embedding = nn.Embedding(self.item_num, self.hidden_size, padding_idx=0) 34 | self.position_embedding = nn.Embedding(self.max_seq_length, self.hidden_size) 35 | self.trm_encoder = LightTransformerEncoder( 36 | n_layers=self.n_layers, 37 | n_heads=self.n_heads, 38 | k_interests=self.k_interests, 39 | hidden_size=self.hidden_size, 40 | seq_len=self.max_seq_length, 41 | inner_size=self.inner_size, 42 | hidden_dropout_prob=self.hidden_dropout_prob, 43 | attn_dropout_prob=self.attn_dropout_prob, 44 | hidden_act=self.hidden_act, 45 | layer_norm_eps=self.layer_norm_eps 46 | ) 47 | 
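# Config note (reading of the code above, not part of the original source): `inner_size` is used as a
# multiplier and expanded to inner_size * embedding_size before being passed to the encoder, and
# `k_interests` is forwarded to LightTransformerEncoder, where it presumably sets the number of latent
# interests used by LightSANs' low-rank item-to-interest attention.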
48 | self.LayerNorm = nn.LayerNorm(self.hidden_size, eps=self.layer_norm_eps) 49 | self.dropout = nn.Dropout(self.hidden_dropout_prob) 50 | 51 | 52 | # parameters initialization 53 | self.apply(self._init_weights) 54 | 55 | def _init_weights(self, module): 56 | """Initialize the weights""" 57 | if isinstance(module, (nn.Linear, nn.Embedding)): 58 | module.weight.data.normal_(mean=0.0, std=self.initializer_range) 59 | elif isinstance(module, nn.LayerNorm): 60 | module.bias.data.zero_() 61 | module.weight.data.fill_(1.0) 62 | if isinstance(module, nn.Linear) and module.bias is not None: 63 | module.bias.data.zero_() 64 | 65 | 66 | def forward(self, interaction): 67 | item_emb = self.item_embedding(interaction) 68 | 69 | input_emb = item_emb[:, :-2, :] 70 | target_pos_embs = item_emb[:, -2, :] 71 | target_neg_embs = item_emb[:, -1, :] 72 | 73 | 74 | position_ids = torch.arange(input_emb.size(1), dtype=torch.long, device=self.device) 75 | position_embedding = self.position_embedding(position_ids) 76 | 77 | input_emb = self.LayerNorm(input_emb) 78 | input_emb = self.dropout(input_emb) 79 | 80 | output_embs = self.trm_encoder(input_emb, position_embedding, output_all_encoded_layers=False) 81 | output_embs = output_embs[-1] 82 | output_embs = output_embs[:, -1, :] 83 | 84 | pos_score = (output_embs * target_pos_embs).sum(-1) 85 | neg_score = (output_embs * target_neg_embs).sum(-1) 86 | 87 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)) 88 | return loss.mean(-1) 89 | 90 | 91 | @torch.no_grad() 92 | def predict(self, item_seq, item_feature): 93 | 94 | position_ids = torch.arange(item_seq.size(1), dtype=torch.long, device=item_seq.device) 95 | position_embedding = self.position_embedding(position_ids) 96 | 97 | input_emb = self.item_embedding(item_seq) 98 | input_emb = self.LayerNorm(input_emb) 99 | input_emb = self.dropout(input_emb) 100 | 101 | output = self.trm_encoder(input_emb, position_embedding, output_all_encoded_layers=False) 102 | output_embs = output[-1] 103 | seq_output = output_embs[:, -1] 104 | 105 | scores = torch.matmul(seq_output, item_feature.t()) 106 | return scores 107 | 108 | @torch.no_grad() 109 | def compute_item_all(self): 110 | return self.item_embedding.weight 111 | 112 | 113 | -------------------------------------------------------------------------------- /code/REC/model/IDNet/mf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | 9 | 10 | class MF(BaseModel): 11 | 12 | input_type = InputType.PAIR 13 | 14 | def __init__(self, config, data): 15 | super(MF, self).__init__() 16 | self.mlp_hidden_size = config['mlp_hidden_size'] 17 | self.dropout_prob = config['dropout_prob'] 18 | self.embedding_size = config['embedding_size'] 19 | self.out_size = self.mlp_hidden_size[-1] if len(self.mlp_hidden_size) else self.embedding_size 20 | 21 | self.device = config['device'] 22 | 23 | self.user_num = data.user_num 24 | self.item_num = data.item_num 25 | 26 | 27 | user_size_list = [self.embedding_size] + self.mlp_hidden_size 28 | item_size_list = [self.embedding_size] + self.mlp_hidden_size 29 | 30 | # define layers and loss 31 | self.user_mlp_layers = MLPLayers(user_size_list, self.dropout_prob, activation='tanh', bn=True) 32 | self.item_mlp_layers = MLPLayers(item_size_list, self.dropout_prob, 
activation='tanh', bn=True) 33 | 34 | self.user_embedding = nn.Embedding(self.user_num, self.embedding_size) 35 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size) 36 | 37 | 38 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 39 | 40 | self.apply(self._init_weights) 41 | 42 | 43 | def _init_weights(self, module): 44 | if isinstance(module, nn.Embedding): 45 | xavier_normal_(module.weight.data) 46 | elif isinstance(module, nn.Linear): 47 | xavier_normal_(module.weight.data) 48 | if module.bias is not None: 49 | constant_(module.bias.data, 0) 50 | 51 | 52 | def forward(self, input): 53 | user, item = input 54 | embed_user = self.user_embedding(user) 55 | embed_item = self.item_embedding(item).view(-1,self.embedding_size) 56 | user_dnn_out = self.user_mlp_layers(embed_user).unsqueeze(1) 57 | item_dnn_out = self.item_mlp_layers(embed_item) 58 | item_dnn_out = item_dnn_out.view(user.shape[0], -1, self.out_size) 59 | score = (user_dnn_out * item_dnn_out).sum(-1) 60 | output = score.view(-1,2) 61 | batch_loss = -torch.mean(1e-8+torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 62 | return batch_loss 63 | 64 | 65 | 66 | 67 | 68 | @torch.no_grad() 69 | def predict(self, user,item_feature): 70 | 71 | user_feature = self.user_embedding(user) 72 | user_dnn_out = self.user_mlp_layers(user_feature) 73 | 74 | scores = torch.matmul(user_dnn_out,item_feature.t()) 75 | return scores 76 | 77 | @torch.no_grad() # [num_item, 64] 78 | def compute_item_all(self): 79 | embed_item = self.item_embedding.weight 80 | return self.item_mlp_layers(embed_item) 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /code/REC/model/IDNet/srgnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from REC.utils import InputType 4 | from REC.model.basemodel import BaseModel 5 | import torch.nn.functional as F 6 | import math 7 | import numpy as np 8 | #torch.set_default_dtype(torch.float64) 9 | class SRGNN(BaseModel): 10 | input_type = InputType.AUGSEQ 11 | def __init__(self, config, data): 12 | super(SRGNN, self).__init__() 13 | self.hidden_size = config['embedding_size'] 14 | self.step = config['step'] 15 | 16 | self.device = config['device'] 17 | self.item_num = data.item_num 18 | 19 | self.embedding = nn.Embedding(self.item_num, self.hidden_size) 20 | self.gnn = GNN(self.hidden_size, step=self.step) 21 | self.linear_one = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 22 | self.linear_two = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 23 | self.linear_three = nn.Linear(self.hidden_size, 1, bias=False) 24 | self.linear_transform = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=True) 25 | 26 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 27 | self._reset_parameters() 28 | 29 | 30 | def _reset_parameters(self): 31 | stdv = 1.0 / np.sqrt(self.hidden_size) 32 | for weight in self.parameters(): 33 | weight.data.uniform_(-stdv, stdv) 34 | 35 | 36 | 37 | def seq_modeling(self, alias_inputs, A, hidden, mask): 38 | gnn_output = self.gnn(A, hidden) 39 | seq_hidden = [] 40 | for i in range(len(alias_inputs)): 41 | seq_hidden.append(gnn_output[i][alias_inputs[i]]) 42 | seq_hidden = torch.stack(seq_hidden) 43 | 44 | ht = seq_hidden[torch.arange(mask.shape[0]).long(), torch.sum(mask, 1) - 1] # batch_size x latent_size; the last position is taken as the global representation 45 | q1 = self.linear_one(ht).view(ht.shape[0], 1, ht.shape[1]) # batch_size x 1 x 
latent_size 46 | q2 = self.linear_two(seq_hidden) # batch_size x seq_length x latent_size 47 | alpha = self.linear_three(torch.sigmoid(q1 + q2)) 48 | a = torch.sum(alpha * seq_hidden * mask.view(mask.shape[0], -1, 1).float(), 1) 49 | a = self.linear_transform(torch.cat([a, ht], 1)) 50 | return a 51 | 52 | def forward(self, input): 53 | alias_inputs, A, items, mask, targets = input 54 | hidden = self.embedding(items) 55 | seq_output = self.seq_modeling(alias_inputs, A, hidden, mask).unsqueeze(1) #[batch,1, dim] 56 | target_output =self.embedding(targets) #[batch,2, dim] 57 | score = (seq_output * target_output).sum(-1) 58 | output = score.view(-1,2) 59 | batch_loss = -torch.mean(1e-8+torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 60 | return batch_loss 61 | 62 | 63 | @torch.no_grad() 64 | def predict(self, input,item_feature): 65 | alias_inputs, A, items, mask = input 66 | hidden = item_feature[items] 67 | seq_output = self.seq_modeling(alias_inputs, A, hidden, mask) 68 | scores = torch.matmul(seq_output,item_feature.t()) 69 | return scores 70 | 71 | @torch.no_grad() 72 | def compute_item_all(self): 73 | embed_item = self.embedding.weight 74 | return embed_item 75 | 76 | 77 | 78 | class GNN(nn.Module): 79 | def __init__(self, hidden_size, step=1): 80 | super(GNN, self).__init__() 81 | self.step = step 82 | self.hidden_size = hidden_size 83 | self.input_size = hidden_size * 2 84 | self.gate_size = 3 * hidden_size 85 | self.w_ih = nn.Parameter(torch.Tensor(self.gate_size, self.input_size)) 86 | self.w_hh = nn.Parameter(torch.Tensor(self.gate_size, self.hidden_size)) 87 | self.b_ih = nn.Parameter(torch.Tensor(self.gate_size)) 88 | self.b_hh = nn.Parameter(torch.Tensor(self.gate_size)) 89 | self.b_iah = nn.Parameter(torch.Tensor(self.hidden_size)) 90 | self.b_oah = nn.Parameter(torch.Tensor(self.hidden_size)) 91 | 92 | self.linear_edge_in = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 93 | self.linear_edge_out = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 94 | self.linear_edge_f = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 95 | 96 | def GNNCell(self, A, hidden): 97 | input_in = torch.matmul(A[:, :, :A.shape[1]], self.linear_edge_in(hidden)) + self.b_iah 98 | input_out = torch.matmul(A[:, :, A.shape[1]: 2 * A.shape[1]], self.linear_edge_out(hidden)) + self.b_oah 99 | inputs = torch.cat([input_in, input_out], 2) 100 | gi = F.linear(inputs, self.w_ih, self.b_ih) 101 | gh = F.linear(hidden, self.w_hh, self.b_hh) 102 | i_r, i_i, i_n = gi.chunk(3, 2) 103 | h_r, h_i, h_n = gh.chunk(3, 2) 104 | resetgate = torch.sigmoid(i_r + h_r) 105 | inputgate = torch.sigmoid(i_i + h_i) 106 | newgate = torch.tanh(i_n + resetgate * h_n) 107 | hy = newgate + inputgate * (hidden - newgate) 108 | return hy 109 | 110 | def forward(self, A, hidden): 111 | for i in range(self.step): 112 | hidden = self.GNNCell(A, hidden) 113 | return hidden -------------------------------------------------------------------------------- /code/REC/model/IDNet/widedeep.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | class WideDeep(BaseModel): 9 | 10 | input_type = InputType.SEQ 11 | def __init__(self, config, dataload): 12 | super(WideDeep, self).__init__() 13 | 14 | # load parameters info 15 | self.mlp_hidden_size = 
config['mlp_hidden_size'] 16 | self.dropout_prob = config['dropout_prob'] 17 | self.embedding_size = config['embedding_size'] 18 | self.device = config['device'] 19 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 20 | self.method = config['method'] if config['method'] else 'mean' 21 | 22 | self.item_num = dataload.item_num 23 | # wide part 24 | # acts as the weight matrix w of the linear transform in the wide part 25 | self.wide_item_embedding = nn.Embedding(self.item_num, 1, padding_idx=0) 26 | self.wide_bias = nn.Parameter(torch.zeros((1,)), requires_grad=True) 27 | 28 | 29 | # deep part 30 | self.deep_item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 31 | size_list = [self.embedding_size * self.max_seq_length] + self.mlp_hidden_size 32 | self.mlp_layers = MLPLayers(size_list, self.dropout_prob) 33 | self.deep_predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 34 | #self.wide_layer = nn.Linear(self.max_seq_length, 1) 35 | 36 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 37 | 38 | # parameters initialization 39 | self.apply(self._init_weights) 40 | 41 | def _init_weights(self, module): 42 | if isinstance(module, nn.Embedding): 43 | xavier_normal_(module.weight.data) 44 | elif isinstance(module, nn.Linear): 45 | xavier_normal_(module.weight.data) 46 | if module.bias is not None: 47 | constant_(module.bias.data, 0) 48 | 49 | def forward(self, inputs): #[batch, 2 , seq_len] 50 | batch_size = inputs.shape[0] 51 | #wide_output = self.wide_layer(interaction.float()) #[batch, 2 , seq_len] -> [batch, 2 , 1] 52 | 53 | wide_output = torch.sum(self.wide_item_embedding(inputs),dim=-2) + self.wide_bias 54 | 55 | deep_input_emb = self.deep_item_embedding(inputs).view(batch_size*2, -1) ##[batch, 2 , seq_len, dim] -> 56 | deep_output = self.mlp_layers(deep_input_emb) 57 | deep_output = self.deep_predict_layer(deep_output) 58 | 59 | 60 | output = wide_output.view(-1,2) + deep_output.view(-1,2) 61 | 62 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 63 | return batch_loss 64 | 65 | 66 | @torch.no_grad() 67 | def predict(self, interaction,item_token): 68 | item_seq, item_len = interaction 69 | batch_size = item_seq.shape[0] 70 | item_seq = item_seq.flatten(0,1) 71 | wide_output = torch.sum(self.wide_item_embedding(item_seq), dim=-2) + self.wide_bias 72 | deep_input_emb = self.deep_item_embedding(item_seq).view(item_seq.shape[0],-1) 73 | deep_output = self.mlp_layers(deep_input_emb) 74 | deep_output = self.deep_predict_layer(deep_output) 75 | 76 | 77 | output = wide_output + deep_output 78 | scores = output.view(batch_size, -1) 79 | return scores 80 | 81 | @torch.no_grad() 82 | def compute_item_all(self): 83 | return None 84 | 85 | #return torch.arange(0,self.n_items).to(self.device) 86 | -------------------------------------------------------------------------------- /code/REC/model/IdModel/din.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers, SequenceAttLayer 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | class DIN(BaseModel): 9 | 10 | input_type = InputType.SEQ 11 | 12 | def __init__(self, config, dataload): 13 | super(DIN, self).__init__() 14 | 15 | # get field names and parameter value from config 16 | self.embedding_size = config['embedding_size'] 17 | self.mlp_hidden_size = config['mlp_hidden_size'] 18 | self.device = config['device'] 19 | 
self.dropout_prob = config['dropout_prob'] 20 | 21 | self.item_num = dataload.item_num 22 | 23 | self.dnn_list = [3 * self.embedding_size] + self.mlp_hidden_size 24 | self.att_list = [4 * self.embedding_size] + self.mlp_hidden_size 25 | 26 | self.attention = SequenceAttLayer( 27 | self.att_list, activation='Sigmoid', softmax_stag=False, return_seq_weight=False 28 | ) 29 | 30 | #self.dnn_mlp_layers = MLPLayers(self.dnn_list, activation='Dice', dropout=self.dropout_prob, bn=True) 31 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 32 | #self.dnn_predict_layers = nn.Linear(self.mlp_hidden_size[-1], 1) 33 | #self.criterion = nn.BCEWithLogitsLoss() 34 | # parameters initialization 35 | self.apply(self._init_weights) 36 | 37 | 38 | def _init_weights(self, module): 39 | if isinstance(module, nn.Embedding): 40 | xavier_normal_(module.weight.data) 41 | elif isinstance(module, nn.Linear): 42 | xavier_normal_(module.weight.data) 43 | if module.bias is not None: 44 | constant_(module.bias.data, 0) 45 | 46 | 47 | 48 | def get_scores(self, cand_embs, user_seq_emb, mask): 49 | user_emb = self.attention(cand_embs, user_seq_emb,mask).squeeze(1) 50 | # din_in = torch.cat([user_emb, cand_embs, user_emb * cand_embs], dim=-1) 51 | # din_out = self.dnn_mlp_layers(din_in) 52 | # scores = self.dnn_predict_layers(din_out).squeeze(1) 53 | scores = (user_emb*cand_embs).sum(-1) 54 | return scores 55 | 56 | 57 | def forward(self, items): 58 | #[batch,seq_len+2] 59 | item_emb = self.item_embedding(items) #[batch,seq_len+2,dim] 60 | user_seq_emb = item_emb[:, :-2] 61 | pos_cand_embs = item_emb[:, -2] 62 | neg_cand_embs = item_emb[:, -1] 63 | 64 | # attention 65 | mask = (items[:,:-2] == 0) 66 | # pos_user_emb = self.attention(pos_cand_embs, user_seq_emb,mask).squeeze(1) 67 | # neg_user_emb = self.attention(neg_cand_embs, user_seq_emb,mask).squeeze(1) 68 | 69 | # pos_score = (pos_user_emb * pos_cand_embs).sum(-1) #[batch] 70 | # neg_score = (neg_user_emb * neg_cand_embs).sum(-1) #[batch] 71 | pos_score = self.get_scores(pos_cand_embs, user_seq_emb, mask) 72 | neg_score = self.get_scores(neg_cand_embs, user_seq_emb, mask) 73 | 74 | # pos_labels, neg_labels = torch.ones(pos_score.shape).to(self.device), torch.zeros(neg_score.shape).to(self.device) 75 | 76 | # loss_1 = self.criterion(pos_score, pos_labels) 77 | # loss_2 = self.criterion(neg_score, neg_labels) 78 | # loss = loss_1 + loss_2 79 | MBAloss = 0.01 * torch.norm(item_emb, 2) / item_emb.shape[0] 80 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)).mean(-1) 81 | return loss + MBAloss 82 | 83 | 84 | 85 | 86 | @torch.no_grad() 87 | def predict(self, item_seq, item_feature): 88 | 89 | #[batch,item_num, seq_len+1] 90 | batch_size = item_seq.shape[0] 91 | item_seq = item_seq.flatten(0,1) #[batch*item_num, seq_len+1] 92 | item_emb = item_feature[item_seq] #[batch*item_num, seq_len+1,dim] 93 | user_seq_emb = item_emb[:, :-1] 94 | cand_emb = item_emb[:,-1] 95 | 96 | 97 | # attention 98 | mask = (item_seq[:,:-1] == 0) 99 | user_emb = self.attention(cand_emb, user_seq_emb, mask).squeeze(1) #[batch*item_num,dim] 100 | 101 | user_emb = user_emb.view(batch_size, self.item_num, self.embedding_size) #[batch,item_num,dim] 102 | scores = (user_emb*item_feature).sum(-1) # [B n_items] 103 | return scores 104 | 105 | @torch.no_grad() 106 | def compute_item_all(self): 107 | return self.item_embedding.weight 108 | -------------------------------------------------------------------------------- /code/REC/model/IdModel/dssm.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | class DSSM(BaseModel): 9 | input_type = InputType.SEQ 10 | def __init__(self, config, dataload): 11 | super(DSSM, self).__init__() 12 | 13 | # load parameters info 14 | self.mlp_hidden_size = config['mlp_hidden_size'] 15 | self.dropout_prob = config['dropout_prob'] 16 | self.embedding_size = config['embedding_size'] 17 | self.out_size = self.mlp_hidden_size[-1] if len(self.mlp_hidden_size) else self.embedding_size 18 | 19 | self.device = config['device'] 20 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 21 | 22 | self.item_num = dataload.item_num 23 | #self.user_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 24 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 25 | self.user_embedding = self.item_embedding 26 | #size_list = [self.embedding_size] + self.mlp_hidden_size + [self.embedding_size] 27 | size_list = self.mlp_hidden_size 28 | self.mlp_layers = MLPLayers(size_list, self.dropout_prob) 29 | 30 | 31 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 32 | 33 | # parameters initialization 34 | self.apply(self._init_weights) 35 | 36 | def _init_weights(self, module): 37 | if isinstance(module, nn.Embedding): 38 | xavier_normal_(module.weight.data) 39 | elif isinstance(module, nn.Linear): 40 | xavier_normal_(module.weight.data) 41 | if module.bias is not None: 42 | constant_(module.bias.data, 0) 43 | 44 | 45 | 46 | def avg_emb(self, user_seq): 47 | mask = user_seq != 0 48 | mask = mask.float() 49 | value_cnt = torch.sum(mask, dim=1, keepdim=True) 50 | token_seq_embedding = self.user_embedding(user_seq) 51 | mask = mask.unsqueeze(2).expand_as(token_seq_embedding) 52 | masked_token_seq_embedding = token_seq_embedding * mask.float() 53 | result = torch.sum(masked_token_seq_embedding, dim=-2) 54 | user_embedding = torch.div(result, value_cnt + 1e-8) 55 | return user_embedding 56 | 57 | 58 | 59 | def forward(self, inputs): 60 | inputs = inputs[0].unsqueeze(0) 61 | user_seq = inputs[:, :-2] 62 | target_item = inputs[:, -2:] 63 | user_embedding = self.avg_emb(user_seq) 64 | user_embedding = self.mlp_layers(user_embedding).unsqueeze(1) 65 | item_embedding = self.item_embedding(target_item) 66 | score = (user_embedding * item_embedding).sum(-1) 67 | output = score.view(-1,2) 68 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 69 | return batch_loss 70 | 71 | 72 | # With concatenation the result would depend on item order; taking the mean (or sum) directly is order-independent 73 | @torch.no_grad() 74 | def predict(self,user_seq,item_feature): 75 | user_embedding = self.avg_emb(user_seq) 76 | user_embedding = self.mlp_layers(user_embedding) 77 | scores = torch.matmul(user_embedding,item_feature.t()) 78 | return scores 79 | 80 | @torch.no_grad() 81 | def compute_item_all(self): 82 | return self.item_embedding.weight 83 | 84 | #return torch.arange(0,self.n_items).to(self.device) 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /code/REC/model/IdModel/fm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers, 
BaseFactorizationMachine 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from logging import getLogger 8 | 9 | 10 | class FM(BaseModel): 11 | input_type = InputType.SEQ 12 | def __init__(self, config, dataload): 13 | super(FM, self).__init__() 14 | 15 | # load parameters info 16 | self.mlp_hidden_size = config['mlp_hidden_size'] 17 | self.dropout_prob = config['dropout_prob'] 18 | self.embedding_size = config['embedding_size'] 19 | self.out_size = self.mlp_hidden_size[-1] if (self.mlp_hidden_size and len(self.mlp_hidden_size)) else self.embedding_size 20 | 21 | self.device = config['device'] 22 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 23 | 24 | self.item_num = dataload.item_num 25 | #self.user_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 26 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 27 | 28 | #size_list = [self.embedding_size] + self.mlp_hidden_size + [self.embedding_size] 29 | #self.mlp_layers = MLPLayers(size_list, self.dropout_prob) 30 | self.fm = BaseFactorizationMachine(reduce_sum=True) 31 | 32 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 33 | 34 | # parameters initialization 35 | self.apply(self._init_weights) 36 | 37 | def _init_weights(self, module): 38 | if isinstance(module, nn.Embedding): 39 | xavier_normal_(module.weight.data) 40 | elif isinstance(module, nn.Linear): 41 | xavier_normal_(module.weight.data) 42 | if module.bias is not None: 43 | constant_(module.bias.data, 0) 44 | 45 | 46 | 47 | def mask_emb(self, user_seq): 48 | mask = user_seq != 0 49 | mask = mask.float() 50 | 51 | token_seq_embedding = self.item_embedding(user_seq) 52 | mask = mask.unsqueeze(-1).expand_as(token_seq_embedding) 53 | masked_token_seq_embedding = token_seq_embedding * mask 54 | 55 | return masked_token_seq_embedding 56 | 57 | 58 | 59 | def forward(self, inputs): 60 | inputs = inputs[0].unsqueeze(0) 61 | 62 | inputs_embedding = self.mask_emb(inputs) 63 | scores = self.fm(inputs_embedding.flatten(0,1)) 64 | output = scores.view(-1,2) 65 | 66 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 67 | return batch_loss 68 | 69 | 70 | @torch.no_grad() 71 | def predict(self,user_seq,item_feature): 72 | user_embedding = self.mask_emb(user_seq) 73 | user_embedding = torch.sum(user_embedding, dim=1) 74 | scores = torch.matmul(user_embedding,item_feature.t()) 75 | return scores 76 | 77 | @torch.no_grad() 78 | def compute_item_all(self): 79 | return self.item_embedding.weight 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /code/REC/model/IdModel/gru4rec.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn.init import xavier_uniform_, xavier_normal_ 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | 9 | 10 | class GRU4Rec(BaseModel): 11 | input_type = InputType.SEQ 12 | def __init__(self, config, data): 13 | super(GRU4Rec, self).__init__() 14 | 15 | # load parameters info 16 | self.embedding_size = config['embedding_size'] 17 | self.hidden_size = config['hidden_size'] * config['embedding_size'] 18 | self.num_layers = config['num_layers'] 19 | self.dropout_prob = config['dropout_prob'] 20 | 21 | 22 | self.user_num = data.user_num 23 | self.item_num = data.item_num 24 | # define layers and loss 25 | 
self.item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 26 | self.emb_dropout = nn.Dropout(self.dropout_prob) 27 | self.gru_layers = nn.GRU( 28 | input_size=self.embedding_size, 29 | hidden_size=self.hidden_size, 30 | num_layers=self.num_layers, 31 | bias=False, 32 | batch_first=True, 33 | ) 34 | 35 | self.dense = nn.Linear(self.hidden_size, self.embedding_size) 36 | 37 | 38 | # parameters initialization 39 | self.apply(self._init_weights) 40 | 41 | def _init_weights(self, module): 42 | if isinstance(module, nn.Embedding): 43 | xavier_normal_(module.weight) 44 | elif isinstance(module, nn.GRU): 45 | xavier_uniform_(module.weight_hh_l0) 46 | xavier_uniform_(module.weight_ih_l0) 47 | 48 | def forward(self, inputs): 49 | items, masked_index = inputs 50 | 51 | item_emb = self.item_embedding(items) #[batch, 2, max_seq_len+1, dim] 52 | pos_items_embs = item_emb[:, 0, :] #[batch, max_seq_len+1, dim] 53 | neg_items_embs = item_emb[:, 1, :] #[batch, max_seq_len+1, dim] 54 | 55 | input_emb = pos_items_embs[:, :-1, :] #[batch, max_seq_len, dim] 56 | target_pos_embs = pos_items_embs[:, 1:, :] #[batch, max_seq_len, dim] 57 | target_neg_embs = neg_items_embs[:, 1:, :] #[batch, max_seq_len, dim] 58 | 59 | input_emb_dropout = self.emb_dropout(input_emb) 60 | gru_output, _ = self.gru_layers(input_emb_dropout) 61 | gru_output = self.dense(gru_output) 62 | 63 | pos_score = (gru_output * target_pos_embs).sum(-1) #[batch, max_seq_len-1] 64 | neg_score = (gru_output * target_neg_embs).sum(-1) #[batch, max_seq_len-1] 65 | 66 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)*masked_index).sum(-1) 67 | return loss.mean(-1) 68 | 69 | 70 | @torch.no_grad() 71 | def predict(self, item_seq, item_feature): 72 | 73 | item_emb = item_feature[item_seq] 74 | 75 | item_seq_emb_dropout = self.emb_dropout(item_emb) 76 | gru_output, _ = self.gru_layers(item_seq_emb_dropout) 77 | gru_output = self.dense(gru_output) 78 | hidden = gru_output[:, -1] 79 | scores = torch.matmul(hidden, item_feature.t()) # [B n_items] 80 | return scores 81 | 82 | @torch.no_grad() 83 | def compute_item_all(self): 84 | return self.item_embedding.weight 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /code/REC/model/IdModel/lightgcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from REC.model.layers import LightGCNConv 8 | 9 | 10 | class LightGCN(BaseModel): 11 | 12 | input_type = InputType.PAIR 13 | 14 | def __init__(self, config, data): 15 | super(LightGCN, self).__init__() 16 | self.latent_dim = config['embedding_size'] 17 | self.n_layers = config['n_layers'] 18 | 19 | self.device = config['device'] 20 | 21 | self.user_num = data.user_num 22 | self.item_num = data.item_num 23 | 24 | self.edge_index, self.edge_weight = data.get_norm_adj_mat() 25 | self.edge_index, self.edge_weight = self.edge_index.to(self.device), self.edge_weight.to(self.device) 26 | 27 | self.user_embedding = nn.Embedding(self.user_num, self.latent_dim) 28 | self.item_embedding = nn.Embedding(self.item_num, self.latent_dim) 29 | 30 | 31 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 32 | self.gcn_conv = LightGCNConv(dim=self.latent_dim) 33 | self.store_ufeatures = None 34 | self.store_ifeatures = None 35 | 
self.apply(self._init_weights) 36 | 37 | 38 | def _init_weights(self, module): 39 | if isinstance(module, nn.Embedding): 40 | xavier_normal_(module.weight.data) 41 | elif isinstance(module, nn.Linear): 42 | xavier_normal_(module.weight.data) 43 | if module.bias is not None: 44 | constant_(module.bias.data, 0) 45 | 46 | 47 | def get_ego_embeddings(self): 48 | r"""Get the embedding of users and items and combine to an embedding matrix. 49 | Returns: 50 | Tensor of the embedding matrix. Shape of [n_items+n_users, embedding_dim] 51 | """ 52 | user_embeddings = self.user_embedding.weight 53 | item_embeddings = self.item_embedding.weight 54 | ego_embeddings = torch.cat([user_embeddings, item_embeddings], dim=0) 55 | return ego_embeddings 56 | 57 | def computer(self): 58 | all_embeddings = self.get_ego_embeddings() 59 | embeddings_list = [all_embeddings] 60 | 61 | for layer_idx in range(self.n_layers): 62 | all_embeddings = self.gcn_conv(all_embeddings, self.edge_index, self.edge_weight) 63 | embeddings_list.append(all_embeddings) 64 | lightgcn_all_embeddings = torch.stack(embeddings_list, dim=1) 65 | lightgcn_all_embeddings = torch.mean(lightgcn_all_embeddings, dim=1) 66 | 67 | user_all_embeddings, item_all_embeddings = torch.split(lightgcn_all_embeddings, [self.user_num, self.item_num]) 68 | return user_all_embeddings, item_all_embeddings 69 | 70 | def forward(self, input): 71 | user, item = input 72 | user_all_embeddings, item_all_embeddings = self.computer() 73 | embed_user = user_all_embeddings[user].unsqueeze(1) 74 | embed_item = item_all_embeddings[item] 75 | score = (embed_user * embed_item).sum(-1) 76 | output = score.view(-1,2) 77 | batch_loss = -torch.mean(1e-8+torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 78 | return batch_loss 79 | 80 | 81 | @torch.no_grad() 82 | def predict(self, user,features_pad): 83 | embed_user = self.store_ufeatures[user] 84 | scores = torch.matmul(embed_user,self.store_ifeatures.t()) 85 | return scores 86 | 87 | @torch.no_grad() 88 | def compute_item_all(self): 89 | self.store_ufeatures, self.store_ifeatures= self.computer() 90 | return None 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /code/REC/model/IdModel/lightsans.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from REC.model.layers import LightTransformerEncoder 5 | from REC.utils.enum_type import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | 9 | class LightSANs(BaseModel): 10 | input_type = InputType.SEQ 11 | 12 | def __init__(self, config, dataload): 13 | super(LightSANs, self).__init__() 14 | 15 | # load parameters info 16 | self.n_layers = config['n_layers'] 17 | self.n_heads = config['n_heads'] 18 | self.hidden_size = config['embedding_size'] 19 | self.inner_size = config['inner_size'] 20 | 21 | self.k_interests = config["k_interests"] 22 | self.inner_size *= self.hidden_size 23 | self.hidden_dropout_prob = config['hidden_dropout_prob'] 24 | self.attn_dropout_prob = config['attn_dropout_prob'] 25 | self.hidden_act = config['hidden_act'] 26 | self.layer_norm_eps = config['layer_norm_eps'] 27 | self.device = config['device'] 28 | 29 | self.initializer_range = config['initializer_range'] 30 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 31 | self.item_num = dataload.item_num 32 | # define layers and loss 33 | self.item_embedding = nn.Embedding(self.item_num, self.hidden_size, padding_idx=0) 34 | 
self.position_embedding = nn.Embedding(self.max_seq_length, self.hidden_size) 35 | self.trm_encoder = LightTransformerEncoder( 36 | n_layers=self.n_layers, 37 | n_heads=self.n_heads, 38 | k_interests=self.k_interests, 39 | hidden_size=self.hidden_size, 40 | seq_len=self.max_seq_length, 41 | inner_size=self.inner_size, 42 | hidden_dropout_prob=self.hidden_dropout_prob, 43 | attn_dropout_prob=self.attn_dropout_prob, 44 | hidden_act=self.hidden_act, 45 | layer_norm_eps=self.layer_norm_eps 46 | ) 47 | 48 | self.LayerNorm = nn.LayerNorm(self.hidden_size, eps=self.layer_norm_eps) 49 | self.dropout = nn.Dropout(self.hidden_dropout_prob) 50 | 51 | 52 | # parameters initialization 53 | self.apply(self._init_weights) 54 | 55 | def _init_weights(self, module): 56 | """Initialize the weights""" 57 | if isinstance(module, (nn.Linear, nn.Embedding)): 58 | module.weight.data.normal_(mean=0.0, std=self.initializer_range) 59 | elif isinstance(module, nn.LayerNorm): 60 | module.bias.data.zero_() 61 | module.weight.data.fill_(1.0) 62 | if isinstance(module, nn.Linear) and module.bias is not None: 63 | module.bias.data.zero_() 64 | 65 | 66 | def forward(self, interaction): 67 | item_emb = self.item_embedding(interaction) 68 | 69 | input_emb = item_emb[:, :-2, :] 70 | target_pos_embs = item_emb[:, -2, :] 71 | target_neg_embs = item_emb[:, -1, :] 72 | 73 | 74 | position_ids = torch.arange(input_emb.size(1), dtype=torch.long, device=self.device) 75 | position_embedding = self.position_embedding(position_ids) 76 | 77 | input_emb = self.LayerNorm(input_emb) 78 | input_emb = self.dropout(input_emb) 79 | 80 | output_embs = self.trm_encoder(input_emb, position_embedding, output_all_encoded_layers=False) 81 | output_embs = output_embs[-1] 82 | output_embs = output_embs[:, -1, :] 83 | 84 | pos_score = (output_embs * target_pos_embs).sum(-1) 85 | neg_score = (output_embs * target_neg_embs).sum(-1) 86 | 87 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)) 88 | return loss.mean(-1) 89 | 90 | 91 | @torch.no_grad() 92 | def predict(self, item_seq, item_feature): 93 | 94 | position_ids = torch.arange(item_seq.size(1), dtype=torch.long, device=item_seq.device) 95 | position_embedding = self.position_embedding(position_ids) 96 | 97 | input_emb = self.item_embedding(item_seq) 98 | input_emb = self.LayerNorm(input_emb) 99 | input_emb = self.dropout(input_emb) 100 | 101 | output = self.trm_encoder(input_emb, position_embedding, output_all_encoded_layers=False) 102 | output_embs = output[-1] 103 | seq_output = output_embs[:, -1] 104 | 105 | scores = torch.matmul(seq_output, item_feature.t()) 106 | return scores 107 | 108 | @torch.no_grad() 109 | def compute_item_all(self): 110 | return self.item_embedding.weight 111 | 112 | 113 | -------------------------------------------------------------------------------- /code/REC/model/IdModel/mf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | 9 | 10 | class MF(BaseModel): 11 | 12 | input_type = InputType.PAIR 13 | 14 | def __init__(self, config, data): 15 | super(MF, self).__init__() 16 | self.mlp_hidden_size = config['mlp_hidden_size'] 17 | self.dropout_prob = config['dropout_prob'] 18 | self.embedding_size = config['embedding_size'] 19 | self.out_size = self.mlp_hidden_size[-1] if len(self.mlp_hidden_size) 
else self.embedding_size 20 | 21 | self.device = config['device'] 22 | 23 | self.user_num = data.user_num 24 | self.item_num = data.item_num 25 | 26 | 27 | user_size_list = [self.embedding_size] + self.mlp_hidden_size 28 | item_size_list = [self.embedding_size] + self.mlp_hidden_size 29 | 30 | # define layers and loss 31 | self.user_mlp_layers = MLPLayers(user_size_list, self.dropout_prob, activation='tanh', bn=True) 32 | self.item_mlp_layers = MLPLayers(item_size_list, self.dropout_prob, activation='tanh', bn=True) 33 | 34 | self.user_embedding = nn.Embedding(self.user_num, self.embedding_size) 35 | self.item_embedding = nn.Embedding(self.item_num, self.embedding_size) 36 | 37 | 38 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 39 | 40 | self.apply(self._init_weights) 41 | 42 | 43 | def _init_weights(self, module): 44 | if isinstance(module, nn.Embedding): 45 | xavier_normal_(module.weight.data) 46 | elif isinstance(module, nn.Linear): 47 | xavier_normal_(module.weight.data) 48 | if module.bias is not None: 49 | constant_(module.bias.data, 0) 50 | 51 | 52 | def forward(self, input): 53 | user, item = input 54 | embed_user = self.user_embedding(user) 55 | embed_item = self.item_embedding(item).view(-1,self.embedding_size) 56 | user_dnn_out = self.user_mlp_layers(embed_user).unsqueeze(1) 57 | item_dnn_out = self.item_mlp_layers(embed_item) 58 | item_dnn_out = item_dnn_out.view(user.shape[0], -1, self.out_size) 59 | score = (user_dnn_out * item_dnn_out).sum(-1) 60 | output = score.view(-1,2) 61 | batch_loss = -torch.mean(1e-8+torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 62 | return batch_loss 63 | 64 | 65 | 66 | 67 | 68 | @torch.no_grad() 69 | def predict(self, user,item_feature): 70 | 71 | user_feature = self.user_embedding(user) 72 | user_dnn_out = self.user_mlp_layers(user_feature) 73 | 74 | scores = torch.matmul(user_dnn_out,item_feature.t()) 75 | return scores 76 | 77 | @torch.no_grad() # [num_item, 64] 78 | def compute_item_all(self): 79 | embed_item = self.item_embedding.weight 80 | return self.item_mlp_layers(embed_item) 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /code/REC/model/IdModel/srgnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from REC.utils import InputType 4 | from REC.model.basemodel import BaseModel 5 | import torch.nn.functional as F 6 | import math 7 | import numpy as np 8 | #torch.set_default_dtype(torch.float64) 9 | class SRGNN(BaseModel): 10 | input_type = InputType.SEQ 11 | def __init__(self, config, data): 12 | super(SRGNN, self).__init__() 13 | self.hidden_size = config['embedding_size'] 14 | self.step = config['step'] 15 | 16 | self.device = config['device'] 17 | self.item_num = data.item_num 18 | 19 | self.embedding = nn.Embedding(self.item_num, self.hidden_size) 20 | self.gnn = GNN(self.hidden_size, step=self.step) 21 | self.linear_one = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 22 | self.linear_two = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 23 | self.linear_three = nn.Linear(self.hidden_size, 1, bias=False) 24 | self.linear_transform = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=True) 25 | 26 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 27 | self._reset_parameters() 28 | 29 | 30 | def _reset_parameters(self): 31 | stdv = 1.0 / np.sqrt(self.hidden_size) 32 | for weight in self.parameters(): 33 | 
weight.data.uniform_(-stdv, stdv) 34 | 35 | 36 | 37 | def seq_modeling(self, alias_inputs, A, hidden, mask): 38 | gnn_output = self.gnn(A, hidden) 39 | seq_hidden = [] 40 | for i in range(len(alias_inputs)): 41 | seq_hidden.append(gnn_output[i][alias_inputs[i]]) 42 | seq_hidden = torch.stack(seq_hidden) 43 | 44 | ht = seq_hidden[torch.arange(mask.shape[0]).long(), torch.sum(mask, 1) - 1] # batch_size x latent_size; the last position is taken as the global representation 45 | q1 = self.linear_one(ht).view(ht.shape[0], 1, ht.shape[1]) # batch_size x 1 x latent_size 46 | q2 = self.linear_two(seq_hidden) # batch_size x seq_length x latent_size 47 | alpha = self.linear_three(torch.sigmoid(q1 + q2)) 48 | a = torch.sum(alpha * seq_hidden * mask.view(mask.shape[0], -1, 1).float(), 1) 49 | a = self.linear_transform(torch.cat([a, ht], 1)) 50 | return a 51 | 52 | def forward(self, input): 53 | alias_inputs, A, items, mask, targets = input 54 | hidden = self.embedding(items) 55 | seq_output = self.seq_modeling(alias_inputs, A, hidden, mask).unsqueeze(1) #[batch,1, dim] 56 | target_output =self.embedding(targets) #[batch,2, dim] 57 | score = (seq_output * target_output).sum(-1) 58 | output = score.view(-1,2) 59 | batch_loss = -torch.mean(1e-8+torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 60 | return batch_loss 61 | 62 | 63 | @torch.no_grad() 64 | def predict(self, input,item_feature): 65 | alias_inputs, A, items, mask = input 66 | hidden = item_feature[items] 67 | seq_output = self.seq_modeling(alias_inputs, A, hidden, mask) 68 | scores = torch.matmul(seq_output,item_feature.t()) 69 | return scores 70 | 71 | @torch.no_grad() 72 | def compute_item_all(self): 73 | embed_item = self.embedding.weight 74 | return embed_item 75 | 76 | 77 | 78 | class GNN(nn.Module): 79 | def __init__(self, hidden_size, step=1): 80 | super(GNN, self).__init__() 81 | self.step = step 82 | self.hidden_size = hidden_size 83 | self.input_size = hidden_size * 2 84 | self.gate_size = 3 * hidden_size 85 | self.w_ih = nn.Parameter(torch.Tensor(self.gate_size, self.input_size)) 86 | self.w_hh = nn.Parameter(torch.Tensor(self.gate_size, self.hidden_size)) 87 | self.b_ih = nn.Parameter(torch.Tensor(self.gate_size)) 88 | self.b_hh = nn.Parameter(torch.Tensor(self.gate_size)) 89 | self.b_iah = nn.Parameter(torch.Tensor(self.hidden_size)) 90 | self.b_oah = nn.Parameter(torch.Tensor(self.hidden_size)) 91 | 92 | self.linear_edge_in = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 93 | self.linear_edge_out = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 94 | self.linear_edge_f = nn.Linear(self.hidden_size, self.hidden_size, bias=True) 95 | 96 | def GNNCell(self, A, hidden): 97 | input_in = torch.matmul(A[:, :, :A.shape[1]], self.linear_edge_in(hidden)) + self.b_iah 98 | input_out = torch.matmul(A[:, :, A.shape[1]: 2 * A.shape[1]], self.linear_edge_out(hidden)) + self.b_oah 99 | inputs = torch.cat([input_in, input_out], 2) 100 | gi = F.linear(inputs, self.w_ih, self.b_ih) 101 | gh = F.linear(hidden, self.w_hh, self.b_hh) 102 | i_r, i_i, i_n = gi.chunk(3, 2) 103 | h_r, h_i, h_n = gh.chunk(3, 2) 104 | resetgate = torch.sigmoid(i_r + h_r) 105 | inputgate = torch.sigmoid(i_i + h_i) 106 | newgate = torch.tanh(i_n + resetgate * h_n) 107 | hy = newgate + inputgate * (hidden - newgate) 108 | return hy 109 | 110 | def forward(self, A, hidden): 111 | for i in range(self.step): 112 | hidden = self.GNNCell(A, hidden) 113 | return hidden -------------------------------------------------------------------------------- /code/REC/model/IdModel/widedeep.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | 8 | class WideDeep(BaseModel): 9 | 10 | input_type = InputType.SEQ 11 | def __init__(self, config, dataload): 12 | super(WideDeep, self).__init__() 13 | 14 | # load parameters info 15 | self.mlp_hidden_size = config['mlp_hidden_size'] 16 | self.dropout_prob = config['dropout_prob'] 17 | self.embedding_size = config['embedding_size'] 18 | self.device = config['device'] 19 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 20 | self.method = config['method'] if config['method'] else 'mean' 21 | 22 | self.item_num = dataload.item_num 23 | # wide part 24 | # acts as the weight matrix w of the linear transform in the wide part 25 | self.wide_item_embedding = nn.Embedding(self.item_num, 1, padding_idx=0) 26 | self.wide_bias = nn.Parameter(torch.zeros((1,)), requires_grad=True) 27 | 28 | 29 | # deep part 30 | self.deep_item_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 31 | size_list = [self.embedding_size * self.max_seq_length] + self.mlp_hidden_size 32 | self.mlp_layers = MLPLayers(size_list, self.dropout_prob) 33 | self.deep_predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1) 34 | #self.wide_layer = nn.Linear(self.max_seq_length, 1) 35 | 36 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 37 | 38 | # parameters initialization 39 | self.apply(self._init_weights) 40 | 41 | def _init_weights(self, module): 42 | if isinstance(module, nn.Embedding): 43 | xavier_normal_(module.weight.data) 44 | elif isinstance(module, nn.Linear): 45 | xavier_normal_(module.weight.data) 46 | if module.bias is not None: 47 | constant_(module.bias.data, 0) 48 | 49 | def forward(self, inputs): #[batch, 2 , seq_len] 50 | batch_size = inputs.shape[0] 51 | #wide_output = self.wide_layer(interaction.float()) #[batch, 2 , seq_len] -> [batch, 2 , 1] 52 | 53 | wide_output = torch.sum(self.wide_item_embedding(inputs),dim=-2) + self.wide_bias 54 | 55 | deep_input_emb = self.deep_item_embedding(inputs).view(batch_size*2, -1) ##[batch, 2 , seq_len, dim] -> 56 | deep_output = self.mlp_layers(deep_input_emb) 57 | deep_output = self.deep_predict_layer(deep_output) 58 | 59 | 60 | output = wide_output.view(-1,2) + deep_output.view(-1,2) 61 | 62 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 63 | return batch_loss 64 | 65 | 66 | @torch.no_grad() 67 | def predict(self, interaction,item_token): 68 | item_seq, item_len = interaction 69 | batch_size = item_seq.shape[0] 70 | item_seq = item_seq.flatten(0,1) 71 | wide_output = torch.sum(self.wide_item_embedding(item_seq), dim=-2) + self.wide_bias 72 | deep_input_emb = self.deep_item_embedding(item_seq).view(item_seq.shape[0],-1) 73 | deep_output = self.mlp_layers(deep_input_emb) 74 | deep_output = self.deep_predict_layer(deep_output) 75 | 76 | 77 | output = wide_output + deep_output 78 | scores = output.view(batch_size, -1) 79 | return scores 80 | 81 | @torch.no_grad() 82 | def compute_item_all(self): 83 | return None 84 | 85 | #return torch.arange(0,self.n_items).to(self.device) 86 | -------------------------------------------------------------------------------- /code/REC/model/PixelNet/dvbpr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.utils.enum_type 
import InputType 4 | from REC.model.basemodel import BaseModel 5 | import numpy as np 6 | from torch.nn.init import xavier_normal_, constant_ 7 | import torch.nn.functional as F 8 | 9 | 10 | 11 | class DVBPR(BaseModel): 12 | input_type = InputType.PAIR 13 | def __init__(self, config, dataload): 14 | super(DVBPR, self).__init__() 15 | 16 | self.dropout_prob = config['dropout_prob'] 17 | self.embedding_size = config['embedding_size'] // 2 18 | 19 | self.device = config['device'] 20 | 21 | self.user_num = dataload.user_num 22 | self.item_num = dataload.item_num 23 | # CNN for learned image features 24 | 25 | self.visual_encoder = CNNF(hidden_dim=self.embedding_size, dropout=self.dropout_prob) # CNN-F is a smaller CNN 26 | 27 | # Visual latent preference (theta) 28 | self.theta_users = nn.Embedding(self.user_num, self.embedding_size) 29 | 30 | # Latent factors (gamma) 31 | self.gamma_users = nn.Embedding(self.user_num, self.embedding_size) 32 | self.gamma_items = nn.Embedding(self.item_num, self.embedding_size) 33 | 34 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 35 | # Random weight initialization 36 | self.reset_parameters() 37 | 38 | 39 | 40 | def reset_parameters(self): 41 | """ Restart network weights using a Xavier uniform distribution. """ 42 | if isinstance(self.visual_encoder, CNNF): 43 | self.visual_encoder.reset_parameters() 44 | nn.init.uniform_(self.theta_users.weight) # Visual factors (theta) 45 | nn.init.uniform_(self.gamma_users.weight) # Visual factors (theta) 46 | nn.init.uniform_(self.gamma_items.weight) # Visual factors (theta) 47 | 48 | 49 | def forward(self, inputs): 50 | user, item_id, item_modal = inputs 51 | embed_id_user = self.gamma_users(user).unsqueeze(1) 52 | embed_id_item = self.gamma_items(item_id) 53 | 54 | embed_modal_user = self.theta_users(user).unsqueeze(1) 55 | embed_modal_item = self.visual_encoder(item_modal).view(user.shape[0], -1, self.embedding_size) #[5,2,32] 56 | 57 | score = (embed_id_user * embed_id_item).sum(-1) + \ 58 | (embed_modal_user * embed_modal_item).sum(-1) 59 | 60 | output = score.view(-1,2) 61 | batch_loss = -torch.mean(torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 62 | return batch_loss 63 | 64 | 65 | 66 | @torch.no_grad() 67 | def predict(self, user,item_feature): 68 | embed_id_user = self.gamma_users(user) 69 | embed_id_item = self.gamma_items.weight 70 | 71 | embed_modal_user = self.theta_users(user) 72 | 73 | score = torch.matmul(embed_id_user,embed_id_item.t()) + \ 74 | torch.matmul(embed_modal_user,item_feature.t()) 75 | return score 76 | 77 | @torch.no_grad() 78 | def compute_item(self, item): 79 | return self.visual_encoder(item) 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | class CNNF(nn.Module): 93 | """CNN-F network""" 94 | def __init__(self, hidden_dim=2048, fc_dim=512, weights=None, dropout=0.5): 95 | super(CNNF, self).__init__() 96 | self.hidden_dim = hidden_dim 97 | 98 | if weights is None: 99 | weights = { 100 | # conv layers: ((c_in, c_out, stride (square)), custom stride) 101 | 'cnn': [([3, 64, 11], [1, 4]), 102 | ([64, 256, 5], None), 103 | ([256, 256, 3], None), 104 | ([256, 256, 3], None), 105 | ([256, 256, 3], None)], 106 | 107 | # fc layers: n_in, n_out 108 | 'fc': [[256*22*2, fc_dim], # original: 256*7*7 -> 4096 109 | [fc_dim, fc_dim], 110 | [fc_dim, self.hidden_dim]] 111 | } 112 | 113 | self.convs = nn.ModuleList([nn.Conv2d(*params, padding_mode='replicate', stride=stride if stride else 1) 114 | for params, stride in weights['cnn']]) 115 | 116 | self.fcs = 
nn.ModuleList([nn.Linear(*params) for params in weights['fc']]) 117 | self.maxpool2d = nn.MaxPool2d(2) 118 | self.maxpool_idxs = [True, True, False, False, True] # CNN layers to maxpool 119 | self.dropout = nn.Dropout(p=dropout) 120 | self.layer_params = weights 121 | 122 | def forward(self, x): 123 | x = torch.reshape(x, shape=[-1, 3, 224, 224]) 124 | 125 | # convolutional layers 126 | for cnn_layer, apply_maxpool in zip(self.convs, self.maxpool_idxs): 127 | x = F.relu(cnn_layer(x)) 128 | # notable difference: original TF implementation has "SAME" padding 129 | x = self.maxpool2d(x) if apply_maxpool else x 130 | 131 | # fully connected layers 132 | x = torch.reshape(x, shape=[-1, self.layer_params['fc'][0][0]]) 133 | for fc_layer in self.fcs: 134 | x = F.relu(fc_layer(x)) 135 | x = self.dropout(x) 136 | 137 | return x 138 | 139 | def reset_parameters(self): 140 | for conv in self.convs: 141 | nn.init.xavier_uniform_(conv.weight) 142 | for fc in self.fcs: 143 | nn.init.xavier_uniform_(fc.weight) -------------------------------------------------------------------------------- /code/REC/model/PixelNet/modin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers, SequenceAttLayer 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from REC.model.load import load_model 8 | 9 | class MODIN(BaseModel): 10 | 11 | input_type = InputType.SEQ 12 | 13 | def __init__(self, config, dataload): 14 | super(MODIN, self).__init__() 15 | 16 | # get field names and parameter value from config 17 | self.embedding_size = config['embedding_size'] 18 | self.mlp_hidden_size = config['mlp_hidden_size'] 19 | self.device = config['device'] 20 | self.dropout_prob = config['dropout_prob'] 21 | 22 | self.item_num = dataload.item_num 23 | self.att_list = [4 * self.embedding_size] + self.mlp_hidden_size 24 | 25 | self.attention = SequenceAttLayer( 26 | self.att_list, activation='Sigmoid', softmax_stag=False, return_seq_weight=False 27 | ) 28 | 29 | # parameters initialization 30 | self.apply(self._init_weights) 31 | self.visual_encoder = load_model(config=config) 32 | # if self.pretrain_weights: 33 | # self.load_weights(self.pretrain_weights) 34 | 35 | 36 | def _init_weights(self, module): 37 | if isinstance(module, nn.Embedding): 38 | xavier_normal_(module.weight.data) 39 | elif isinstance(module, nn.Linear): 40 | xavier_normal_(module.weight.data) 41 | if module.bias is not None: 42 | constant_(module.bias.data, 0) 43 | 44 | def forward(self, inputs): 45 | #[batch,seq_len+2,3,224,224] 46 | items_modal, items = inputs 47 | batch_size = items.shape[0] 48 | item_emb = self.visual_encoder(items_modal.flatten(0,1)).view(batch_size, -1, self.embedding_size) #[batch,seq_len+2,dim] 49 | user_seq_emb = item_emb[:, :-2] 50 | pos_cand_embs = item_emb[:, -2] 51 | neg_cand_embs = item_emb[:, -1] 52 | 53 | # attention 54 | mask = (items[:,:-2] == 0) 55 | pos_user_emb = self.attention(pos_cand_embs, user_seq_emb,mask).squeeze(1) 56 | neg_user_emb = self.attention(neg_cand_embs, user_seq_emb,mask).squeeze(1) 57 | 58 | pos_score = (pos_user_emb * pos_cand_embs).sum(-1) #[batch] 59 | neg_score = (neg_user_emb * neg_cand_embs).sum(-1) #[batch] 60 | 61 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)).mean(-1) 62 | return loss 63 | 64 | 65 | @torch.no_grad() 66 | def predict(self, item_seq, item_feature): 67 | 68 | 
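        # Evaluation appears to be two-stage: compute_item (below) runs the visual encoder over
        # raw images once to build the item_feature table, so predict only gathers cached
        # embeddings via item_feature[item_seq] and re-runs the target attention for every
        # candidate; the image backbone itself is not re-executed at scoring time.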
#[batch,item_num, seq_len+1] 69 | batch_size = item_seq.shape[0] 70 | item_seq = item_seq.flatten(0,1) #[batch*item_num, seq_len+1] 71 | item_emb = item_feature[item_seq] #[batch*item_num, seq_len+1,dim] 72 | user_seq_emb = item_emb[:, :-1] 73 | cand_emb = item_emb[:,-1] 74 | 75 | 76 | # attention 77 | mask = (item_seq[:,:-1] == 0) 78 | user_emb = self.attention(cand_emb, user_seq_emb, mask).squeeze(1) #[batch*item_num,dim] 79 | 80 | user_emb = user_emb.view(batch_size, self.item_num, self.embedding_size) #[batch,item_num,dim] 81 | scores = (user_emb*item_feature).sum(-1) # [B n_items] 82 | return scores 83 | 84 | @torch.no_grad() 85 | def compute_item(self, item): 86 | return self.visual_encoder(item) 87 | -------------------------------------------------------------------------------- /code/REC/model/PixelNet/modssm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from REC.model.load import load_model 8 | 9 | class MODSSM(BaseModel): 10 | input_type = InputType.SEQ 11 | def __init__(self, config, dataload): 12 | super(MODSSM, self).__init__() 13 | 14 | # load parameters info 15 | self.mlp_hidden_size = config['mlp_hidden_size'] 16 | self.dropout_prob = config['dropout_prob'] 17 | self.embedding_size = config['embedding_size'] 18 | 19 | self.device = config['device'] 20 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 21 | 22 | self.item_num = dataload.item_num 23 | #self.user_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 24 | self.visual_encoder = load_model(config=config) 25 | 26 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 27 | 28 | 29 | 30 | 31 | 32 | 33 | def avg_emb(self, mask, token_seq_embedding): 34 | mask = mask.float() 35 | value_cnt = torch.sum(mask, dim=1, keepdim=True) 36 | mask = mask.unsqueeze(2).expand_as(token_seq_embedding) 37 | masked_token_seq_embedding = token_seq_embedding * mask.float() 38 | result = torch.sum(masked_token_seq_embedding, dim=-2) 39 | user_embedding = torch.div(result, value_cnt + 1e-8) 40 | return user_embedding 41 | 42 | 43 | 44 | def forward(self, inputs): 45 | items_index,all_item_modal = inputs 46 | mask = items_index[:, :-2] != 0 47 | all_item_embs = self.visual_encoder(all_item_modal) 48 | input_item_embs = all_item_embs[items_index, :] 49 | 50 | user_embedding = input_item_embs[:, :-2, :] 51 | item_embedding = input_item_embs[:, -2:,:] 52 | user_embedding = self.avg_emb(mask, user_embedding).unsqueeze(1) 53 | score = (user_embedding * item_embedding).sum(-1) 54 | output = score.view(-1,2) 55 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 56 | return batch_loss 57 | 58 | 59 | @torch.no_grad() 60 | def predict(self,user_seq,item_feature): 61 | mask = user_seq != 0 62 | input_embs = item_feature[user_seq] 63 | user_embedding = self.avg_emb(mask,input_embs) 64 | scores = torch.matmul(user_embedding,item_feature.t()) 65 | return scores 66 | 67 | 68 | @torch.no_grad() 69 | def compute_item(self,item): 70 | return self.visual_encoder(item) 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /code/REC/model/PixelNet/mofm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import 
torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers, BaseFactorizationMachine 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from logging import getLogger 8 | from REC.model.load import load_model 9 | 10 | 11 | class MOFM(BaseModel): 12 | input_type = InputType.SEQ 13 | def __init__(self, config, dataload): 14 | super(MOFM, self).__init__() 15 | 16 | # load parameters info 17 | self.mlp_hidden_size = config['mlp_hidden_size'] 18 | self.dropout_prob = config['dropout_prob'] 19 | self.embedding_size = config['embedding_size'] 20 | 21 | self.device = config['device'] 22 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 23 | 24 | self.item_num = dataload.item_num 25 | self.visual_encoder = load_model(config=config) 26 | 27 | self.fm = BaseFactorizationMachine(reduce_sum=True) 28 | 29 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 30 | 31 | def mask_emb(self, input_item_embs, mask): 32 | 33 | mask = mask.unsqueeze(-1).expand_as(input_item_embs) 34 | masked_token_seq_embedding = input_item_embs * mask 35 | 36 | return masked_token_seq_embedding 37 | 38 | 39 | 40 | def forward(self, inputs): 41 | 42 | items_index,all_item_modal = inputs 43 | mask = items_index != 0 44 | all_item_embs = self.visual_encoder(all_item_modal) 45 | input_item_embs = all_item_embs[items_index, :] 46 | 47 | inputs_embedding = self.mask_emb(input_item_embs, mask) 48 | scores = self.fm(inputs_embedding.flatten(0,1)) 49 | output = scores.view(-1,2) 50 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 51 | return batch_loss 52 | 53 | 54 | @torch.no_grad() 55 | def predict(self,user_seq,item_feature): 56 | mask = user_seq != 0 57 | input_embs = item_feature[user_seq] 58 | user_embedding = self.mask_emb(input_embs, mask) 59 | user_embedding = torch.sum(user_embedding, dim=1) 60 | scores = torch.matmul(user_embedding,item_feature.t()) 61 | return scores 62 | 63 | @torch.no_grad() 64 | def compute_item(self,item): 65 | return self.visual_encoder(item) 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /code/REC/model/PixelNet/mogru4rec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn.init import xavier_uniform_, xavier_normal_ 4 | from REC.model.layers import TransformerEncoder 5 | from REC.utils.enum_type import InputType 6 | from REC.model.load import load_model 7 | from REC.model.basemodel import BaseModel 8 | 9 | class MOGRU4Rec(BaseModel): 10 | input_type = InputType.SEQ 11 | 12 | def __init__(self, config, dataload): 13 | super(MOGRU4Rec, self).__init__() 14 | 15 | # load parameters info 16 | 17 | self.embedding_size = config['embedding_size'] 18 | self.hidden_size = config['hidden_size'] * config['embedding_size'] 19 | self.num_layers = config['num_layers'] 20 | self.dropout_prob = config['dropout_prob'] 21 | 22 | self.initializer_range = config['initializer_range'] 23 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 24 | self.item_num = dataload.item_num 25 | # define layers and loss 26 | 27 | self.visual_encoder = load_model(config=config) 28 | self.emb_dropout = nn.Dropout(self.dropout_prob) 29 | self.gru_layers = nn.GRU( 30 | input_size=self.embedding_size, 31 | hidden_size=self.hidden_size, 32 | num_layers=self.num_layers, 33 | bias=False, 34 | batch_first=True, 35 | ) 36 | 37 | self.dense = 
nn.Linear(self.hidden_size, self.embedding_size) 38 | 39 | xavier_uniform_(self.gru_layers.weight_hh_l0) 40 | xavier_uniform_(self.gru_layers.weight_ih_l0) 41 | xavier_normal_(self.dense.weight) 42 | 43 | 44 | def forward(self, interaction): 45 | items, masked_index = interaction 46 | batch_size = masked_index.shape[0] 47 | item_emb = self.visual_encoder(items.flatten(0,1)).view(batch_size, -1, 2, self.embedding_size) #[batch, 2, max_seq_len+1, dim] 48 | pos_items_embs = item_emb[:, :, 0] 49 | neg_items_embs = item_emb[:, :, 1] 50 | 51 | input_emb = pos_items_embs[:, :-1, :] 52 | target_pos_embs = pos_items_embs[:, 1:, :] 53 | target_neg_embs = neg_items_embs[:, 1:, :] 54 | 55 | input_emb_dropout = self.emb_dropout(input_emb) 56 | gru_output, _ = self.gru_layers(input_emb_dropout) 57 | gru_output = self.dense(gru_output) 58 | 59 | pos_score = (gru_output * target_pos_embs).sum(-1) 60 | neg_score = (gru_output * target_neg_embs).sum(-1) 61 | 62 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)*masked_index).sum(-1) 63 | return loss.mean(-1) 64 | 65 | @torch.no_grad() 66 | def predict(self, item_seq, item_feature): 67 | 68 | item_emb = item_feature[item_seq] 69 | 70 | item_seq_emb_dropout = self.emb_dropout(item_emb) 71 | gru_output, _ = self.gru_layers(item_seq_emb_dropout) 72 | gru_output = self.dense(gru_output) 73 | hidden = gru_output[:, -1] 74 | scores = torch.matmul(hidden, item_feature.t()) 75 | return scores 76 | 77 | 78 | @torch.no_grad() 79 | def compute_item(self, item): 80 | return self.visual_encoder(item) 81 | 82 | 83 | -------------------------------------------------------------------------------- /code/REC/model/PixelNet/molightsans.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.model.layers import LightTransformerEncoder 4 | from REC.utils.enum_type import InputType 5 | from REC.model.load import load_model 6 | from REC.model.basemodel import BaseModel 7 | 8 | class MOLightSANs(BaseModel): 9 | input_type = InputType.AUGSEQ 10 | 11 | def __init__(self, config, dataload): 12 | super(MOLightSANs, self).__init__() 13 | 14 | # load parameters info 15 | self.n_layers = config['n_layers'] 16 | self.n_heads = config['n_heads'] 17 | self.embedding_size = config['embedding_size'] # same as embedding_size 18 | self.inner_size = config['inner_size']* self.embedding_size # the dimensionality in feed-forward layer 19 | self.hidden_dropout_prob = config['hidden_dropout_prob'] 20 | self.attn_dropout_prob = config['attn_dropout_prob'] 21 | self.hidden_act = config['hidden_act'] 22 | self.layer_norm_eps = config['layer_norm_eps'] 23 | self.k_interests = config["k_interests"] 24 | self.device = config['device'] 25 | self.initializer_range = config['initializer_range'] 26 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 27 | self.item_num = dataload.item_num 28 | # define layers and loss 29 | 30 | self.visual_encoder = load_model(config=config) 31 | 32 | self.position_embedding = nn.Embedding(self.max_seq_length, self.embedding_size) 33 | self.LayerNorm = nn.LayerNorm(self.embedding_size, eps=self.layer_norm_eps) 34 | self.dropout = nn.Dropout(self.hidden_dropout_prob) 35 | 36 | self.trm_encoder = LightTransformerEncoder( 37 | n_layers=self.n_layers, 38 | n_heads=self.n_heads, 39 | k_interests=self.k_interests, 40 | hidden_size=self.embedding_size, 41 | seq_len=self.max_seq_length, 42 | inner_size=self.inner_size, 43 | hidden_dropout_prob=self.hidden_dropout_prob, 44 | 
attn_dropout_prob=self.attn_dropout_prob, 45 | hidden_act=self.hidden_act, 46 | layer_norm_eps=self.layer_norm_eps 47 | ) 48 | 49 | self.position_embedding.weight.data.normal_(mean=0.0, std=self.initializer_range) 50 | self.trm_encoder.apply(self._init_weights) 51 | self.LayerNorm.bias.data.zero_() 52 | self.LayerNorm.weight.data.fill_(1.0) 53 | 54 | def _init_weights(self, module): 55 | if isinstance(module, (nn.Linear, nn.Embedding)): 56 | module.weight.data.normal_(mean=0.0, std=self.initializer_range) 57 | elif isinstance(module, nn.LayerNorm): 58 | module.bias.data.zero_() 59 | module.weight.data.fill_(1.0) 60 | if isinstance(module, nn.Linear) and module.bias is not None: 61 | module.bias.data.zero_() 62 | 63 | def forward(self, items): 64 | 65 | batch_size = items.shape[0] 66 | item_emb = self.visual_encoder(items.flatten(0,1)).view(batch_size,-1,self.embedding_size) #[batch, max_seq_len+2, dim] 67 | 68 | input_emb = item_emb[:, :-2, :] 69 | target_pos_embs = item_emb[:, -2, :] 70 | target_neg_embs = item_emb[:, -1, :] 71 | 72 | 73 | position_ids = torch.arange(input_emb.size(1), dtype=torch.long, device=self.device) 74 | position_embedding = self.position_embedding(position_ids) 75 | 76 | input_emb = self.LayerNorm(input_emb) 77 | input_emb = self.dropout(input_emb) 78 | 79 | output_embs = self.trm_encoder(input_emb, position_embedding, output_all_encoded_layers=False) #[batch, max_seq_len-1, dim] 80 | output_embs = output_embs[-1] 81 | output_embs = output_embs[:, -1, :] 82 | 83 | pos_score = (output_embs * target_pos_embs).sum(-1) 84 | neg_score = (output_embs * target_neg_embs).sum(-1) 85 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)) 86 | return loss.mean(-1) 87 | 88 | @torch.no_grad() 89 | def predict(self, item_seq, item_feature): 90 | 91 | position_ids = torch.arange(item_seq.size(1), dtype=torch.long, device=item_seq.device) 92 | position_embedding = self.position_embedding(position_ids) 93 | 94 | input_emb = item_feature[item_seq] 95 | input_emb = self.LayerNorm(input_emb) 96 | input_emb = self.dropout(input_emb) 97 | 98 | output = self.trm_encoder(input_emb, position_embedding, output_all_encoded_layers=False) 99 | output_embs = output[-1] 100 | seq_output = output_embs[:, -1] 101 | 102 | scores = torch.matmul(seq_output, item_feature.t()) 103 | return scores 104 | 105 | 106 | @torch.no_grad() 107 | def compute_item(self, item): 108 | return self.visual_encoder(item) 109 | 110 | 111 | -------------------------------------------------------------------------------- /code/REC/model/PixelNet/momf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | 5 | from REC.model.layers import MLPLayers 6 | from REC.model.load import load_model 7 | from REC.utils import InputType 8 | from REC.model.basemodel import BaseModel 9 | 10 | class MOMF(BaseModel): 11 | 12 | input_type = InputType.PAIR 13 | 14 | def __init__(self, config, data): 15 | super(MOMF, self).__init__() 16 | 17 | self.mlp_hidden_size = config['mlp_hidden_size'] 18 | self.dropout_prob = config['dropout_prob'] 19 | self.embedding_size = config['embedding_size'] 20 | self.out_size = self.mlp_hidden_size[-1] if len(self.mlp_hidden_size) else self.embedding_size 21 | self.device = config['device'] 22 | 23 | self.user_num = data.user_num 24 | self.item_num = data.item_num 25 | 26 | 27 | user_size_list = [self.embedding_size] + self.mlp_hidden_size 28 | item_size_list = 
[self.embedding_size] + self.mlp_hidden_size 29 | 30 | # define layers and loss 31 | self.user_mlp_layers = MLPLayers(user_size_list, self.dropout_prob, activation='tanh', bn=True) 32 | self.item_mlp_layers = MLPLayers(item_size_list, self.dropout_prob, activation='tanh', bn=True) 33 | 34 | self.user_embedding = nn.Embedding(self.user_num, self.embedding_size) 35 | 36 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 37 | # parameters initialization 38 | self.apply(self._init_weights) 39 | self.visual_encoder = load_model(config=config) 40 | 41 | def _init_weights(self, module): 42 | if isinstance(module, nn.Embedding): 43 | xavier_normal_(module.weight.data) 44 | elif isinstance(module, nn.Linear): 45 | xavier_normal_(module.weight.data) 46 | if module.bias is not None: 47 | constant_(module.bias.data, 0) 48 | 49 | 50 | def forward(self, input): 51 | user, item = input 52 | embed_user = self.user_embedding(user) 53 | embed_item = self.visual_encoder(item.flatten(0,1)) 54 | user_dnn_out = self.user_mlp_layers(embed_user).unsqueeze(1) 55 | item_dnn_out = self.item_mlp_layers(embed_item) 56 | item_dnn_out = item_dnn_out.view(user.shape[0], -1, self.out_size) 57 | score = (user_dnn_out * item_dnn_out).sum(-1) 58 | output = score.view(-1,2) 59 | batch_loss = -torch.mean(torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 60 | return batch_loss 61 | 62 | @torch.no_grad() 63 | def predict(self, user,item_feature): 64 | user_feature = self.user_embedding(user) 65 | user_dnn_out = self.user_mlp_layers(user_feature) 66 | scores = torch.matmul(user_dnn_out,item_feature.t()) 67 | return scores 68 | 69 | @torch.no_grad() 70 | def compute_item(self, item): 71 | return self.item_mlp_layers(self.visual_encoder(item)) 72 | 73 | -------------------------------------------------------------------------------- /code/REC/model/ViNet/__pycache__/acf.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/ViNet/__pycache__/acf.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/ViNet/__pycache__/curatornet.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/ViNet/__pycache__/curatornet.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/ViNet/__pycache__/fsasrec.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/ViNet/__pycache__/fsasrec.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/ViNet/__pycache__/vbpr.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/ViNet/__pycache__/vbpr.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/ViNet/__pycache__/visrank.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/ViNet/__pycache__/visrank.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/ViNet/curatornet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.utils.enum_type import InputType 4 | from REC.model.basemodel import BaseModel 5 | import numpy as np 6 | from torch.nn.init import xavier_normal_, constant_ 7 | import torch.nn.functional as F 8 | 9 | class CuratorNet(BaseModel): 10 | input_type = InputType.SEQ 11 | def __init__(self, config, dataload): 12 | super(CuratorNet, self).__init__() 13 | self.embedding_size = config['embedding_size'] 14 | self.hidden_size = config['hidden_size']*self.embedding_size 15 | self.device = config['device'] 16 | 17 | self.v_feat_path = config['v_feat_path'] 18 | v_feat = np.load(self.v_feat_path, allow_pickle=True) 19 | 20 | v_feat = torch.tensor(v_feat,dtype=torch.float).to(self.device) 21 | v_feat[0].fill_(0) 22 | self.embedding = nn.Embedding.from_pretrained(v_feat, freeze=True) 23 | 24 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 25 | 26 | self.feature_dim = v_feat.shape[-1] 27 | 28 | # Common section 29 | self.selu_common1 = nn.Linear(self.feature_dim, self.embedding_size) 30 | self.selu_common2 = nn.Linear(self.embedding_size, self.embedding_size) 31 | 32 | # Profile section 33 | self.maxpool = nn.AdaptiveMaxPool2d((1, self.embedding_size)) 34 | self.avgpool = nn.AdaptiveAvgPool2d((1, self.embedding_size)) 35 | self.selu_pu1 = nn.Linear(self.embedding_size + self.embedding_size, self.hidden_size) 36 | self.selu_pu2 = nn.Linear(self.hidden_size, self.hidden_size) 37 | self.selu_pu3 = nn.Linear(self.hidden_size, self.embedding_size) 38 | 39 | # Random weight initialization 40 | self.reset_parameters() 41 | 42 | 43 | def reset_parameters(self): 44 | """Resets network weights. 45 | 46 | Restart network weights using a Xavier uniform distribution. 
47 | """ 48 | # Common section 49 | nn.init.xavier_uniform_(self.selu_common1.weight) 50 | nn.init.xavier_uniform_(self.selu_common2.weight) 51 | # Profile section 52 | nn.init.xavier_uniform_(self.selu_pu1.weight) 53 | nn.init.xavier_uniform_(self.selu_pu2.weight) 54 | nn.init.xavier_uniform_(self.selu_pu3.weight) 55 | 56 | 57 | def forward(self, inputs): #inputs: user_seq, pos, neg 58 | profile = inputs[:, :-2] 59 | pi = inputs[:, -2] 60 | ni = inputs[:, -1] 61 | # Load embedding data 62 | profile = self.embedding(profile) 63 | pi = self.embedding(pi) 64 | ni = self.embedding(ni) 65 | 66 | # Positive item 67 | pi = F.selu(self.selu_common1(pi)) 68 | pi = F.selu(self.selu_common2(pi)) 69 | 70 | # Negative item 71 | ni = F.selu(self.selu_common1(ni)) 72 | ni = F.selu(self.selu_common2(ni)) 73 | 74 | # User profile 75 | profile = F.selu(self.selu_common1(profile)) 76 | profile = F.selu(self.selu_common2(profile)) 77 | profile = torch.cat( 78 | (self.maxpool(profile), self.avgpool(profile)), dim=-1 79 | ) 80 | profile = F.selu(self.selu_pu1(profile)) 81 | profile = F.selu(self.selu_pu2(profile)) 82 | profile = F.selu(self.selu_pu3(profile)) 83 | 84 | # x_ui > x_uj 85 | profile = profile.squeeze(1) 86 | x_ui = (profile*pi).sum(-1) 87 | x_uj = (profile*ni).sum(-1) 88 | batch_loss = -torch.mean(torch.log(1e-8 + torch.sigmoid(x_ui - x_uj))) 89 | return batch_loss 90 | 91 | 92 | 93 | 94 | @torch.no_grad() 95 | def predict(self, user,item_feature): 96 | profile = item_feature[user] 97 | profile = torch.cat( 98 | (self.maxpool(profile), self.avgpool(profile)), dim=-1 99 | ) 100 | 101 | profile = F.selu(self.selu_pu1(profile)) 102 | profile = F.selu(self.selu_pu2(profile)) 103 | profile = F.selu(self.selu_pu3(profile)) 104 | profile = profile.squeeze(1) 105 | 106 | score = torch.matmul(profile,item_feature.t()) 107 | return score 108 | 109 | @torch.no_grad() # [num_item, 32] 110 | def compute_item_all(self): 111 | embed = self.embedding.weight 112 | embed = F.selu(self.selu_common1(embed)) 113 | embed = F.selu(self.selu_common2(embed)) 114 | return embed 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /code/REC/model/ViNet/vbpr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.utils.enum_type import InputType 4 | from REC.model.basemodel import BaseModel 5 | import numpy as np 6 | from torch.nn.init import xavier_normal_, constant_ 7 | 8 | class VBPR(BaseModel): 9 | input_type = InputType.PAIR 10 | def __init__(self, config, dataload): 11 | super(VBPR, self).__init__() 12 | self.mlp_hidden_size = config['mlp_hidden_size'] 13 | self.dropout_prob = config['dropout_prob'] 14 | self.embedding_size = config['embedding_size'] // 2 15 | 16 | self.device = config['device'] 17 | 18 | self.user_num = dataload.user_num 19 | self.item_num = dataload.item_num 20 | 21 | self.v_feat_path = config['v_feat_path'] 22 | v_feat = np.load(self.v_feat_path, allow_pickle=True) 23 | 24 | self.v_feat = torch.tensor(v_feat,dtype=torch.float).to(self.device) 25 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 26 | 27 | self.feature_dim = self.v_feat.shape[-1] 28 | 29 | # define layers and loss 30 | self.feature_projection = nn.Linear(self.feature_dim, self.embedding_size, bias=False) 31 | self.bias_projection = nn.Linear(self.feature_dim, 1, bias=False) 32 | self.user_id_embedding = nn.Embedding(self.user_num, self.embedding_size) 33 | self.item_id_embedding = 
nn.Embedding(self.item_num, self.embedding_size)

        self.user_modal_embedding = nn.Embedding(self.user_num, self.embedding_size)

        #self.visual_bias = nn.Parameter(torch.tensor(0.0))
        #self.user_bias = nn.Parameter(torch.tensor(0.0))
        #self.item_bias = nn.Parameter(torch.tensor(0.0))
        #self.global_bias = nn.Parameter(torch.tensor(0.0))
        #self.loss = BPRLoss()
        # parameters initialization
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Embedding):
            xavier_normal_(module.weight.data)
        elif isinstance(module, nn.Linear):
            xavier_normal_(module.weight.data)
            if module.bias is not None:
                constant_(module.bias.data, 0)

    def forward(self, inputs):
        user, item = inputs
        embed_id_user = self.user_id_embedding(user).unsqueeze(1)
        embed_id_item = self.item_id_embedding(item)

        embed_modal_user = self.user_modal_embedding(user).unsqueeze(1)
        embed_modal_item = self.feature_projection(self.v_feat[item])

        score = (embed_id_user * embed_id_item).sum(-1) + (embed_modal_user * embed_modal_item).sum(-1) \
                + self.bias_projection(self.v_feat[item]).squeeze(-1)
        #self.global_bias + self.user_bias + self.item_bias

        output = score.view(-1, 2)
        batch_loss = -torch.mean(torch.log(1e-8 + torch.sigmoid(torch.matmul(output, self.weight))))
        return batch_loss

    @torch.no_grad()
    def predict(self, user, item_feature):
        embed_id_user = self.user_id_embedding(user)
        embed_id_item = self.item_id_embedding.weight

        embed_modal_user = self.user_modal_embedding(user)

        score = torch.matmul(embed_id_user, embed_id_item.t()) + \
                torch.matmul(embed_modal_user, item_feature.t()) + \
                self.total_visual_bias

        return score

    @torch.no_grad()
    def compute_item_all(self):
        embed = self.feature_projection(self.v_feat)
        self.total_visual_bias = self.bias_projection(self.v_feat).squeeze(-1)
        return embed
--------------------------------------------------------------------------------
/code/REC/model/ViNet/visrank.py:
--------------------------------------------------------------------------------
import torch
from torch import nn
from REC.utils.enum_type import InputType
from REC.model.basemodel import BaseModel
import numpy as np


class VISRANK(nn.Module):
    input_type = InputType.PAIR

    def __init__(self, config, dataload):
        super(VISRANK, self).__init__()

        self.method = config['method']
        if self.method == 'average_top_k':
            self.k = config['top_num']
        elif self.method == 'maximum':
            self.k = 1
        else:
            self.k = None
        v_feat_path = config['v_feat_path']
        self.device = config['device']
        v_feat = np.load(v_feat_path, allow_pickle=True)

        self.v_feat = torch.tensor(v_feat, dtype=torch.float).to(self.device)

        self.module = None
        self.placeholder = nn.Parameter(torch.zeros(0, requires_grad=True))

    def forward(self, inputs):
        pass

    @torch.no_grad()  # set batch = 1
    def predict(self, user, item_feature):

        user = user[-50:]  # due to limited GPU memory (RN50: -50, resnet50: -30)

        seq_feat = self.v_feat[user]
        possible_items = torch.cosine_similarity(seq_feat.unsqueeze(1), self.v_feat.unsqueeze(0), dim=-1)

        seq_len = len(user)
        if self.method ==
'average_top_k': 47 | k = min(self.k, seq_len) 48 | elif self.method == 'maximum': 49 | k = 1 50 | else: 51 | k = seq_len 52 | values, _ = torch.topk(possible_items, k = k, dim=0) 53 | scores = values.mean(0) 54 | scores[0] = -np.inf 55 | return scores 56 | 57 | @torch.no_grad() 58 | def compute_item_all(self): 59 | return None 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/mobert4rec.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/mobert4rec.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/mobert4rec2.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/mobert4rec2.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/modin.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/modin.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/modssm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/modssm.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/mofm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/mofm.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/mogru4rec.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/mogru4rec.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/molightsans.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/molightsans.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/momf.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/momf.cpython-39.pyc -------------------------------------------------------------------------------- 
/code/REC/model/VisualModel/__pycache__/monextitnet.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/monextitnet.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/mosasrec.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/mosasrec.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/__pycache__/mosrgnn.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/model/VisualModel/__pycache__/mosrgnn.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/model/VisualModel/dvbpr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.utils.enum_type import InputType 4 | from REC.model.basemodel import BaseModel 5 | import numpy as np 6 | from torch.nn.init import xavier_normal_, constant_ 7 | import torch.nn.functional as F 8 | 9 | 10 | 11 | class DVBPR(BaseModel): 12 | input_type = InputType.PAIR 13 | def __init__(self, config, dataload): 14 | super(DVBPR, self).__init__() 15 | 16 | self.dropout_prob = config['dropout_prob'] 17 | self.embedding_size = config['embedding_size'] // 2 18 | 19 | self.device = config['device'] 20 | 21 | self.user_num = dataload.user_num 22 | self.item_num = dataload.item_num 23 | # CNN for learned image features 24 | 25 | self.visual_encoder = CNNF(hidden_dim=self.embedding_size, dropout=self.dropout_prob) # CNN-F is a smaller CNN 26 | 27 | # Visual latent preference (theta) 28 | self.theta_users = nn.Embedding(self.user_num, self.embedding_size) 29 | 30 | # Latent factors (gamma) 31 | self.gamma_users = nn.Embedding(self.user_num, self.embedding_size) 32 | self.gamma_items = nn.Embedding(self.item_num, self.embedding_size) 33 | 34 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 35 | # Random weight initialization 36 | self.reset_parameters() 37 | 38 | 39 | 40 | def reset_parameters(self): 41 | """ Restart network weights using a Xavier uniform distribution. 
""" 42 | if isinstance(self.visual_encoder, CNNF): 43 | self.visual_encoder.reset_parameters() 44 | nn.init.uniform_(self.theta_users.weight) # Visual factors (theta) 45 | nn.init.uniform_(self.gamma_users.weight) # Visual factors (theta) 46 | nn.init.uniform_(self.gamma_items.weight) # Visual factors (theta) 47 | 48 | 49 | def forward(self, inputs): 50 | user, item_id, item_modal = inputs 51 | embed_id_user = self.gamma_users(user).unsqueeze(1) 52 | embed_id_item = self.gamma_items(item_id) 53 | 54 | embed_modal_user = self.theta_users(user).unsqueeze(1) 55 | embed_modal_item = self.visual_encoder(item_modal).view(user.shape[0], -1, self.embedding_size) #[5,2,32] 56 | 57 | score = (embed_id_user * embed_id_item).sum(-1) + \ 58 | (embed_modal_user * embed_modal_item).sum(-1) 59 | 60 | output = score.view(-1,2) 61 | batch_loss = -torch.mean(torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 62 | return batch_loss 63 | 64 | 65 | 66 | @torch.no_grad() 67 | def predict(self, user,item_feature): 68 | embed_id_user = self.gamma_users(user) 69 | embed_id_item = self.gamma_items.weight 70 | 71 | embed_modal_user = self.theta_users(user) 72 | 73 | score = torch.matmul(embed_id_user,embed_id_item.t()) + \ 74 | torch.matmul(embed_modal_user,item_feature.t()) 75 | return score 76 | 77 | @torch.no_grad() 78 | def compute_item(self, item): 79 | return self.visual_encoder(item) 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | class CNNF(nn.Module): 93 | """CNN-F network""" 94 | def __init__(self, hidden_dim=2048, fc_dim=512, weights=None, dropout=0.5): 95 | super(CNNF, self).__init__() 96 | self.hidden_dim = hidden_dim 97 | 98 | if weights is None: 99 | weights = { 100 | # conv layers: ((c_in, c_out, stride (square)), custom stride) 101 | 'cnn': [([3, 64, 11], [1, 4]), 102 | ([64, 256, 5], None), 103 | ([256, 256, 3], None), 104 | ([256, 256, 3], None), 105 | ([256, 256, 3], None)], 106 | 107 | # fc layers: n_in, n_out 108 | 'fc': [[256*22*2, fc_dim], # original: 256*7*7 -> 4096 109 | [fc_dim, fc_dim], 110 | [fc_dim, self.hidden_dim]] 111 | } 112 | 113 | self.convs = nn.ModuleList([nn.Conv2d(*params, padding_mode='replicate', stride=stride if stride else 1) 114 | for params, stride in weights['cnn']]) 115 | 116 | self.fcs = nn.ModuleList([nn.Linear(*params) for params in weights['fc']]) 117 | self.maxpool2d = nn.MaxPool2d(2) 118 | self.maxpool_idxs = [True, True, False, False, True] # CNN layers to maxpool 119 | self.dropout = nn.Dropout(p=dropout) 120 | self.layer_params = weights 121 | 122 | def forward(self, x): 123 | x = torch.reshape(x, shape=[-1, 3, 224, 224]) 124 | 125 | # convolutional layers 126 | for cnn_layer, apply_maxpool in zip(self.convs, self.maxpool_idxs): 127 | x = F.relu(cnn_layer(x)) 128 | # notable difference: original TF implementation has "SAME" padding 129 | x = self.maxpool2d(x) if apply_maxpool else x 130 | 131 | # fully connected layers 132 | x = torch.reshape(x, shape=[-1, self.layer_params['fc'][0][0]]) 133 | for fc_layer in self.fcs: 134 | x = F.relu(fc_layer(x)) 135 | x = self.dropout(x) 136 | 137 | return x 138 | 139 | def reset_parameters(self): 140 | for conv in self.convs: 141 | nn.init.xavier_uniform_(conv.weight) 142 | for fc in self.fcs: 143 | nn.init.xavier_uniform_(fc.weight) -------------------------------------------------------------------------------- /code/REC/model/VisualModel/modin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init 
import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers, SequenceAttLayer 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from REC.model.load import load_model 8 | 9 | class MODIN(BaseModel): 10 | 11 | input_type = InputType.SEQ 12 | 13 | def __init__(self, config, dataload): 14 | super(MODIN, self).__init__() 15 | 16 | # get field names and parameter value from config 17 | self.embedding_size = config['embedding_size'] 18 | self.mlp_hidden_size = config['mlp_hidden_size'] 19 | self.device = config['device'] 20 | self.dropout_prob = config['dropout_prob'] 21 | 22 | self.item_num = dataload.item_num 23 | self.att_list = [4 * self.embedding_size] + self.mlp_hidden_size 24 | 25 | self.attention = SequenceAttLayer( 26 | self.att_list, activation='Sigmoid', softmax_stag=False, return_seq_weight=False 27 | ) 28 | 29 | # parameters initialization 30 | self.apply(self._init_weights) 31 | self.visual_encoder = load_model(config=config) 32 | # if self.pretrain_weights: 33 | # self.load_weights(self.pretrain_weights) 34 | 35 | 36 | def _init_weights(self, module): 37 | if isinstance(module, nn.Embedding): 38 | xavier_normal_(module.weight.data) 39 | elif isinstance(module, nn.Linear): 40 | xavier_normal_(module.weight.data) 41 | if module.bias is not None: 42 | constant_(module.bias.data, 0) 43 | 44 | def forward(self, inputs): 45 | #[batch,seq_len+2,3,224,224] 46 | items_modal, items = inputs 47 | batch_size = items.shape[0] 48 | item_emb = self.visual_encoder(items_modal.flatten(0,1)).view(batch_size, -1, self.embedding_size) #[batch,seq_len+2,dim] 49 | user_seq_emb = item_emb[:, :-2] 50 | pos_cand_embs = item_emb[:, -2] 51 | neg_cand_embs = item_emb[:, -1] 52 | 53 | # attention 54 | mask = (items[:,:-2] == 0) 55 | pos_user_emb = self.attention(pos_cand_embs, user_seq_emb,mask).squeeze(1) 56 | neg_user_emb = self.attention(neg_cand_embs, user_seq_emb,mask).squeeze(1) 57 | 58 | pos_score = (pos_user_emb * pos_cand_embs).sum(-1) #[batch] 59 | neg_score = (neg_user_emb * neg_cand_embs).sum(-1) #[batch] 60 | 61 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)).mean(-1) 62 | return loss 63 | 64 | 65 | @torch.no_grad() 66 | def predict(self, item_seq, item_feature): 67 | 68 | #[batch,item_num, seq_len+1] 69 | batch_size = item_seq.shape[0] 70 | item_seq = item_seq.flatten(0,1) #[batch*item_num, seq_len+1] 71 | item_emb = item_feature[item_seq] #[batch*item_num, seq_len+1,dim] 72 | user_seq_emb = item_emb[:, :-1] 73 | cand_emb = item_emb[:,-1] 74 | 75 | 76 | # attention 77 | mask = (item_seq[:,:-1] == 0) 78 | user_emb = self.attention(cand_emb, user_seq_emb, mask).squeeze(1) #[batch*item_num,dim] 79 | 80 | user_emb = user_emb.view(batch_size, self.item_num, self.embedding_size) #[batch,item_num,dim] 81 | scores = (user_emb*item_feature).sum(-1) # [B n_items] 82 | return scores 83 | 84 | @torch.no_grad() 85 | def compute_item(self, item): 86 | return self.visual_encoder(item) 87 | -------------------------------------------------------------------------------- /code/REC/model/VisualModel/modssm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from REC.model.load import load_model 8 | 9 | class MODSSM(BaseModel): 10 | input_type = InputType.SEQ 11 | def __init__(self, config, 
dataload): 12 | super(MODSSM, self).__init__() 13 | 14 | # load parameters info 15 | self.mlp_hidden_size = config['mlp_hidden_size'] 16 | self.dropout_prob = config['dropout_prob'] 17 | self.embedding_size = config['embedding_size'] 18 | 19 | self.device = config['device'] 20 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 21 | 22 | self.item_num = dataload.item_num 23 | #self.user_embedding = nn.Embedding(self.item_num, self.embedding_size, padding_idx=0) 24 | self.visual_encoder = load_model(config=config) 25 | 26 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 27 | 28 | 29 | 30 | 31 | 32 | 33 | def avg_emb(self, mask, token_seq_embedding): 34 | mask = mask.float() 35 | value_cnt = torch.sum(mask, dim=1, keepdim=True) 36 | mask = mask.unsqueeze(2).expand_as(token_seq_embedding) 37 | masked_token_seq_embedding = token_seq_embedding * mask.float() 38 | result = torch.sum(masked_token_seq_embedding, dim=-2) 39 | user_embedding = torch.div(result, value_cnt + 1e-8) 40 | return user_embedding 41 | 42 | 43 | 44 | def forward(self, inputs): 45 | items_index,all_item_modal = inputs 46 | mask = items_index[:, :-2] != 0 47 | all_item_embs = self.visual_encoder(all_item_modal) 48 | input_item_embs = all_item_embs[items_index, :] 49 | 50 | user_embedding = input_item_embs[:, :-2, :] 51 | item_embedding = input_item_embs[:, -2:,:] 52 | user_embedding = self.avg_emb(mask, user_embedding).unsqueeze(1) 53 | score = (user_embedding * item_embedding).sum(-1) 54 | output = score.view(-1,2) 55 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 56 | return batch_loss 57 | 58 | 59 | @torch.no_grad() 60 | def predict(self,user_seq,item_feature): 61 | mask = user_seq != 0 62 | input_embs = item_feature[user_seq] 63 | user_embedding = self.avg_emb(mask,input_embs) 64 | scores = torch.matmul(user_embedding,item_feature.t()) 65 | return scores 66 | 67 | 68 | @torch.no_grad() 69 | def compute_item(self,item): 70 | return self.visual_encoder(item) 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /code/REC/model/VisualModel/mofm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | from REC.model.layers import MLPLayers, BaseFactorizationMachine 5 | from REC.utils import InputType 6 | from REC.model.basemodel import BaseModel 7 | from logging import getLogger 8 | from REC.model.load import load_model 9 | 10 | 11 | class MOFM(BaseModel): 12 | input_type = InputType.SEQ 13 | def __init__(self, config, dataload): 14 | super(MOFM, self).__init__() 15 | 16 | # load parameters info 17 | self.mlp_hidden_size = config['mlp_hidden_size'] 18 | self.dropout_prob = config['dropout_prob'] 19 | self.embedding_size = config['embedding_size'] 20 | 21 | self.device = config['device'] 22 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 23 | 24 | self.item_num = dataload.item_num 25 | self.visual_encoder = load_model(config=config) 26 | 27 | self.fm = BaseFactorizationMachine(reduce_sum=True) 28 | 29 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 30 | 31 | def mask_emb(self, input_item_embs, mask): 32 | 33 | mask = mask.unsqueeze(-1).expand_as(input_item_embs) 34 | masked_token_seq_embedding = input_item_embs * mask 35 | 36 | return masked_token_seq_embedding 37 | 38 | 39 | 40 | def forward(self, inputs): 41 | 42 | items_index,all_item_modal = inputs 43 | 
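        # all_item_modal presumably holds the raw images of the (de-duplicated) items referenced
        # in this batch; they are encoded once by the visual encoder and then gathered per
        # position through items_index, so no image is pushed through the backbone twice.
        # Index 0 is the padding item, which mask_emb zeroes out before the FM interaction.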
mask = items_index != 0 44 | all_item_embs = self.visual_encoder(all_item_modal) 45 | input_item_embs = all_item_embs[items_index, :] 46 | 47 | inputs_embedding = self.mask_emb(input_item_embs, mask) 48 | scores = self.fm(inputs_embedding.flatten(0,1)) 49 | output = scores.view(-1,2) 50 | batch_loss = -torch.mean(torch.log(1e-8+torch.sigmoid(torch.matmul(output, self.weight)))) 51 | return batch_loss 52 | 53 | 54 | @torch.no_grad() 55 | def predict(self,user_seq,item_feature): 56 | mask = user_seq != 0 57 | input_embs = item_feature[user_seq] 58 | user_embedding = self.mask_emb(input_embs, mask) 59 | user_embedding = torch.sum(user_embedding, dim=1) 60 | scores = torch.matmul(user_embedding,item_feature.t()) 61 | return scores 62 | 63 | @torch.no_grad() 64 | def compute_item(self,item): 65 | return self.visual_encoder(item) 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /code/REC/model/VisualModel/mogru4rec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn.init import xavier_uniform_, xavier_normal_ 4 | from REC.model.layers import TransformerEncoder 5 | from REC.utils.enum_type import InputType 6 | from REC.model.load import load_model 7 | from REC.model.basemodel import BaseModel 8 | 9 | class MOGRU4Rec(BaseModel): 10 | input_type = InputType.SEQ 11 | 12 | def __init__(self, config, dataload): 13 | super(MOGRU4Rec, self).__init__() 14 | 15 | # load parameters info 16 | 17 | self.embedding_size = config['embedding_size'] 18 | self.hidden_size = config['hidden_size'] * config['embedding_size'] 19 | self.num_layers = config['num_layers'] 20 | self.dropout_prob = config['dropout_prob'] 21 | 22 | self.initializer_range = config['initializer_range'] 23 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 24 | self.item_num = dataload.item_num 25 | # define layers and loss 26 | 27 | self.visual_encoder = load_model(config=config) 28 | self.emb_dropout = nn.Dropout(self.dropout_prob) 29 | self.gru_layers = nn.GRU( 30 | input_size=self.embedding_size, 31 | hidden_size=self.hidden_size, 32 | num_layers=self.num_layers, 33 | bias=False, 34 | batch_first=True, 35 | ) 36 | 37 | self.dense = nn.Linear(self.hidden_size, self.embedding_size) 38 | 39 | xavier_uniform_(self.gru_layers.weight_hh_l0) 40 | xavier_uniform_(self.gru_layers.weight_ih_l0) 41 | xavier_normal_(self.dense.weight) 42 | 43 | 44 | def forward(self, interaction): 45 | items, masked_index = interaction 46 | batch_size = masked_index.shape[0] 47 | item_emb = self.visual_encoder(items.flatten(0,1)).view(batch_size, -1, 2, self.embedding_size) #[batch, 2, max_seq_len+1, dim] 48 | pos_items_embs = item_emb[:, :, 0] 49 | neg_items_embs = item_emb[:, :, 1] 50 | 51 | input_emb = pos_items_embs[:, :-1, :] 52 | target_pos_embs = pos_items_embs[:, 1:, :] 53 | target_neg_embs = neg_items_embs[:, 1:, :] 54 | 55 | input_emb_dropout = self.emb_dropout(input_emb) 56 | gru_output, _ = self.gru_layers(input_emb_dropout) 57 | gru_output = self.dense(gru_output) 58 | 59 | pos_score = (gru_output * target_pos_embs).sum(-1) 60 | neg_score = (gru_output * target_neg_embs).sum(-1) 61 | 62 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)*masked_index).sum(-1) 63 | return loss.mean(-1) 64 | 65 | @torch.no_grad() 66 | def predict(self, item_seq, item_feature): 67 | 68 | item_emb = item_feature[item_seq] 69 | 70 | item_seq_emb_dropout = self.emb_dropout(item_emb) 71 | gru_output, _ = 
self.gru_layers(item_seq_emb_dropout) 72 | gru_output = self.dense(gru_output) 73 | hidden = gru_output[:, -1] 74 | scores = torch.matmul(hidden, item_feature.t()) 75 | return scores 76 | 77 | 78 | @torch.no_grad() 79 | def compute_item(self, item): 80 | return self.visual_encoder(item) 81 | 82 | 83 | -------------------------------------------------------------------------------- /code/REC/model/VisualModel/molightsans.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from REC.model.layers import LightTransformerEncoder 4 | from REC.utils.enum_type import InputType 5 | from REC.model.load import load_model 6 | from REC.model.basemodel import BaseModel 7 | 8 | class MOLightSANs(BaseModel): 9 | input_type = InputType.SEQ 10 | 11 | def __init__(self, config, dataload): 12 | super(MOLightSANs, self).__init__() 13 | 14 | # load parameters info 15 | self.n_layers = config['n_layers'] 16 | self.n_heads = config['n_heads'] 17 | self.embedding_size = config['embedding_size'] # same as embedding_size 18 | self.inner_size = config['inner_size']* self.embedding_size # the dimensionality in feed-forward layer 19 | self.hidden_dropout_prob = config['hidden_dropout_prob'] 20 | self.attn_dropout_prob = config['attn_dropout_prob'] 21 | self.hidden_act = config['hidden_act'] 22 | self.layer_norm_eps = config['layer_norm_eps'] 23 | self.k_interests = config["k_interests"] 24 | self.device = config['device'] 25 | self.initializer_range = config['initializer_range'] 26 | self.max_seq_length = config['MAX_ITEM_LIST_LENGTH'] 27 | self.item_num = dataload.item_num 28 | # define layers and loss 29 | 30 | self.visual_encoder = load_model(config=config) 31 | 32 | self.position_embedding = nn.Embedding(self.max_seq_length, self.embedding_size) 33 | self.LayerNorm = nn.LayerNorm(self.embedding_size, eps=self.layer_norm_eps) 34 | self.dropout = nn.Dropout(self.hidden_dropout_prob) 35 | 36 | self.trm_encoder = LightTransformerEncoder( 37 | n_layers=self.n_layers, 38 | n_heads=self.n_heads, 39 | k_interests=self.k_interests, 40 | hidden_size=self.embedding_size, 41 | seq_len=self.max_seq_length, 42 | inner_size=self.inner_size, 43 | hidden_dropout_prob=self.hidden_dropout_prob, 44 | attn_dropout_prob=self.attn_dropout_prob, 45 | hidden_act=self.hidden_act, 46 | layer_norm_eps=self.layer_norm_eps 47 | ) 48 | 49 | self.position_embedding.weight.data.normal_(mean=0.0, std=self.initializer_range) 50 | self.trm_encoder.apply(self._init_weights) 51 | self.LayerNorm.bias.data.zero_() 52 | self.LayerNorm.weight.data.fill_(1.0) 53 | 54 | def _init_weights(self, module): 55 | if isinstance(module, (nn.Linear, nn.Embedding)): 56 | module.weight.data.normal_(mean=0.0, std=self.initializer_range) 57 | elif isinstance(module, nn.LayerNorm): 58 | module.bias.data.zero_() 59 | module.weight.data.fill_(1.0) 60 | if isinstance(module, nn.Linear) and module.bias is not None: 61 | module.bias.data.zero_() 62 | 63 | def forward(self, items): 64 | 65 | batch_size = items.shape[0] 66 | item_emb = self.visual_encoder(items.flatten(0,1)).view(batch_size,-1,self.embedding_size) #[batch, max_seq_len+2, dim] 67 | 68 | input_emb = item_emb[:, :-2, :] 69 | target_pos_embs = item_emb[:, -2, :] 70 | target_neg_embs = item_emb[:, -1, :] 71 | 72 | 73 | position_ids = torch.arange(input_emb.size(1), dtype=torch.long, device=self.device) 74 | position_embedding = self.position_embedding(position_ids) 75 | 76 | input_emb = self.LayerNorm(input_emb) 77 | input_emb = 
self.dropout(input_emb) 78 | 79 | output_embs = self.trm_encoder(input_emb, position_embedding, output_all_encoded_layers=False) #[batch, max_seq_len-1, dim] 80 | output_embs = output_embs[-1] 81 | output_embs = output_embs[:, -1, :] 82 | 83 | pos_score = (output_embs * target_pos_embs).sum(-1) 84 | neg_score = (output_embs * target_neg_embs).sum(-1) 85 | loss = - (torch.log((pos_score - neg_score).sigmoid() + 1e-8)) 86 | return loss.mean(-1) 87 | 88 | @torch.no_grad() 89 | def predict(self, item_seq, item_feature): 90 | 91 | position_ids = torch.arange(item_seq.size(1), dtype=torch.long, device=item_seq.device) 92 | position_embedding = self.position_embedding(position_ids) 93 | 94 | input_emb = item_feature[item_seq] 95 | input_emb = self.LayerNorm(input_emb) 96 | input_emb = self.dropout(input_emb) 97 | 98 | output = self.trm_encoder(input_emb, position_embedding, output_all_encoded_layers=False) 99 | output_embs = output[-1] 100 | seq_output = output_embs[:, -1] 101 | 102 | scores = torch.matmul(seq_output, item_feature.t()) 103 | return scores 104 | 105 | 106 | @torch.no_grad() 107 | def compute_item(self, item): 108 | return self.visual_encoder(item) 109 | 110 | 111 | -------------------------------------------------------------------------------- /code/REC/model/VisualModel/momf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.init import xavier_normal_, constant_ 4 | 5 | from REC.model.layers import MLPLayers 6 | from REC.model.load import load_model 7 | from REC.utils import InputType 8 | from REC.model.basemodel import BaseModel 9 | 10 | class MOMF(BaseModel): 11 | 12 | input_type = InputType.PAIR 13 | 14 | def __init__(self, config, data): 15 | super(MOMF, self).__init__() 16 | 17 | self.mlp_hidden_size = config['mlp_hidden_size'] 18 | self.dropout_prob = config['dropout_prob'] 19 | self.embedding_size = config['embedding_size'] 20 | self.out_size = self.mlp_hidden_size[-1] if len(self.mlp_hidden_size) else self.embedding_size 21 | self.device = config['device'] 22 | 23 | self.user_num = data.user_num 24 | self.item_num = data.item_num 25 | 26 | 27 | user_size_list = [self.embedding_size] + self.mlp_hidden_size 28 | item_size_list = [self.embedding_size] + self.mlp_hidden_size 29 | 30 | # define layers and loss 31 | self.user_mlp_layers = MLPLayers(user_size_list, self.dropout_prob, activation='tanh', bn=True) 32 | self.item_mlp_layers = MLPLayers(item_size_list, self.dropout_prob, activation='tanh', bn=True) 33 | 34 | self.user_embedding = nn.Embedding(self.user_num, self.embedding_size) 35 | 36 | self.weight = torch.tensor([[1.0],[-1.0]]).to(self.device) 37 | # parameters initialization 38 | self.apply(self._init_weights) 39 | self.visual_encoder = load_model(config=config) 40 | 41 | def _init_weights(self, module): 42 | if isinstance(module, nn.Embedding): 43 | xavier_normal_(module.weight.data) 44 | elif isinstance(module, nn.Linear): 45 | xavier_normal_(module.weight.data) 46 | if module.bias is not None: 47 | constant_(module.bias.data, 0) 48 | 49 | 50 | def forward(self, input): 51 | user, item = input 52 | embed_user = self.user_embedding(user) 53 | embed_item = self.visual_encoder(item.flatten(0,1)) 54 | user_dnn_out = self.user_mlp_layers(embed_user).unsqueeze(1) 55 | item_dnn_out = self.item_mlp_layers(embed_item) 56 | item_dnn_out = item_dnn_out.view(user.shape[0], -1, self.out_size) 57 | score = (user_dnn_out * item_dnn_out).sum(-1) 58 | output = score.view(-1,2) 59 | 
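        # output has shape [batch, 2]; given the sign of the loss below, column 0 is the positive
        # item score and column 1 the sampled negative. Multiplying by self.weight = [[1.], [-1.]]
        # reduces each row to (pos - neg), so the next line is the pairwise BPR objective
        # -log(sigmoid(pos - neg)) averaged over the batch. A minimal equivalent sketch:
        #   pos, neg = output[:, 0], output[:, 1]
        #   batch_loss = -torch.log(torch.sigmoid(pos - neg)).mean()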
batch_loss = -torch.mean(torch.log(torch.sigmoid(torch.matmul(output, self.weight)))) 60 | return batch_loss 61 | 62 | @torch.no_grad() 63 | def predict(self, user,item_feature): 64 | user_feature = self.user_embedding(user) 65 | user_dnn_out = self.user_mlp_layers(user_feature) 66 | scores = torch.matmul(user_dnn_out,item_feature.t()) 67 | return scores 68 | 69 | @torch.no_grad() 70 | def compute_item(self, item): 71 | return self.item_mlp_layers(self.visual_encoder(item)) 72 | 73 | -------------------------------------------------------------------------------- /code/REC/model/basemodel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from REC.utils import set_color 6 | 7 | 8 | 9 | 10 | class BaseModel(nn.Module): 11 | 12 | def __init__(self): 13 | super(BaseModel, self).__init__() 14 | 15 | 16 | 17 | def load_weights(self, path): 18 | checkpoint = torch.load(path,map_location='cpu') 19 | pretrained_dicts = checkpoint['state_dict'] 20 | self.load_state_dict({k.replace('item_embedding.rec_fc', 'visual_encoder.item_encoder.fc') 21 | :v for k,v in pretrained_dicts.items()}, strict=False) 22 | 23 | 24 | 25 | 26 | def __str__(self): 27 | """ 28 | Model prints with number of trainable parameters 29 | """ 30 | model_parameters = filter(lambda p: p.requires_grad, self.parameters()) 31 | params = sum([np.prod(p.size()) for p in model_parameters]) 32 | return super().__str__() + set_color('\nTrainable parameters', 'blue') + f': {params}' 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /code/REC/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from .hyper_tuning import HyperTuning 2 | from .trainer import * 3 | 4 | 5 | __all__ = ['Trainer'] 6 | -------------------------------------------------------------------------------- /code/REC/trainer/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/trainer/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/trainer/__pycache__/hyper_tuning.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/trainer/__pycache__/hyper_tuning.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/trainer/__pycache__/trainer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/trainer/__pycache__/trainer.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import init_logger, set_color 2 | from .utils import get_local_time, ensure_dir, get_model, \ 3 | early_stopping, calculate_valid_score, dict2str, init_seed, get_tensorboard, get_gpu_usage 4 | 5 | from .enum_type import * 6 | from .argument_list import * 7 | from .wandblogger import WandbLogger 8 | 9 | __all__ = [ 10 | 'init_logger', 
'get_local_time', 'ensure_dir', 'get_model', 'early_stopping', 11 | 'calculate_valid_score', 'dict2str', 'Enum', 'EvaluatorType', 'InputType', 12 | 'init_seed', 'general_arguments', 'training_arguments', 'evaluation_arguments', 13 | 'dataset_arguments', 'get_tensorboard', 'set_color', 'get_gpu_usage', 'WandbLogger' 14 | ] 15 | 16 | -------------------------------------------------------------------------------- /code/REC/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/utils/__pycache__/argument_list.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/utils/__pycache__/argument_list.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/utils/__pycache__/enum_type.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/utils/__pycache__/enum_type.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/utils/__pycache__/logger.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/utils/__pycache__/logger.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/utils/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/utils/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/utils/__pycache__/wandblogger.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/code/REC/utils/__pycache__/wandblogger.cpython-39.pyc -------------------------------------------------------------------------------- /code/REC/utils/argument_list.py: -------------------------------------------------------------------------------- 1 | general_arguments = [ 2 | 'seed', 3 | 'reproducibility', 4 | 'state', 5 | 'model', 6 | 'data_path', 7 | 'checkpoint_dir', 8 | 'show_progress', 9 | 'config_file', 10 | 'log_wandb', 11 | 'use_modality' 12 | ] 13 | 14 | training_arguments = [ 15 | 'epochs', 'train_batch_size', 16 | 'optim_args', 17 | 'eval_step', 'stopping_step', 18 | 'clip_grad_norm', 19 | 'loss_decimal_place', 20 | ] 21 | 22 | evaluation_arguments = [ 23 | 'eval_type', 24 | 'repeatable', 25 | 'metrics', 'topk', 'valid_metric', 'valid_metric_bigger', 26 | 'eval_batch_size', 27 | 'metric_decimal_place', 28 | ] 29 | 30 | dataset_arguments = [ 31 | 'MAX_ITEM_LIST_LENGTH' 32 | ] 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /code/REC/utils/enum_type.py: 
-------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class InputType(Enum): 4 | 5 | SEQ = 1 6 | PAIR = 2 7 | AUGSEQ = 3 8 | 9 | 10 | class EvaluatorType(Enum): 11 | """Type for evaluation metrics. 12 | 13 | - ``RANKING``: Ranking-based metrics like NDCG, Recall, etc. 14 | - ``VALUE``: Value-based metrics like AUC, etc. 15 | """ 16 | RANKING = 1 17 | VALUE = 2 -------------------------------------------------------------------------------- /code/REC/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import colorlog 4 | import re 5 | import torch 6 | from REC.utils.utils import get_local_time, ensure_dir 7 | from colorama import init 8 | 9 | log_colors_config = { 10 | 'DEBUG': 'cyan', 11 | 'WARNING': 'yellow', 12 | 'ERROR': 'red', 13 | 'CRITICAL': 'red', 14 | } 15 | 16 | 17 | class RemoveColorFilter(logging.Filter): 18 | 19 | def filter(self, record): 20 | if record: 21 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 22 | record.msg = ansi_escape.sub('', str(record.msg)) 23 | return True 24 | 25 | 26 | def set_color(log, color, highlight=True): 27 | color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white'] 28 | try: 29 | index = color_set.index(color) 30 | except: 31 | index = len(color_set) - 1 32 | prev_log = '\033[' 33 | if highlight: 34 | prev_log += '1;3' 35 | else: 36 | prev_log += '0;3' 37 | prev_log += str(index) + 'm' 38 | return prev_log + log + '\033[0m' 39 | 40 | 41 | def init_logger(config): 42 | """ 43 | A logger that can show a message on standard output and write it into the 44 | file named `filename` simultaneously. 45 | All the message that you want to log MUST be str. 46 | 47 | Args: 48 | config (Config): An instance object of Config, used to record parameter information. 
49 | 50 | Example: 51 | >>> logger = logging.getLogger(config) 52 | >>> logger.debug(train_state) 53 | >>> logger.info(train_result) 54 | """ 55 | 56 | init(autoreset=True) 57 | LOGROOT = './log/' 58 | dir_name = os.path.dirname(LOGROOT) 59 | rank = torch.distributed.get_rank() 60 | if rank == 0: 61 | ensure_dir(dir_name) 62 | model_name = os.path.join(dir_name, config['model']) 63 | ensure_dir(model_name) 64 | torch.distributed.barrier() 65 | logfilename = '{}/{}.log'.format(config['model'], get_local_time()) 66 | 67 | logfilepath = os.path.join(LOGROOT, logfilename) 68 | if config['log_path']: 69 | logfilepath = os.path.join(LOGROOT,config['log_path']) 70 | 71 | filefmt = "%(asctime)-15s %(levelname)s %(message)s" 72 | filedatefmt = "%a %d %b %Y %H:%M:%S" 73 | fileformatter = logging.Formatter(filefmt, filedatefmt) 74 | 75 | sfmt = "%(log_color)s%(asctime)-15s %(levelname)s %(message)s" 76 | sdatefmt = "%d %b %H:%M" 77 | sformatter = colorlog.ColoredFormatter(sfmt, sdatefmt, log_colors=log_colors_config) 78 | if config['state'] is None or config['state'].lower() == 'info': 79 | level = logging.INFO 80 | elif config['state'].lower() == 'debug': 81 | level = logging.DEBUG 82 | elif config['state'].lower() == 'error': 83 | level = logging.ERROR 84 | elif config['state'].lower() == 'warning': 85 | level = logging.WARNING 86 | elif config['state'].lower() == 'critical': 87 | level = logging.CRITICAL 88 | else: 89 | level = logging.INFO 90 | 91 | fh = logging.FileHandler(logfilepath) 92 | fh.setLevel(level) 93 | fh.setFormatter(fileformatter) 94 | remove_color_filter = RemoveColorFilter() 95 | fh.addFilter(remove_color_filter) 96 | 97 | sh = logging.StreamHandler() 98 | sh.setLevel(level) 99 | sh.setFormatter(sformatter) 100 | 101 | logging.basicConfig(level=level if rank in [-1, 0] else logging.WARN, handlers=[sh, fh]) 102 | -------------------------------------------------------------------------------- /code/REC/utils/wandblogger.py: -------------------------------------------------------------------------------- 1 | class WandbLogger(object): 2 | """WandbLogger to log metrics to Weights and Biases. 3 | 4 | """ 5 | def __init__(self, config): 6 | """ 7 | Args: 8 | config (dict): A dictionary of parameters used by RecBole. 9 | """ 10 | self.config = config 11 | self.log_wandb = config.log_wandb 12 | self.setup() 13 | 14 | def setup(self): 15 | if self.log_wandb: 16 | try: 17 | import wandb 18 | self._wandb = wandb 19 | except ImportError: 20 | raise ImportError( 21 | "To use the Weights and Biases Logger please install wandb." 22 | "Run `pip install wandb` to install it." 
23 | ) 24 | 25 | # Initialize a W&B run 26 | if self._wandb.run is None: 27 | self._wandb.init( 28 | project=self.config.wandb_project, 29 | config=self.config 30 | ) 31 | 32 | self._set_steps() 33 | 34 | def log_metrics(self, metrics, head='train', commit=True): 35 | if self.log_wandb: 36 | if head: 37 | metrics = self._add_head_to_metrics(metrics, head) 38 | self._wandb.log(metrics, commit=commit) 39 | else: 40 | self._wandb.log(metrics, commit=commit) 41 | 42 | def log_eval_metrics(self, metrics, head='eval'): 43 | if self.log_wandb: 44 | metrics = self._add_head_to_metrics(metrics, head) 45 | for k, v in metrics.items(): 46 | self._wandb.run.summary[k] = v 47 | 48 | def _set_steps(self): 49 | self._wandb.define_metric('train/*', step_metric='train_step') 50 | self._wandb.define_metric('valid/*', step_metric='valid_step') 51 | 52 | def _add_head_to_metrics(self, metrics, head): 53 | head_metrics = dict() 54 | for k, v in metrics.items(): 55 | if '_step' in k: 56 | head_metrics[k] = v 57 | else: 58 | head_metrics[f'{head}/{k}'] = v 59 | 60 | return head_metrics 61 | -------------------------------------------------------------------------------- /code/ViNet/acf.yaml: -------------------------------------------------------------------------------- 1 | model: ACF 2 | seed: 2020 3 | use_modality: False 4 | checkpoint_dir: 'saved' 5 | show_progress: False 6 | 7 | MAX_ITEM_LIST_LENGTH: 10 8 | 9 | embedding_size: 512 10 | 11 | log_wandb: False 12 | wandb_project: 'REC' 13 | MAX_ITEM_LIST_LENGTH: 10 14 | 15 | data_path: ../dataset/ 16 | dataset: Pixel200K 17 | 18 | v_feat_path: ../dataset/visual_features/RN50_layer4.npy 19 | 20 | 21 | # training settings 22 | epochs: 200 23 | train_batch_size: 512 24 | optim_args: { 25 | learning_rate: 0.0001, 26 | weight_decay: 0.01 27 | } 28 | 29 | 30 | # eval settings 31 | eval_batch_size: 512 32 | topk: [5,10] 33 | metrics: ['Recall', 'NDCG'] 34 | valid_metric: NDCG@10 35 | metric_decimal_place: 7 36 | eval_step: 1 37 | stopping_step: 30 38 | 39 | 40 | -------------------------------------------------------------------------------- /code/ViNet/sasrec_semantic_id.yaml: -------------------------------------------------------------------------------- 1 | model: FSASRec 2 | semantic_model: True 3 | seed: 2020 4 | use_modality: False 5 | checkpoint_dir: 'saved' 6 | MAX_ITEM_LIST_LENGTH: 10 7 | 8 | n_layers: 2 9 | n_heads: 4 10 | embedding_size: 512 11 | inner_size: 2 12 | hidden_dropout_prob: 0.1 13 | attn_dropout_prob: 0.1 14 | hidden_act: 'gelu' 15 | layer_norm_eps: 1e-12 16 | initializer_range: 0.02 17 | 18 | log_wandb: False 19 | wandb_project: 'REC' 20 | show_progress: False 21 | MAX_ITEM_LIST_LENGTH: 10 22 | 23 | data_path: ../dataset/ 24 | dataset: Pixel200K 25 | 26 | 27 | semantic_id_path: ../dataset/visual_features/sematic_id_len_5_size_512.npy 28 | 29 | # training settings 30 | epochs: 200 31 | train_batch_size: 512 32 | 33 | optim_args: { 34 | modal_lr: 0.0001, 35 | rec_lr: 0.001, 36 | modal_decay: 0.1, 37 | rec_decay: 0 38 | } 39 | 40 | # eval settings 41 | eval_batch_size: 512 42 | topk: [5,10] 43 | metrics: ['Recall', 'NDCG'] 44 | valid_metric: NDCG@10 45 | metric_decimal_place: 7 46 | eval_step: 1 47 | stopping_step: 30 48 | 49 | 50 | -------------------------------------------------------------------------------- /code/ViNet/sasrec_v.yaml: -------------------------------------------------------------------------------- 1 | model: FSASRec 2 | freeze_model: True 3 | seed: 2020 4 | use_modality: False 5 | checkpoint_dir: 'saved' 6 | 
MAX_ITEM_LIST_LENGTH: 10 7 | 8 | n_layers: 2 9 | n_heads: 4 10 | embedding_size: 512 11 | inner_size: 2 12 | hidden_dropout_prob: 0.1 13 | attn_dropout_prob: 0.1 14 | hidden_act: 'gelu' 15 | layer_norm_eps: 1e-12 16 | initializer_range: 0.02 17 | 18 | log_wandb: False 19 | wandb_project: 'REC' 20 | show_progress: False 21 | MAX_ITEM_LIST_LENGTH: 10 22 | 23 | data_path: ../dataset/ 24 | dataset: Pixel200K 25 | 26 | v_feat_path: ../dataset/visual_features/RN50.npy 27 | 28 | 29 | # training settings 30 | epochs: 200 31 | train_batch_size: 512 32 | 33 | optim_args: { 34 | modal_lr: 0.0001, 35 | rec_lr: 0.001, 36 | modal_decay: 0.1, 37 | rec_decay: 0 38 | } 39 | 40 | # eval settings 41 | eval_batch_size: 512 42 | topk: [5,10] 43 | metrics: ['Recall', 'NDCG'] 44 | valid_metric: NDCG@10 45 | metric_decimal_place: 7 46 | eval_step: 1 47 | stopping_step: 30 48 | 49 | 50 | -------------------------------------------------------------------------------- /code/ViNet/sasrec_vid.yaml: -------------------------------------------------------------------------------- 1 | model: FSASRec 2 | hybrid_model: True 3 | seed: 2020 4 | use_modality: False 5 | checkpoint_dir: 'saved' 6 | MAX_ITEM_LIST_LENGTH: 10 7 | 8 | n_layers: 2 9 | n_heads: 4 10 | embedding_size: 512 11 | inner_size: 2 12 | hidden_dropout_prob: 0.1 13 | attn_dropout_prob: 0.1 14 | hidden_act: 'gelu' 15 | layer_norm_eps: 1e-12 16 | initializer_range: 0.02 17 | 18 | log_wandb: False 19 | wandb_project: 'REC' 20 | show_progress: False 21 | MAX_ITEM_LIST_LENGTH: 10 22 | 23 | data_path: ../dataset/ 24 | dataset: Pixel200K 25 | 26 | v_feat_path: ../dataset/visual_features/RN50.npy 27 | 28 | 29 | # training settings 30 | epochs: 200 31 | train_batch_size: 512 32 | 33 | optim_args: { 34 | modal_lr: 0.0001, 35 | rec_lr: 0.001, 36 | modal_decay: 0.1, 37 | rec_decay: 0 38 | } 39 | 40 | # eval settings 41 | eval_batch_size: 512 42 | topk: [5,10] 43 | metrics: ['Recall', 'NDCG'] 44 | valid_metric: NDCG@10 45 | metric_decimal_place: 7 46 | eval_step: 1 47 | stopping_step: 30 48 | 49 | 50 | -------------------------------------------------------------------------------- /code/ViNet/vbpr.yaml: -------------------------------------------------------------------------------- 1 | model: VBPR 2 | seed: 2020 3 | use_modality: False 4 | checkpoint_dir: 'saved' 5 | MAX_ITEM_LIST_LENGTH: 10 6 | 7 | embedding_size: 4096 8 | dropout_prob: 0 9 | 10 | log_wandb: False 11 | wandb_project: 'REC' 12 | show_progress: False 13 | MAX_ITEM_LIST_LENGTH: 10 14 | 15 | data_path: ../dataset/ 16 | dataset: Pixel200K 17 | 18 | v_feat_path: ../dataset/visual_features/RN50.npy 19 | 20 | 21 | # training settings 22 | epochs: 200 23 | train_batch_size: 512 24 | decay_check_name: 'projection' 25 | optim_args: { 26 | modal_lr: 0.0001, 27 | rec_lr: 0.001, 28 | modal_decay: 0.1, 29 | rec_decay: 0 30 | } 31 | 32 | # eval settings 33 | eval_batch_size: 512 34 | topk: [5,10] 35 | metrics: ['Recall', 'NDCG'] 36 | valid_metric: NDCG@10 37 | metric_decimal_place: 7 38 | eval_step: 1 39 | stopping_step: 30 40 | 41 | 42 | -------------------------------------------------------------------------------- /code/ViNet/visrank.yaml: -------------------------------------------------------------------------------- 1 | model: VISRANK 2 | seed: 2020 3 | 4 | method: average_top_k 5 | top_num: 1 6 | 7 | use_modality: False 8 | log_wandb: False 9 | wandb_project: 'REC' 10 | show_progress: False 11 | checkpoint_dir: 'saved' 12 | 13 | MAX_ITEM_LIST_LENGTH: 10 14 | 15 | data_path: ../dataset/ 16 | dataset: Pixel200K 
17 | 18 | v_feat_path: ../dataset/visual_features/RN50.npy 19 | 20 | 21 | # training settings 22 | epochs: 1 23 | need_training: False 24 | train_batch_size: 512 25 | optim_args: { 26 | learning_rate: 0.0001, 27 | weight_decay: 0.1 28 | } 29 | 30 | # eval settings 31 | eval_batch_size: 1 32 | topk: [5,10] 33 | metrics: ['Recall', 'NDCG'] 34 | valid_metric: NDCG@10 35 | metric_decimal_place: 7 36 | eval_step: 1 37 | stopping_step: 30 38 | 39 | 40 | -------------------------------------------------------------------------------- /code/generate_lmdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | import numpy as np 4 | import lmdb 5 | import pandas as pd 6 | import pickle 7 | import tqdm 8 | import torch 9 | import torchvision.transforms as transforms 10 | 11 | torch.manual_seed(123456) 12 | ''' 13 | bulid lmdb database from images. 14 | ''' 15 | 16 | interaction_path = '../dataset/Pixel200K.csv' 17 | image_path = '../images/' 18 | generate_lmdb_name = '../dataset/image.lmdb' 19 | 20 | class LMDB_Image: 21 | def __init__(self, image, id): 22 | self.channels = image.shape[2] 23 | self.size = image.shape[:2] 24 | self.image = image.tobytes() 25 | self.id = id 26 | 27 | def get_image(self): 28 | image = np.frombuffer(self.image, dtype=np.uint8) 29 | return image.reshape(*self.size, self.channels) 30 | 31 | 32 | if __name__ == '__main__': 33 | print('build lmdb database') 34 | interaction = pd.read_csv(interaction_path, usecols=[0]) 35 | items = interaction['item_id'].unique() 36 | image_num = len(items) 37 | print("all images %s" % image_num) 38 | 39 | lmdb_path = generate_lmdb_name 40 | isdir = os.path.isdir(lmdb_path) 41 | print("Generate LMDB to %s" % lmdb_path) 42 | lmdb_env = lmdb.open(lmdb_path, subdir=isdir, map_size=image_num * np.zeros((3, 224, 224)).nbytes*10, 43 | readonly=False, meminit=False, map_async=True) 44 | txn = lmdb_env.begin(write=True) 45 | write_frequency = 5000 46 | 47 | 48 | bad_file = {} 49 | t = transforms.Resize((224,224)) 50 | lmdb_keys = [] 51 | for index, row in enumerate(tqdm.tqdm(items)): 52 | item_id = str(row) 53 | item_name = str(row)+ '.jpg' 54 | lmdb_keys.append(item_id) 55 | try: 56 | img = Image.open(os.path.join(image_path, item_name)).convert('RGB') 57 | img = t(img) #resize the image to (3,224,224) before stored into database,you can remove this if you don't need it. 
58 | img = np.array(img) 59 | temp = LMDB_Image(img, item_id) 60 | txn.put(u'{}'.format(item_id).encode('ascii'), pickle.dumps(temp)) 61 | if index % write_frequency == 0 and index != 0: 62 | txn.commit() 63 | txn = lmdb_env.begin(write=True) 64 | except Exception as e: 65 | bad_file[index] = item_id 66 | 67 | txn.commit() 68 | keys = [u'{}'.format(k).encode('ascii') for k in lmdb_keys] 69 | with lmdb_env.begin(write=True) as txn: 70 | txn.put(b'__keys__', pickle.dumps(keys)) 71 | txn.put(b'__len__', pickle.dumps(len(keys))) 72 | print(len(keys)) 73 | print("Flushing database ...") 74 | lmdb_env.sync() 75 | lmdb_env.close() 76 | print(f'bad_file: {len(bad_file)}') 77 | -------------------------------------------------------------------------------- /code/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 5 | os.environ["OMP_NUM_THREADS"] = '1' 6 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' 7 | 8 | 9 | if __name__ == '__main__': 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--device", default="0", type=str) 12 | parser.add_argument("--config_file",nargs='+') 13 | 14 | args = parser.parse_args() 15 | device = args.device 16 | config_file = args.config_file 17 | 18 | import random 19 | master_port = random.randint(1002,9999) 20 | 21 | nproc_per_node = len(device.split(',')) 22 | 23 | if len(config_file) ==2: 24 | run_yaml = f"CUDA_VISIBLE_DEVICES='{device}' python -m torch.distributed.run --nproc_per_node {nproc_per_node} \ 25 | --master_port {master_port} run.py --config_file {config_file[0]} {config_file[1]}" 26 | elif len(config_file) ==1: 27 | run_yaml = f"CUDA_VISIBLE_DEVICES='{device}' python -m torch.distributed.run --nproc_per_node {nproc_per_node} \ 28 | --master_port {master_port} run.py --config_file {config_file[0]}" 29 | 30 | 31 | os.system(run_yaml) 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /code/overall/BEiT.yaml: -------------------------------------------------------------------------------- 1 | # general 2 | seed: 2020 3 | state: INFO 4 | use_modality: True 5 | reproducibility: True 6 | checkpoint_dir: 'saved' 7 | show_progress: False 8 | 9 | log_wandb: False 10 | wandb_project: 'REC' 11 | MAX_ITEM_LIST_LENGTH: 10 12 | 13 | data_path: ../dataset/ # dataset path 14 | dataset: Pixel200K # dataset name 15 | image_path: ../dataset/image.lmdb # lmdb path 16 | 17 | encoder_name: 'beit-base-patch16' 18 | encoder_source: 'transformers' 19 | 20 | 21 | 22 | # training settings 23 | epochs: 200 24 | train_batch_size: 16 25 | fine_tune_arg: { 26 | tune_scale: 183, 27 | pre_trained: True, 28 | activation: 'relu', 29 | dnn_layers: [], 30 | method: 'cls' 31 | } 32 | 33 | 34 | optim_args: { 35 | modal_lr: 0.0001, 36 | rec_lr: 0.0001, 37 | modal_decay: 0, 38 | rec_decay: 0.1 39 | } 40 | 41 | 42 | # eval settings 43 | eval_batch_size: 1024 44 | topk: [5,10] 45 | metrics: ['Recall', 'NDCG'] 46 | valid_metric: NDCG@10 47 | metric_decimal_place: 7 48 | eval_step: 1 49 | stopping_step: 30 50 | -------------------------------------------------------------------------------- /code/overall/ID.yaml: -------------------------------------------------------------------------------- 1 | # general 2 | seed: 2020 3 | state: INFO 4 | use_modality: False 5 | reproducibility: True 6 | checkpoint_dir: 'saved' 7 | show_progress: False 8 | 9 | log_wandb: False 10 | wandb_project: 'REC' 11 | 
MAX_ITEM_LIST_LENGTH: 10 12 | 13 | data_path: ../dataset/ # dataset path 14 | dataset: Pixel200K # dataset name 15 | 16 | 17 | # training settings 18 | epochs: 200 19 | train_batch_size: 64 20 | optim_args: { 21 | learning_rate: 0.0001, 22 | weight_decay: 0.1 23 | } 24 | 25 | 26 | # eval settings 27 | eval_batch_size: 1024 28 | topk: [5,10] 29 | metrics: ['Recall', 'NDCG'] 30 | valid_metric: NDCG@10 31 | metric_decimal_place: 7 32 | eval_step: 1 33 | stopping_step: 30 34 | 35 | 36 | -------------------------------------------------------------------------------- /code/overall/RN50.yaml: -------------------------------------------------------------------------------- 1 | # general 2 | seed: 2020 3 | state: INFO 4 | use_modality: True 5 | reproducibility: True 6 | checkpoint_dir: 'saved' 7 | show_progress: False 8 | 9 | log_wandb: False 10 | wandb_project: 'REC' 11 | MAX_ITEM_LIST_LENGTH: 10 12 | 13 | data_path: ../dataset/ # dataset path 14 | dataset: Pixel200K # dataset name 15 | image_path: ../dataset/image.lmdb # lmdb path 16 | 17 | encoder_name: 'RN50' 18 | encoder_source: 'clip' 19 | 20 | 21 | 22 | # training settings 23 | epochs: 200 24 | train_batch_size: 16 25 | fine_tune_arg: { 26 | tune_scale: 78, 27 | pre_trained: True, 28 | activation: 'relu', 29 | dnn_layers: [], 30 | method: 'cls' 31 | } 32 | 33 | 34 | optim_args: { 35 | modal_lr: 0.0001, 36 | rec_lr: 0.0001, 37 | modal_decay: 0.01, 38 | rec_decay: 0.01 39 | } 40 | 41 | 42 | 43 | # eval settings 44 | eval_batch_size: 1024 45 | topk: [5,10] 46 | metrics: ['Recall', 'NDCG'] 47 | valid_metric: NDCG@10 48 | metric_decimal_place: 7 49 | eval_step: 1 50 | stopping_step: 30 -------------------------------------------------------------------------------- /code/overall/Swin-B.yaml: -------------------------------------------------------------------------------- 1 | # general 2 | seed: 2020 3 | state: INFO 4 | use_modality: True 5 | reproducibility: True 6 | checkpoint_dir: 'saved' 7 | show_progress: False 8 | 9 | log_wandb: False 10 | wandb_project: 'REC' 11 | MAX_ITEM_LIST_LENGTH: 10 12 | 13 | data_path: ../dataset/ # dataset path 14 | dataset: Pixel200K # dataset name 15 | image_path: ../dataset/image.lmdb # lmdb path 16 | 17 | encoder_name: 'swin-base-patch4-window7-224' 18 | encoder_source: 'transformers' 19 | 20 | 21 | 22 | # training settings 23 | epochs: 200 24 | train_batch_size: 8 25 | fine_tune_arg: { 26 | tune_scale: 78, 27 | pre_trained: True, 28 | activation: 'relu', 29 | dnn_layers: [], 30 | method: 'pool' 31 | } 32 | 33 | 34 | optim_args: { 35 | modal_lr: 0.0001, 36 | rec_lr: 0.0001, 37 | modal_decay: 0, 38 | rec_decay: 0.1 39 | } 40 | 41 | 42 | # eval settings 43 | eval_batch_size: 1024 44 | topk: [5,10] 45 | metrics: ['Recall', 'NDCG'] 46 | valid_metric: NDCG@10 47 | metric_decimal_place: 7 48 | eval_step: 1 49 | stopping_step: 30 50 | -------------------------------------------------------------------------------- /code/overall/Swin-T.yaml: -------------------------------------------------------------------------------- 1 | # general 2 | seed: 2020 3 | state: INFO 4 | use_modality: True 5 | reproducibility: True 6 | checkpoint_dir: 'saved' 7 | show_progress: False 8 | 9 | log_wandb: False 10 | wandb_project: 'REC' 11 | MAX_ITEM_LIST_LENGTH: 10 12 | 13 | data_path: ../dataset/ # dataset path 14 | dataset: Pixel200K # dataset name 15 | image_path: ../dataset/image.lmdb # lmdb path 16 | 17 | encoder_name: 'swin-tiny-patch4-window7-224' 18 | encoder_source: 'transformers' 19 | 20 | 21 | 22 | # training settings 23 | 
epochs: 200 24 | train_batch_size: 16 25 | fine_tune_arg: { 26 | tune_scale: 78, 27 | pre_trained: True, 28 | activation: 'relu', 29 | dnn_layers: [], 30 | method: 'pool' 31 | } 32 | 33 | 34 | optim_args: { 35 | modal_lr: 0.0001, 36 | rec_lr: 0.0001, 37 | modal_decay: 0, 38 | rec_decay: 0.1 39 | } 40 | 41 | 42 | # eval settings 43 | eval_batch_size: 1024 44 | topk: [5,10] 45 | metrics: ['Recall', 'NDCG'] 46 | valid_metric: NDCG@10 47 | metric_decimal_place: 7 48 | eval_step: 1 49 | stopping_step: 30 50 | -------------------------------------------------------------------------------- /code/overall/ViT.yaml: -------------------------------------------------------------------------------- 1 | # general 2 | seed: 2020 3 | state: INFO 4 | use_modality: True 5 | reproducibility: True 6 | checkpoint_dir: 'saved' 7 | show_progress: False 8 | 9 | log_wandb: False 10 | wandb_project: 'REC' 11 | MAX_ITEM_LIST_LENGTH: 10 12 | 13 | data_path: ../dataset/ # dataset path 14 | dataset: Pixel200K # dataset name 15 | image_path: ../dataset/image.lmdb # lmdb path 16 | 17 | encoder_name: 'clip-vit-base-patch32' 18 | encoder_source: 'transformers' 19 | 20 | 21 | 22 | # training settings 23 | epochs: 200 24 | train_batch_size: 16 25 | fine_tune_arg: { 26 | tune_scale: 165, 27 | pre_trained: True, 28 | activation: 'relu', 29 | dnn_layers: [], 30 | method: 'mean' 31 | } 32 | 33 | optim_args: { 34 | modal_lr: 0.0001, 35 | rec_lr: 0.0001, 36 | modal_decay: 0, 37 | rec_decay: 0.1 38 | } 39 | 40 | 41 | # eval settings 42 | eval_batch_size: 1024 43 | topk: [5,10] 44 | metrics: ['Recall', 'NDCG'] 45 | valid_metric: NDCG@10 46 | metric_decimal_place: 7 47 | eval_step: 1 48 | stopping_step: 30 49 | -------------------------------------------------------------------------------- /code/readme.md: -------------------------------------------------------------------------------- 1 | > Formulation of different baselines. For example, u → i denotes that these models aim to predict target i for user u. Item i can be represented by itemID in IDNet, pre-extracted features (in ViNet), or an image encoder in PixelNet. 2 | 3 | Model | Formulation | InputType 4 | --- | --- | --- 5 | MF, VBPR, LightGCN | u→i | InputType.Pair 6 | DSSM, FM | i1, i2 ...in−1 → in; i1, i2 ...in → in−1; ... i2, i3 ...in → i1; | InputType.SEQ 7 | ACF | u, i1, i2 ...in−1 → in; u, i1, i2 ...in → in−1 ; ... u, i2, i3 ...in → i1; | InputType.SEQ 8 | GRU4Rec, NextItNet, SASRec | i1, i2 ...in−1 → i2, i3 ...in; | InputType.SEQ 9 | BERT4Rec | i1, [MASK], ...in → i2 | InputType.SEQ 10 | SRGNN, LightSANs | i1→ i2; i1, i2→ i3; ... i1, i2 ...in−1 → in; | InputType.AUGSEQ 11 | VisRank | i1, i2 ...iNu−1 → iNu | InputType.SEQ/ Pair 12 | 13 | 14 | 15 | 16 | 17 | 18 | > Introduction of the code pipeline 19 | 20 | We supply a brief guide on implementing new models based on the pipeline. The foremost step is determining the model type. We've divided models into three fundamental types within the pipeline: IDNet models, which only model ID features; ViNet models which utilize pre-extracted visual features for recommendation; and PixelNet models, which train image encoders end-to-end with recommendation tasks. The implementation details for these three models vary, hence we discuss them individually: 21 | 22 | 1. Building traditional recommendation models (IDNet). 
23 | The implementation steps are akin to those in *RecBole*, with four functions requiring implementation: 24 | 25 | - `__init__()`: This function constructs the network structure, loads and defines global variables, initializes parameters, etc. 26 | 27 | - `forward()`: This function drives the optimization of the model, computing the forward-propagation loss for a batch of training data. 28 | 29 | - `compute_item_all()`: This function computes the representations of the entire item pool and is primarily used during model evaluation. 30 | 31 | - `predict()`: This function is used during model evaluation, generating the input users' scores over the entire item pool. 32 | 33 | 34 | 2. Building traditional visual recommendation models (ViNet). 35 | The construction process is the same as for IDNet, with the addition that we provide a `load_weight` function to facilitate processing the pre-extracted visual feature vectors in such models. It is straightforward to support new ways of applying visual features by extending the `load_weight` function. 36 | 37 | 3. Building end-to-end training visual recommendation models (PixelNet). 38 | We provide the `load_model` function to aid in loading and applying image encoders in PixelNet models. In such models, given GPU memory limitations, it is typically not feasible to compute all item representations at once. Therefore, different from the previous construction process, the model replaces the third function above (`compute_item_all()`) with the following one: 39 | 40 | - `compute_item()`: This function computes the representations of one input batch of items and is primarily used during model evaluation. 41 | 42 | 43 | The steps outlined above ensure the correct definition of a new model in the pipeline; a minimal skeleton illustrating these functions is shown after this guide. Finally, it is necessary to choose the input and output data formats of the new model so that training and evaluation can rely on the pipeline interfaces. Multiple data formats are implemented in the `data.dataset` module; see the table above for reference. In the model, specify the class variable `input_type` to select a particular data format: for example, setting `input_type = InputType.PAIR` corresponds to the data format in the first row of the table. Then, using the functions in `data.utils`, you can bind the model name to the corresponding train, valid, and test dataset names, completing the specification of the model's input and output data formats. 44 | 45 | 46 | > Thanks to the excellent code repositories [RecBole](https://github.com/RUCAIBox/RecBole) and [VisRec](https://github.com/ialab-puc/VisualRecSys-Tutorial-IUI2021)!
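To make the guide above concrete, here is a minimal sketch of a new PixelNet-style model following the interface used by the models in this repository (e.g. `MOMF` in `VisualModel/momf.py`). The class name `MyPixelModel`, the BPR-style pairwise loss, and the assumed tensor shapes are illustrative placeholders and not part of the repository; only `BaseModel`, `InputType`, `load_model`, and the four interface methods come from the pipeline described above.

```python
import torch
from torch import nn

from REC.model.basemodel import BaseModel   # shared base class (see basemodel.py above)
from REC.model.load import load_model       # builds the trainable image encoder from the config
from REC.utils.enum_type import InputType


class MyPixelModel(BaseModel):
    # PAIR corresponds to the (user -> item) data format in the first row of the table above.
    input_type = InputType.PAIR

    def __init__(self, config, dataload):
        super(MyPixelModel, self).__init__()
        self.embedding_size = config['embedding_size']
        self.user_num = dataload.user_num
        self.item_num = dataload.item_num

        # End-to-end trainable image encoder, as in the PixelNet models above.
        self.visual_encoder = load_model(config=config)
        self.user_embedding = nn.Embedding(self.user_num, self.embedding_size)

    def forward(self, inputs):
        # Training loss for one batch; a BPR-style pairwise loss is assumed here.
        user, items = inputs                               # items: [batch, 2, C, H, W] (positive, negative)
        user_e = self.user_embedding(user)                 # [batch, dim]
        item_e = self.visual_encoder(items.flatten(0, 1))  # [batch * 2, dim]
        item_e = item_e.view(user.shape[0], 2, -1)         # [batch, 2, dim]
        pos_score = (user_e * item_e[:, 0]).sum(-1)
        neg_score = (user_e * item_e[:, 1]).sum(-1)
        return -torch.log(torch.sigmoid(pos_score - neg_score) + 1e-8).mean()

    @torch.no_grad()
    def compute_item(self, item):
        # Encode one batch of item images; the evaluator calls this batch by batch,
        # since the whole item pool cannot be encoded at once under GPU memory limits.
        return self.visual_encoder(item)

    @torch.no_grad()
    def predict(self, user, item_feature):
        # Score the entire item pool for the given users using the cached item features.
        user_e = self.user_embedding(user)
        return torch.matmul(user_e, item_feature.t())
```

An IDNet model follows the same pattern, except that the item embeddings replace the image encoder and `compute_item()` is replaced by `compute_item_all()`, which returns the representations of the full item table in one call.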
47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /code/run.py: -------------------------------------------------------------------------------- 1 | from cProfile import run 2 | from logging import getLogger 3 | import torch 4 | from REC.data import * 5 | from REC.config import Config 6 | from REC.utils import init_logger, get_model, init_seed, set_color 7 | from REC.trainer import Trainer 8 | import torch.distributed as dist 9 | from torch.nn.parallel import DistributedDataParallel as DDP 10 | import os 11 | import numpy as np 12 | import argparse 13 | import torch.distributed as dist 14 | import torch 15 | from REC.data import LMDB_Image 16 | 17 | 18 | def run_loop(local_rank,config_file=None,saved=True): 19 | 20 | # configurations initialization 21 | config = Config(config_file_list=config_file) 22 | 23 | device = torch.device("cuda", local_rank) 24 | config['device'] = device 25 | 26 | init_seed(config['seed'], config['reproducibility']) 27 | 28 | # logger initialization 29 | init_logger(config) 30 | logger = getLogger() 31 | 32 | 33 | # get model and data 34 | dataload = load_data(config) 35 | train_loader, valid_loader, test_loader = bulid_dataloader(config, dataload) 36 | 37 | model = get_model(config['model'])(config, dataload) 38 | model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) 39 | 40 | model = DDP(model, device_ids=[local_rank], output_device=local_rank,find_unused_parameters=True) 41 | 42 | 43 | world_size = torch.distributed.get_world_size() 44 | logger.info(set_color('\nWorld_Size', 'pink') + f' = {world_size} \n') 45 | logger.info(config) 46 | logger.info(dataload) 47 | logger.info(model.module) 48 | 49 | 50 | trainer = Trainer(config,model) 51 | 52 | # training process 53 | best_valid_score, best_valid_result = trainer.fit( 54 | train_loader, valid_loader, saved=saved, show_progress=config['show_progress'] 55 | ) 56 | 57 | #model evaluation 58 | test_result = trainer.evaluate(test_loader, load_best_model=saved, show_progress=config['show_progress']) 59 | 60 | logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}') 61 | logger.info(set_color('test result', 'yellow') + f': {test_result}') 62 | 63 | return { 64 | 'best_valid_score': best_valid_score, 65 | 'valid_score_bigger': config['valid_metric_bigger'], 66 | 'best_valid_result': best_valid_result, 67 | 'test_result': test_result 68 | } 69 | 70 | 71 | 72 | if __name__ == '__main__': 73 | parser = argparse.ArgumentParser() 74 | parser.add_argument("--config_file", nargs='+',type=str) 75 | args = parser.parse_args() 76 | local_rank = int(os.environ['LOCAL_RANK']) 77 | config_file = args.config_file 78 | 79 | torch.cuda.set_device(local_rank) 80 | dist.init_process_group(backend='nccl') 81 | 82 | run_loop(local_rank = local_rank,config_file=config_file) 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /dataset/LICENSE: -------------------------------------------------------------------------------- 1 | This dataset is provided by the Westlake Representation Learning Lab exclusively for non-commercial research and educational purposes. In exchange for the permission to the access to the dataset from Westlake Representation Learning Lab, you automatically agree to the following terms and conditions: Researcher accepts full responsibility for his or her use of the Dataset. 
Under no circumstances will the Westlake Representation Learning Lab be liable for any damages or losses arising from the use of the dataset. The dataset is provided "as-is," without any express or implied warranties, including but not limited to, warranties of merchantability, fitness for a particular purpose, non-infringement, or the absence of defects, errors, or viruses. No rights are granted with respect to copying, modifying, publishing, distributing, or commercializing the dataset. 2 | -------------------------------------------------------------------------------- /dataset/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/dataset/overview.png -------------------------------------------------------------------------------- /dataset/readme.md: -------------------------------------------------------------------------------- 1 | The files in 'pixelrec_iteminfo' folder contains item features in PixelRec: 2 | | item_id | view_number | comment_number | thumbup_number | share_number | coin_number | favorite_number | barrage_number | title | tag | description | 3 | | ------: | ----------: | -------------: | -------------: | -----------: | ----------: | --------------: | -------------: | ---: | ----------: | ----------: | 4 | 5 | The files in 'pixelrec_interaction' folder contains interaction in PixelRec: 6 | | item_id | user_id | timestamp | 7 | | ------: | ----------: | -------------: | 8 | 9 | The files in 'cover' folder contains the cover images of items, named in the format of "item_id.jpg". 10 | 11 | The 'item_id' is universal across all files, meaning that the same item is related with the same 'item_id' in every file, ranging from i0 - i408373 12 | 13 | -------------------------------------------------------------------------------- /dataset/statistics/item_rank.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/dataset/statistics/item_rank.pdf -------------------------------------------------------------------------------- /dataset/statistics/item_rank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/dataset/statistics/item_rank.png -------------------------------------------------------------------------------- /dataset/statistics/session_length.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/dataset/statistics/session_length.pdf -------------------------------------------------------------------------------- /doc/SDM_poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/doc/SDM_poster.pdf -------------------------------------------------------------------------------- /doc/pre.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/westlake-repl/PixelRec/a4e4a01b01aba96f746ae66de9eb35a88484a681/doc/pre.pdf -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | git+https://github.com/openai/CLIP.git 2 | colorama==0.4.4 3 | colorlog==4.7.2 4 | hyperopt==0.2.7 5 | lmdb==1.2.1 6 | numpy==1.21.2 7 | pandas==1.4.1 8 | Pillow==9.4.0 9 | PyYAML==6.0 10 | scikit_learn==1.2.1 11 | tensorboardX==2.6 12 | torch==1.10.2 13 | torch_geometric==2.0.4 14 | torchvision==0.11.3 15 | tqdm==4.63.0 16 | transformers==4.16.2 17 | wandb==0.13.10 18 | --------------------------------------------------------------------------------