├── checkpoints └── .keep ├── trainers ├── __init__.py ├── losses.py ├── acf_trainer.py ├── trainer.py └── img_trainer.py ├── slides ├── Session 5 ACF.pdf ├── Session 3 DVBPR.pdf ├── Session 4 CuratorNet.pdf ├── Session 1 VisRec Introduction .pdf └── Session 2 Pipeline + VisRank + VBPR.pdf ├── requirements.txt ├── datasets ├── __init__.py ├── utils.py ├── user_profile_mode.py ├── user_mode.py ├── profile_mode.py └── user_mode_img.py ├── models ├── __init__.py ├── utils.py ├── visrank.py ├── vbpr.py ├── curatornet.py ├── dvbpr.py └── acf.py ├── colabnotebooks └── README.md ├── LICENSE ├── utils ├── environment.py ├── metrics.py ├── logger.py ├── data.py ├── hashing.py └── curatornet_sampler.py ├── CITATION.cff ├── data └── README.md ├── .gitignore ├── README.md ├── 3 - (ACF) Training procedure.ipynb ├── dvbpr_train.py ├── 1 - Create image embeddings.ipynb ├── 3 - (VBPR) Training procedure.ipynb ├── 3 - (DVBPR) Training procedure.ipynb ├── 3 - (CuratorNet) Training procedure.ipynb ├── 3.5 - (VisRank) Evaluation procedure.ipynb ├── 2 - Triplet sampling (Random).ipynb └── 4 - Evaluation procedure.ipynb /checkpoints/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainer import Trainer 2 | from .img_trainer import ImgTrainer -------------------------------------------------------------------------------- /slides/Session 5 ACF.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/HEAD/slides/Session 5 ACF.pdf -------------------------------------------------------------------------------- /slides/Session 3 DVBPR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/HEAD/slides/Session 3 DVBPR.pdf -------------------------------------------------------------------------------- /slides/Session 4 CuratorNet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/HEAD/slides/Session 4 CuratorNet.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib == 3.2.* 2 | pandas == 1.1.* 3 | seaborn == 0.11.* 4 | torch >= 1.7.0 5 | torchvision == 0.8.* 6 | tqdm == 4.41.* 7 | -------------------------------------------------------------------------------- /slides/Session 1 VisRec Introduction .pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/HEAD/slides/Session 1 VisRec Introduction .pdf -------------------------------------------------------------------------------- /slides/Session 2 Pipeline + VisRank + VBPR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/HEAD/slides/Session 2 Pipeline + VisRank + VBPR.pdf -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.user_mode import UserModeDataset 2 | from .user_mode_img import UserModeImgDataset 3 | from .user_profile_mode import UserProfileModeDataset 4 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .acf import ACF 2 | from .curatornet import CuratorNet 3 | from .dvbpr import DVBPR 4 | from .vbpr import VBPR 5 | from .visrank import VisRank 6 | -------------------------------------------------------------------------------- /colabnotebooks/README.md: -------------------------------------------------------------------------------- 1 | # List of Google Colab Notebooks 2 | 3 | The most important actually is Colab Notebook 6 **Evaluation**, since you can load pre-trained models and get result metrics. 4 | 5 | | ID | Description | Link | 6 | |-------|-----------------------------|-------------| 7 | | [1] | Visual feature extraction | [colab1](https://colab.research.google.com/drive/1JCTPS88AzKA0KNVCoEvYCBaaYebgdoYn?usp=sharing) | 8 | | [2] | VisRank train (TODO) | [colab2](#) | 9 | | [3] | VBPR train (TODO) | [colab3](#) | 10 | | [4] | DVBPR train (TODO) | [colab4](#) | 11 | | [5] | CuratorNet train | [colab5](https://colab.research.google.com/drive/1vGYUbQK8fxOxt_TAoOgok17n2g9VAr2s?usp=sharing) | 12 | | [6] | Evaluation | [colab6](https://colab.research.google.com/drive/1TCmXpcRHOlzleOrMsX_C3-7Xon4K-oRJ?usp=sharing) | 13 | | [7] | Attention Exploration (ACF) | [colab7](https://colab.research.google.com/drive/1eSI5ZFM1NNBhXO0sSZreXHJfEJU5vraE?usp=sharing) | 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 IALab UC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glob import glob 3 | from pathlib import Path 4 | 5 | import torch 6 | from PIL import Image 7 | from torch.utils.data import Dataset 8 | 9 | 10 | class PreprocessingDataset(Dataset): 11 | 12 | def __init__(self, images_dir, extensions=None, transform=None): 13 | self.images_dir = images_dir 14 | self.transform = transform 15 | extensions = ["*.jpg"] if extensions is None else extensions 16 | self.images_paths = [] 17 | for ext in sorted(extensions): 18 | # List images in folder by pattern 19 | pattern = os.path.join(self.images_dir, ext) 20 | # Use glob over iglob to sort and calculate length 21 | self.images_paths.extend(sorted(glob(pattern))) 22 | 23 | def __len__(self): 24 | return len(self.images_paths) 25 | 26 | def __getitem__(self, idx): 27 | if torch.is_tensor(idx): 28 | idx = idx.tolist() 29 | 30 | img_name = self.images_paths[idx] 31 | img_fn = Path(img_name).name 32 | img = Image.open(img_name).convert("RGB") 33 | 34 | if self.transform: 35 | img = self.transform(img) 36 | 37 | return {"image": img, "id": img_fn, "idx": idx} 38 | -------------------------------------------------------------------------------- /utils/environment.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import os 3 | 4 | 5 | @contextlib.contextmanager 6 | def modified_environ(*remove, **update): 7 | """ 8 | Temporarily updates the ``os.environ`` dictionary in-place. 9 | 10 | The ``os.environ`` dictionary is updated in-place so that the modification 11 | is sure to work in all situations. 12 | 13 | :param remove: Environment variables to remove. 14 | :param update: Dictionary of environment variables and values to add/update. 15 | """ 16 | # Taken from: https://stackoverflow.com/a/34333710 17 | env = os.environ 18 | update = update or {} 19 | remove = remove or [] 20 | 21 | # List of environment variables being updated or removed. 22 | stomped = (set(update.keys()) | set(remove)) & set(env.keys()) 23 | # Environment variables and values to restore on exit. 24 | update_after = {k: env[k] for k in stomped} 25 | # Environment variables and values to remove on exit. 
26 | remove_after = frozenset(k for k in update if k not in env) 27 | 28 | try: 29 | env.update(update) 30 | [env.pop(k, None) for k in remove] 31 | yield 32 | finally: 33 | env.update(update_after) 34 | [env.pop(k) for k in remove_after] 35 | -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torchvision.models as models 5 | 6 | 7 | def get_cpu_copy(model): 8 | return OrderedDict({ 9 | k: v.to("cpu") 10 | for k, v in model.state_dict().items() 11 | }) 12 | 13 | 14 | def get_model_by_name(model_name, pretrained=True, output_layer=None): 15 | model = models.__dict__[model_name](pretrained=pretrained) 16 | 17 | if output_layer: 18 | children_list = [] 19 | for n, c in model.named_children(): 20 | children_list.append(c) 21 | if n == output_layer: 22 | break 23 | model = torch.nn.Sequential(*children_list) 24 | else: 25 | # output_layer = last layer 26 | model = torch.nn.Sequential(*list(model.children()))[:-1] 27 | for param in model.parameters(): 28 | param.requires_grad = False 29 | return model 30 | 31 | 32 | def save_checkpoint(checkpoint_path, **components): 33 | checkpoint_dict = dict() 34 | for name, component in components.items(): 35 | if hasattr(component, "state_dict"): 36 | checkpoint_dict[name] = component.state_dict() 37 | else: 38 | checkpoint_dict[name] = component 39 | torch.save(checkpoint_dict, checkpoint_path) 40 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import numpy as np 5 | 6 | 7 | def precision(ground_truth_indexes, k): 8 | return (ground_truth_indexes < k).to(float).sum() / k 9 | 10 | 11 | def recall(ground_truth_indexes, k): 12 | return (ground_truth_indexes < k).to(float).sum() / ground_truth_indexes.size(0) 13 | 14 | 15 | # Pre-compute ideal DCGs for performance improvement 16 | IDEAL_DCG = np.zeros((1000,)) 17 | IDEAL_DCG[0] = 0 18 | for _i in range(1, 1000): 19 | IDEAL_DCG[_i] = IDEAL_DCG[_i-1] + 1/math.log2(_i+1) 20 | 21 | 22 | def nDCG(ground_truth_indexes, k): 23 | ground_truth_indexes = ground_truth_indexes.to(float) 24 | dcg = 1 / torch.log2(ground_truth_indexes + 2) 25 | dcg.scatter_(0, (ground_truth_indexes >= k).nonzero(as_tuple=True)[0], 0) 26 | dcg = dcg.sum() 27 | return dcg / IDEAL_DCG[(ground_truth_indexes < k).sum()] if dcg > 0 else 0 28 | 29 | 30 | def auc_exact(ground_truth_indexes, inventory_size): 31 | n = ground_truth_indexes.size(0) 32 | assert inventory_size >= n 33 | if inventory_size == n: 34 | return 1 35 | i = torch.arange(1, n + 1, device=ground_truth_indexes.device) 36 | idx = ground_truth_indexes + 1 37 | auc = (((inventory_size - idx) - (n - i))).sum(dtype=torch.float64) 38 | auc /= (inventory_size - n) 39 | auc /= n 40 | return auc 41 | 42 | 43 | def reciprocal_rank(ground_truth_indexes): 44 | return 1 / (ground_truth_indexes.min().to(float) + 1) 45 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import time 4 | 5 | import torch 6 | from torch.utils.tensorboard import SummaryWriter 7 | 8 | 9 | class Log(): 10 | """ 11 | Handles logging for a train process.
12 | It prints messages, writes them to log file, writes metrics for tensorboard and saves model. 13 | Everything is saved to runs/ directory. 14 | """ 15 | def __init__(self, model_name, checkpoint_dir='runs'): 16 | if not os.path.exists(checkpoint_dir): 17 | os.mkdir(checkpoint_dir) 18 | 19 | path = os.path.join(checkpoint_dir, model_name) 20 | if not os.path.exists(path): 21 | os.mkdir(path) 22 | else: 23 | shutil.rmtree(path) 24 | 25 | self.model_name = model_name 26 | self.path = path 27 | self.writer = SummaryWriter(path) 28 | 29 | def log(self, text): 30 | """ 31 | Write both to log and stdout 32 | """ 33 | print(text) 34 | with open(f"{self.path}/log.txt", "a+") as f: 35 | f.write(text + '\n') 36 | 37 | def epoch(self, n, phase): 38 | start = time.strftime("%H:%M:%S") 39 | if phase == 'train': 40 | self.log('\n') 41 | self.log(f"Starting epoch: {n} | phase: {phase} | ⏰: {start}") 42 | 43 | def metrics(self, loss, accuracy, epoch, phase, digits=6): 44 | self.log(f"Loss: {round(loss, digits)}, Accuracy: {round(accuracy, digits)}") 45 | 46 | self.writer.add_scalar(f'Loss/{phase}', loss, epoch) 47 | self.writer.add_scalar(f'Accuracy/{phase}', accuracy, epoch) 48 | 49 | def save(self, state, epoch): 50 | self.log("******** New optimal found, saving state ********") 51 | state['epoch'] = epoch 52 | torch.save(state, f"{self.path}/{self.model_name}_e{epoch}.pt") -------------------------------------------------------------------------------- /trainers/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def bpr_loss(pos, neg, b=0.0, collapse=True): 4 | """ 5 | Usual BPR loss that penalizes when negative score is greater than positive one. 6 | 7 | Parameters 8 | ---------- 9 | pos: torch.tensor 10 | positive score 11 | 12 | neg: torch.tensor 13 | negative scores. Can be a batch of negatives. 14 | 15 | b: float 16 | desired boundary between positive and negative examples 17 | 18 | collapse: bool 19 | If True collapse batch with mean. 20 | 21 | Returns 22 | ------- 23 | torch.tensor 24 | """ 25 | res = torch.sigmoid(neg - pos + b) 26 | if collapse: 27 | res = res.mean() 28 | return res 29 | 30 | 31 | def warp_loss(pos, neg, b=1, collapse=True): 32 | """ 33 | Batch version of WARP loss. 34 | 35 | Regular version samples one negative example until violation is met. 36 | This number of samples become the estimation of rank and weight is produced as some function of this rank. 37 | 38 | This version uses a batch of negatives and estimates rank as a number of violated examples. 39 | 40 | If you use the number of first violation as rank this will degenerate to usual WARP with a limit on draws. 41 | 42 | Parameters 43 | ---------- 44 | pos: torch.tensor 45 | positive score 46 | 47 | neg: torch.tensor 48 | negative scores. Can be a batch of negatives. 49 | 50 | b: float 51 | desired boundary between positive and negative examples 52 | 53 | collapse: bool 54 | If True collapse batch with mean. 
55 | 56 | Returns 57 | ------- 58 | torch.tensor 59 | """ 60 | loss = bpr_loss(pos, neg, b, collapse=False) 61 | m = (loss > 0.5).float() 62 | m *= torch.log(m.sum() + 1) + 1 63 | res = m * loss 64 | if collapse: 65 | res = res.mean() 66 | return res -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | authors: 3 | - family-names: Parra 4 | given-names: Denis 5 | orcid: "https://orcid.org/0000-0001-9878-8761" 6 | - family-names: Ossa 7 | given-names: Antonio 8 | orcid: "https://orcid.org/0000-0002-6752-4541" 9 | - family-names: Cartagena 10 | given-names: Manuel 11 | - family-names: "Cerda-Mardini" 12 | given-names: Patricio 13 | - family-names: "del Rio" 14 | given-names: Felipe 15 | orcid: "https://orcid.org/0000-0003-0411-8295" 16 | date-released: 2021-04-13 17 | doi: "10.1145/3397482.3450620" 18 | keywords: 19 | - "neural networks" 20 | - "deep learning" 21 | - "recommender systems" 22 | - "multimedia recommendation" 23 | license: MIT 24 | message: "If you use this material or code, please cite the article from preferred-citation." 25 | repository-code: "https://github.com/ialab-puc/VisualRecSys-Tutorial-IUI2021" 26 | title: "VisRec: A Hands-on Tutorial on Deep Learning for Visual Recommender Systems" 27 | preferred-citation: 28 | type: conference-paper 29 | authors: 30 | - family-names: Parra 31 | given-names: Denis 32 | orcid: "https://orcid.org/0000-0001-9878-8761" 33 | - family-names: Ossa 34 | given-names: Antonio 35 | orcid: "https://orcid.org/0000-0002-6752-4541" 36 | - family-names: Cartagena 37 | given-names: Manuel 38 | - family-names: "Cerda-Mardini" 39 | given-names: Patricio 40 | - family-names: "del Rio" 41 | given-names: Felipe 42 | title: "VisRec: A Hands-on Tutorial on Deep Learning for Visual Recommender Systems" 43 | year: 2021 44 | collection-title: "26th International Conference on Intelligent User Interfaces" 45 | collection-doi: "10.1145/3397482" 46 | conference: 47 | name: "26th International Conference on Intelligent User Interfaces (IUI '21)" 48 | location: "College Station, TX, USA" 49 | isbn: "9781450380188" 50 | start: 5 51 | end: 6 52 | doi: "10.1145/3397482.3450620" 53 | -------------------------------------------------------------------------------- /utils/data.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def extract_embedding(embedding, verbose=False): 8 | features = list() 9 | id2index = dict() 10 | index2fn = dict() 11 | filenames = set() 12 | for i, (fn, vector_embedding) in enumerate(embedding): 13 | fn = str(fn) 14 | _id = Path(fn).stem 15 | if _id not in id2index and fn not in filenames: 16 | index = len(features) 17 | index2fn[index] = fn 18 | id2index[_id] = index 19 | filenames.add(fn) 20 | features.append(vector_embedding) 21 | elif verbose: 22 | print(f"Warning: Duplicated id or filename (id={_id}, fn={fn})") 23 | features = np.asarray(features) 24 | return features, id2index, index2fn 25 | 26 | 27 | def get_interactions_dataframe(interactions_path, display_stats=False): 28 | # Load interactions from CSV 29 | interactions_df = pd.read_csv(interactions_path) 30 | 31 | # Display stats 32 | if display_stats: 33 | for column in interactions_df.columns: 34 | print(f"Interactions - {column}: {interactions_df[column].nunique()} unique values") 35 | 36 | return 
interactions_df 37 | 38 | 39 | def mark_evaluation_rows(interactions_df, threshold=None): 40 | if threshold is None: 41 | threshold = 1 42 | 43 | def _mark_evaluation_rows(group): 44 | # Only the last 'threshold' items are used for evaluation, 45 | # unless less items are available (then they're used for training) 46 | evaluation_series = pd.Series(False, index=group.index) 47 | if len(group) > threshold: 48 | evaluation_series.iloc[-threshold:] = True 49 | return evaluation_series 50 | 51 | # Mark evaluation rows 52 | interactions_df["evaluation"] = interactions_df.groupby(["user_id"])["user_id"].apply(_mark_evaluation_rows) 53 | # Sort transactions by timestamp 54 | interactions_df = interactions_df.sort_values("timestamp") 55 | # Reset index according to new order 56 | interactions_df = interactions_df.reset_index(drop=True) 57 | return interactions_df 58 | -------------------------------------------------------------------------------- /datasets/user_profile_mode.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from skimage import io, transform 7 | import torch 8 | from torch.utils.data import Dataset 9 | 10 | 11 | class UserProfileModeDataset(Dataset): 12 | def __init__(self, csv_file, transform=None): 13 | items_padded = True 14 | 15 | # Data sources 16 | if not os.path.isfile(csv_file): 17 | raise FileNotFoundError( 18 | errno.ENOENT, os.strerror(errno.ENOENT), csv_file, 19 | ) 20 | 21 | self.__source_file = csv_file 22 | 23 | # Load triples from dataframe 24 | triples = pd.read_csv(self.__source_file) 25 | 26 | # Keep important attributes 27 | self.ui = triples["ui"].to_numpy(copy=True) 28 | self.pi = triples["pi"].to_numpy(copy=True) 29 | self.ni = triples["ni"].to_numpy(copy=True) 30 | if items_padded: 31 | self.pi = self.pi + 1 32 | self.ni = self.ni + 1 33 | 34 | self.profile = self.create_profiles(triples["profile"], remove_items=self.pi, padded=items_padded) 35 | 36 | self.users = np.unique(self.ui) 37 | self.items = np.unique(self.pi) 38 | 39 | # Common setup 40 | self.transform = transform 41 | 42 | def create_profiles(self, profile_column, remove_items=None, padded=True): 43 | profiles = profile_column.to_list() 44 | profiles = [[int(item) for item in profile.split()] for profile in profiles] 45 | if padded: 46 | profiles = [[item + 1 for item in profile] for profile in profiles] # Profile ids start at 1 due to padding 47 | 48 | if remove_items is not None: 49 | for profile, item in zip(profiles, remove_items): 50 | profile.remove(item) 51 | 52 | profiles = [torch.tensor(profile) for profile in profiles] 53 | 54 | return profiles 55 | 56 | def __len__(self): 57 | return len(self.ui) 58 | 59 | def __getitem__(self, idx): 60 | return ( 61 | self.ui[idx], 62 | self.profile[idx], 63 | self.pi[idx], 64 | self.ni[idx], 65 | ) 66 | -------------------------------------------------------------------------------- /datasets/user_mode.py: -------------------------------------------------------------------------------- 1 | """Profile mode Dataset (PyTorch) object 2 | 3 | This module contains Dataset object with the triples information 4 | represented as (ui, pi, ni), where each is an identifier. 5 | """ 6 | import errno 7 | import os 8 | 9 | import pandas as pd 10 | from torch.utils.data import Dataset 11 | 12 | 13 | class UserModeDataset(Dataset): 14 | """Represents the Dataset as a PyTorch Dataset that yields tuples 15 | of 3 items: (ui, pi, ni). 
This mode represents users as an id. 16 | 17 | Attributes: 18 | ui, pi, ni: Dataset triples (in different arrays). 19 | transform: Transforms for each sample. 20 | """ 21 | 22 | def __init__(self, csv_file, transform=None, id2index=None): 23 | """Inits a UGallery Dataset. 24 | 25 | Args: 26 | csv_file: Path (string) to the triplets file. 27 | transform: Optional. Torchvision like transforms. 28 | id2index: Optional. Transformation to apply on items. 29 | """ 30 | # Data sources 31 | if not os.path.isfile(csv_file): 32 | raise FileNotFoundError( 33 | errno.ENOENT, os.strerror(errno.ENOENT), csv_file, 34 | ) 35 | self.__source_file = csv_file 36 | # Load triples from dataframe 37 | triples = pd.read_csv(self.__source_file) 38 | # Process profile elements 39 | if id2index: 40 | # Note: Assumes id is str and index is int 41 | def map_id2index(element): 42 | if type(element) is list: 43 | return [id2index[e] for e in element] 44 | else: 45 | return id2index[str(element)] 46 | triples[["pi", "ni"]] = triples[["pi", "ni"]].applymap(map_id2index) 47 | # Keep important attributes 48 | self.ui = triples["ui"].to_numpy(copy=True) 49 | self.pi = triples["pi"].to_numpy(copy=True) 50 | self.ni = triples["ni"].to_numpy(copy=True) 51 | # Common setup 52 | self.transform = transform 53 | 54 | def __len__(self): 55 | return len(self.ui) 56 | 57 | def __getitem__(self, idx): 58 | return ( 59 | self.ui[idx], 60 | self.pi[idx], 61 | self.ni[idx], 62 | ) 63 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # How to format your dataset 2 | 3 | A recommendation dataset, like the one we used in this tutorial, requires 2 important components: 4 | 5 | 1. Interactions between users and items: Could be "likes", "purchases" or other forms of interactions 6 | 2. Image content: We're using a content-based approach, so we need the raw images to extract their features 7 | 8 | In this document we'll explain the expected input format of both components. 9 | 10 | ### Representation of interactions 11 | 12 | Usually the interactions in a dataset come in different representations, so we used a script to convert the interactions into a simpler format for the preprocessing script. If you want to use our scripts, make sure to use the same output format and everything should work. 13 | 14 | We only use user-item positive feedback interactions, so each row of our dataset will contain a **user_id** column and an **item_id** column. Also, we store the **timestamp** of the interaction to split, for each user, the interactions that we'll use to evaluate the model (in our case, data was already divided, but sometimes you'll have to define a criterion to choose which interactions to predict). Finally, we'll leave a Boolean **evaluation** column to mark the rows that we'll use to evaluate our models. 15 | 16 | Below, you can see an example of how our data looks in our format: 17 | 18 | ``` 19 | user_id,item_id,timestamp,evaluation 20 | 30,200501002,1105490700,False 21 | 12,200501002,1105521180,False 22 | ``` 23 | 24 | Our preprocessing script assumes this structure and creates the training samples in the appropriate format. 25 | 26 | ### Representation of images 27 | 28 | We stored all the images in a folder in our filesystem. Each image filename was the same value as the **item_id** described in the previous step. This has the benefit that we did not require an additional mapping from filename to item_id.
If your dataset requires this mapping, please be careful and consistent in its usage (in particular, make sure to return the correct **item_id** in the `PreprocessingDataset` class, so your embeddings are correctly formatted). 29 | 30 | Our scripts assume the already described structure to create the embeddings. The embeddings have the following structure stored in a `*.npy` file: 31 | 32 | ``` 33 | [ 34 | ["200501002", [0.123123, 0.13184, ...]], 35 | ... 36 | ] 37 | ``` 38 | 39 | This is just a structure representation. The actual object is a `numpy.ndarray` with shape `(len(image_dataset), 2)`, where each row has two elements: the **item_id** and a vector with the features of the item. The features (in general) correspond to the output of the second to last layer of a pretrained DNN. In our script, you'll find the configuration to forward the dataset images through a pretrained ResNet50 network. 40 | 41 | Our training scripts assume this structure and should be able to forward the data through each model using both the embedding and interactions data. 42 | 43 | ### Additional steps 44 | 45 | Once your dataset interactions and image features are properly formatted, the provided scripts should work with no additional modifications. You'll only need to make sure that you're using your own files. If you have any problem, please contact the authors or create an issue in our repository. -------------------------------------------------------------------------------- /utils/hashing.py: -------------------------------------------------------------------------------- 1 | """Utilities to hash elements. 2 | 3 | This module contains a class (HashesContainer) that calculates hashes 4 | and notifies collisions. 5 | """ 6 | 7 | 8 | def pre_hash(triple, contains_iter=True): 9 | """Prepare tuple to be hashed. 10 | 11 | This means that each element of the tuple will be converted to 12 | string. The first item (profile) should be iterable, and only 13 | the second and third items (positive and negative) will be 14 | considered as well, leaving the rest out of the tuple. 15 | 16 | Args: 17 | triple: Tuple with the profile items (iterable), positive 18 | item and its negative counterpart. 19 | contains_iter: Optional. If it is True, sorts the first element 20 | and transforms each of its items into a str. 21 | 22 | Returns: 23 | Same tuple but converted to string. Example: 24 | 25 | ([1, 2], 3, 4, 5) 26 | 27 | Becomes: 28 | 29 | (['1', '2'], '3', '4') 30 | 31 | If contains_iter is False: 32 | 33 | (1, 2, 3) 34 | 35 | Becomes: 36 | 37 | ('1', '2', '3') 38 | """ 39 | if not contains_iter: 40 | return (str(triple[0]), str(triple[1]), str(triple[2])) 41 | _sorted_t0 = tuple(sorted([str(_id) for _id in triple[0]])) 42 | return (_sorted_t0, str(triple[1]), str(triple[2])) 43 | 44 | 45 | class HashesContainer: 46 | """Manages hashes of elements to detect duplicates. 47 | 48 | A custom hashing function is used to hash an arbitrary number of 49 | elements. Also, stores used hashes to detect collisions and 50 | count them. 51 | 52 | Attributes: 53 | collisions: Current count of hash collisions detected. 54 | hashes: Set of used hashes. 55 | """ 56 | 57 | _MOD = 402653189 58 | _BASE = 92821 59 | 60 | def __init__(self): 61 | """Inits an empty HashesContainer""" 62 | self.collisions = 0 63 | self.hashes = set() 64 | 65 | def enroll(self, *content): 66 | """Tries to register a new hash and reports collision. 67 | 68 | Hashes new content and returns True if it was added successfully 69 | (no collision).
70 | 71 | Args: 72 | *content: Information to be hashed (must contain/be 73 | iterables and/or str) 74 | 75 | Returns: 76 | True if no hash collision was detected and False 77 | otherwise. 78 | """ 79 | h = self.hash(*content) 80 | if h in self.hashes: 81 | self.collisions += 1 82 | return False 83 | self.hashes.add(h) 84 | return True 85 | 86 | def hash(self, *args, h=0): 87 | """Calculates hash of given elements. 88 | 89 | Uses a custom hash function to calculate hashes recursively. 90 | 91 | Args: 92 | *args: Information to be hashed (must contain/be 93 | iterables and/or str). 94 | h: Optional. Current hash value. Defaults to 0. 95 | 96 | Returns: 97 | Hash value as an integer. 98 | """ 99 | for arg in args: 100 | if isinstance(arg, str): 101 | h = ((h * self._BASE) % self._MOD + int(arg, 32)) % self._MOD 102 | else: 103 | h = self.hash(*arg, h=h) 104 | return h 105 | -------------------------------------------------------------------------------- /models/visrank.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.metrics.pairwise import cosine_similarity 4 | 5 | 6 | # https://www.kaggle.com/jonathanoheix/product-recommendation-based-on-visual-similarity 7 | class VisRank: 8 | 9 | def __init__(self, embedding, similarity_method=cosine_similarity): 10 | self.embedding = embedding 11 | self.similarities = pd.DataFrame(similarity_method(self.embedding)) 12 | 13 | def most_similar_to_item(self, item, k=5): 14 | # Ignore first items (it's the same) 15 | closest_items = self.similarities[item].sort_values(ascending=False)[1:] 16 | if k is not None: 17 | # Select next k items (first is already ignored) 18 | closest_items = closest_items[:k] 19 | # Return index and score of similar items 20 | return np.array(closest_items.index), np.array(closest_items) 21 | 22 | def most_similar_to_profile(self, items, k=None, method="maximum", top=None, include_consumed=False): 23 | # Top argument is only needed in a specific method 24 | if method != "average_top_k" and top is not None: 25 | raise ValueError("top should be None unless method is 'average_top_k'") 26 | elif method == "average_top_k" and top is None: 27 | raise ValueError("top should not be None if method is 'average_top_k'") 28 | # Retrieve similarities of seen items towards all the items 29 | possible_items = self.similarities[items] 30 | if method == "maximum": 31 | # score(u, i) = max(sim(Vi, Vj) for j in P_u) 32 | score_ui = possible_items.max(axis=1) 33 | elif method == "average_top_k": 34 | # score(u, i) = largest(sim(Vi, Vj) for j in P_u, min(top, |P_u|)) / min(top, |P_u|) 35 | top = min(len(items), top) 36 | possible_items = possible_items.T 37 | possible_items = possible_items.nlargest(top, possible_items.columns) 38 | score_ui = possible_items.mean() 39 | elif method == "average": 40 | # score(u, i) = sum(sim(Vi, Vj) for j in P_u) / |P_u| 41 | score_ui = np.array(possible_items.mean(axis=1)) 42 | else: 43 | raise ValueError("method has to be 'maximum', 'average_top_k' or 'average'") 44 | # Calculate score and retrieve relevant indexes 45 | score_ui = np.array(score_ui) 46 | # Retrieve relevant indexes 47 | recommendation = score_ui.argsort()[::-1] 48 | # Remove seen items indexes 49 | if not include_consumed: 50 | recommendation = np.delete(recommendation, np.where(np.isin(recommendation, np.array(items)))) 51 | # If k is None, all items are calculated 52 | if k is not None: 53 | recommendation = recommendation[:k] 54 | return 
recommendation, score_ui[recommendation] 55 | 56 | 57 | if __name__ == '__main__': 58 | embedding = np.random.rand(20, 100) 59 | print("Embedding size:", embedding.shape) 60 | model = VisRank(embedding, similarity_method=cosine_similarity) 61 | items = [0, 17, 3] 62 | print("Consumed items:", items) 63 | print("-" * 70) 64 | indexes, scores = model.most_similar_to_profile(items, k=10, method="maximum") 65 | print("Top items using maximum:\t\t", indexes) 66 | indexes, scores = model.most_similar_to_profile(items, k=10, method="average_top_k", top=2) 67 | print("Top items using average_top_k:\t", indexes) 68 | indexes, scores = model.most_similar_to_profile(items, k=10, method="average") 69 | print("Top items using average:\t\t", indexes) 70 | -------------------------------------------------------------------------------- /datasets/profile_mode.py: -------------------------------------------------------------------------------- 1 | """Profile mode Dataset (PyTorch) object 2 | 3 | This module contains Dataset object with the triples information 4 | represented as (profile, pi, ni), where profile is a set of items 5 | and pi and ni are identifier. 6 | """ 7 | import errno 8 | import os 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from torch.utils.data import Dataset 13 | 14 | 15 | class ProfileModeDataset(Dataset): 16 | """Represents the Dataset as a PyTorch Dataset that yields tuples 17 | of 3 items: (profile, pi, ni). This mode, represents users as a 18 | profile, a set of items. 19 | 20 | Attributes: 21 | profile_sizes: Size of each user profile. 22 | unique_profiles: Actual profile data to save space. 23 | profile, pi, ni: Dataset triples (in different arrays). 24 | transform: Transforms for each sample. 25 | """ 26 | 27 | def __init__(self, csv_file, transform=None, id2index=None): 28 | """Inits a UGallery Dataset. 29 | 30 | Args: 31 | csv_file: Path (string) to the triplets file. 32 | transform: Optional. Torchvision like transforms. 33 | id2index: Optional. Transformation to apply on items. 
34 | """ 35 | # Data sources 36 | if not os.path.isfile(csv_file): 37 | raise FileNotFoundError( 38 | errno.ENOENT, os.strerror(errno.ENOENT), csv_file, 39 | ) 40 | self.__source_file = csv_file 41 | # Load triples from dataframe 42 | triples = pd.read_csv(self.__source_file) 43 | # Process profile elements 44 | if id2index: 45 | # Note: Assumes id is str and index is int 46 | def map_id2index(element): 47 | if type(element) is list: 48 | return [id2index[e] for e in element] 49 | else: 50 | return id2index[str(element)] 51 | triples["profile"] = triples["profile"].map(lambda p: p.split()) 52 | triples = triples.applymap(map_id2index) 53 | triples["profile"] = triples["profiles"].map(lambda p: " ".join(p)) 54 | # Mapping to unique profiles and use it to calculate profile sizes 55 | unique_profiles = triples["profile"].unique() 56 | profile2index = {k: v for v, k in enumerate(unique_profiles)} 57 | triples["profile"] = triples["profile"].map(profile2index) 58 | profile_sizes = np.fromiter( 59 | map(lambda p: p.count(" "), unique_profiles), 60 | dtype=int, count=len(unique_profiles), 61 | ) + 1 62 | profile_sizes = triples["profile"].map(dict(enumerate(profile_sizes))) 63 | self.unique_profiles = unique_profiles.astype(np.string_) 64 | self.profile_sizes = profile_sizes.to_numpy(copy=True) 65 | # Using numpy arrays for faster lookup 66 | self.profile = triples["profile"].to_numpy(copy=True) 67 | self.pi = triples["pi"].to_numpy(copy=True) 68 | self.ni = triples["ni"].to_numpy(copy=True) 69 | # Common setup 70 | self.transform = transform 71 | 72 | def __len__(self): 73 | return len(self.pi) 74 | 75 | def __getitem__(self, idx): 76 | prof = self.profile[idx] 77 | if isinstance(idx, int) or isinstance(idx, np.number): 78 | profile = np.fromstring( 79 | self.unique_profiles[prof], dtype=int, sep=" ", 80 | ) 81 | else: 82 | profile = np.fromstring( 83 | b" ".join(self.unique_profiles[prof]), dtype=int, sep=" ", 84 | ).reshape((len(idx), -1)) 85 | 86 | return ( 87 | profile, 88 | self.pi[idx], 89 | self.ni[idx], 90 | ) 91 | 92 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Custom 2 | 3 | # Ignore dataset 4 | colabnotebooks/wikimedia_recsys 5 | colabnotebooks/wikimedia_recsys.zip 6 | 7 | # Ignore checkpoints folder contents 8 | checkpoints/* 9 | 10 | # Ignore changes in datasets data folder... 11 | data/*/* 12 | # ... except its README... 
13 | !data/*/README.md 14 | # and the data analysis 15 | !data/*/Data\ Analysis.ipynb 16 | 17 | 18 | # Created by https://www.gitignore.io/api/linux,macos,python,windows,jupyternotebooks 19 | # Edit at https://www.gitignore.io/?templates=linux,macos,python,windows,jupyternotebooks 20 | 21 | ### JupyterNotebooks ### 22 | # gitignore template for Jupyter Notebooks 23 | # website: http://jupyter.org/ 24 | 25 | .ipynb_checkpoints 26 | */.ipynb_checkpoints/* 27 | 28 | # IPython 29 | profile_default/ 30 | ipython_config.py 31 | 32 | # Remove previous ipynb_checkpoints 33 | # git rm -r .ipynb_checkpoints/ 34 | 35 | ### Linux ### 36 | *~ 37 | 38 | # temporary files which can be created if a process still has a handle open of a deleted file 39 | .fuse_hidden* 40 | 41 | # KDE directory preferences 42 | .directory 43 | 44 | # Linux trash folder which might appear on any partition or disk 45 | .Trash-* 46 | 47 | # .nfs files are created when an open file is removed but is still being accessed 48 | .nfs* 49 | 50 | ### macOS ### 51 | # General 52 | .DS_Store 53 | .AppleDouble 54 | .LSOverride 55 | 56 | # Icon must end with two \r 57 | Icon 58 | 59 | # Thumbnails 60 | ._* 61 | 62 | # Files that might appear in the root of a volume 63 | .DocumentRevisions-V100 64 | .fseventsd 65 | .Spotlight-V100 66 | .TemporaryItems 67 | .Trashes 68 | .VolumeIcon.icns 69 | .com.apple.timemachine.donotpresent 70 | 71 | # Directories potentially created on remote AFP share 72 | .AppleDB 73 | .AppleDesktop 74 | Network Trash Folder 75 | Temporary Items 76 | .apdisk 77 | 78 | ### Python ### 79 | # Byte-compiled / optimized / DLL files 80 | __pycache__/ 81 | *.py[cod] 82 | *$py.class 83 | 84 | # C extensions 85 | *.so 86 | 87 | # Distribution / packaging 88 | .Python 89 | build/ 90 | develop-eggs/ 91 | dist/ 92 | downloads/ 93 | eggs/ 94 | .eggs/ 95 | lib/ 96 | lib64/ 97 | parts/ 98 | sdist/ 99 | var/ 100 | wheels/ 101 | pip-wheel-metadata/ 102 | share/python-wheels/ 103 | *.egg-info/ 104 | .installed.cfg 105 | *.egg 106 | MANIFEST 107 | 108 | # PyInstaller 109 | # Usually these files are written by a python script from a template 110 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 111 | *.manifest 112 | *.spec 113 | 114 | # Installer logs 115 | pip-log.txt 116 | pip-delete-this-directory.txt 117 | 118 | # Unit test / coverage reports 119 | htmlcov/ 120 | .tox/ 121 | .nox/ 122 | .coverage 123 | .coverage.* 124 | .cache 125 | nosetests.xml 126 | coverage.xml 127 | *.cover 128 | .hypothesis/ 129 | .pytest_cache/ 130 | 131 | # Translations 132 | *.mo 133 | *.pot 134 | 135 | # Scrapy stuff: 136 | .scrapy 137 | 138 | # Sphinx documentation 139 | docs/_build/ 140 | 141 | # PyBuilder 142 | target/ 143 | 144 | # pyenv 145 | .python-version 146 | 147 | # pipenv 148 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 149 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 150 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 151 | # install all needed dependencies. 
152 | #Pipfile.lock 153 | 154 | # celery beat schedule file 155 | celerybeat-schedule 156 | 157 | # SageMath parsed files 158 | *.sage.py 159 | 160 | # Spyder project settings 161 | .spyderproject 162 | .spyproject 163 | 164 | # Rope project settings 165 | .ropeproject 166 | 167 | # Mr Developer 168 | .mr.developer.cfg 169 | .project 170 | .pydevproject 171 | 172 | # mkdocs documentation 173 | /site 174 | 175 | # mypy 176 | .mypy_cache/ 177 | .dmypy.json 178 | dmypy.json 179 | 180 | # Pyre type checker 181 | .pyre/ 182 | 183 | ### Windows ### 184 | # Windows thumbnail cache files 185 | Thumbs.db 186 | Thumbs.db:encryptable 187 | ehthumbs.db 188 | ehthumbs_vista.db 189 | 190 | # Dump file 191 | *.stackdump 192 | 193 | # Folder config file 194 | [Dd]esktop.ini 195 | 196 | # Recycle Bin used on file shares 197 | $RECYCLE.BIN/ 198 | 199 | # Windows Installer files 200 | *.cab 201 | *.msi 202 | *.msix 203 | *.msm 204 | *.msp 205 | 206 | # Windows shortcuts 207 | *.lnk 208 | 209 | # End of https://www.gitignore.io/api/linux,macos,python,windows,jupyternotebooks 210 | -------------------------------------------------------------------------------- /utils/curatornet_sampler.py: -------------------------------------------------------------------------------- 1 | """Custom batch sampler""" 2 | import random 3 | from collections import defaultdict 4 | 5 | import numpy as np 6 | from torch.utils.data.sampler import BatchSampler, RandomSampler 7 | 8 | 9 | class SameProfileSizeBatchSampler(BatchSampler): 10 | """Custom batch sampler that yields batches of triples with the 11 | same profile size (CuratorNet). 12 | 13 | Retrieves items from the sampler and yields a batch of size 14 | batch_size with items of the same size. 15 | 16 | Attributes: 17 | sampler: PyTorch sampler object to retrieve triples. 18 | batch_size: Max number of triples in each batch. 19 | profile_items_per_batch: Max number of items in profile. 20 | drop_last: Decides what to do with items that do not fill a 21 | batch. 22 | n_largest_first: How many of the largest batches to return 23 | first. 
24 | """ 25 | 26 | def __init__(self, sampler, batch_size=None, profile_items_per_batch=None, 27 | drop_last=False, n_largest_first=0): 28 | # Data sources 29 | self.sampler = sampler 30 | assert hasattr(self.sampler.data_source, "profile_sizes") 31 | # Minibatch limits 32 | assert batch_size is not None or profile_items_per_batch is not None 33 | self.batch_size = batch_size if batch_size else float("inf") 34 | self.profile_items_per_batch = profile_items_per_batch if profile_items_per_batch else float("inf") 35 | # More setup 36 | self.drop_last = drop_last 37 | self.n_largest_first = n_largest_first 38 | # Settings under the hood 39 | self.__shuffle = isinstance(sampler, RandomSampler) 40 | self.__minibatches = None 41 | self.__samples_per_profile_size = None 42 | # Prepare sampler 43 | self.prepare() 44 | 45 | def prepare(self): 46 | # Group samples of the same size to avoid doing it while training 47 | self.__samples_per_profile_size = defaultdict(list) 48 | for idx in self.sampler: 49 | p_size = self.sampler.data_source.profile_sizes[idx] 50 | self.__samples_per_profile_size[p_size].append(idx) 51 | # Transform each list into numpy array 52 | self.__samples_per_profile_size = { 53 | k: np.array(v) 54 | for k, v in self.__samples_per_profile_size.items() 55 | } 56 | # Generate minibatches for the first time to fill attributes 57 | self.generate_minibatches() 58 | 59 | def generate_minibatches(self): 60 | minibatches = list() 61 | for p_size, samples in self.__samples_per_profile_size.items(): 62 | # Shuffle samples if necessary 63 | if self.__shuffle: 64 | np.random.shuffle(samples) 65 | batch_size = min( 66 | self.batch_size, 67 | self.profile_items_per_batch // p_size, 68 | ) 69 | # Reduce samples to chunks 70 | for i in range(0, len(samples), batch_size): 71 | minibatch = samples[i:i+batch_size] 72 | minibatches.append(( 73 | len(minibatch) * p_size, # Items in profile 74 | len(minibatch), # Items in pi/ni 75 | minibatch, # Actual minibatch 76 | )) 77 | # Drop "irregular" batches 78 | if self.drop_last: 79 | minibatches.pop(-1) 80 | self.__minibatches = minibatches 81 | 82 | def __iter__(self): 83 | # Generate minibatches 84 | self.generate_minibatches() 85 | # Prepare largest items first 86 | if self.n_largest_first: 87 | self.__minibatches = sorted( 88 | self.__minibatches, 89 | key=lambda mb: (mb[0], mb[1]), 90 | reverse=True, 91 | ) 92 | largest_minibatches = self.__minibatches[:self.n_largest_first] 93 | minibatches = self.__minibatches[self.n_largest_first:] 94 | # Prepare and shuffle other minibatches if necessary 95 | if self.__shuffle: 96 | random.shuffle(minibatches) 97 | # Join and yield minibatches 98 | self.__minibatches = largest_minibatches + minibatches 99 | for _, _, minibatch in self.__minibatches: 100 | yield minibatch 101 | 102 | def __len__(self): 103 | return len(self.__minibatches) 104 | -------------------------------------------------------------------------------- /datasets/user_mode_img.py: -------------------------------------------------------------------------------- 1 | """Profile mode Dataset (PyTorch) object 2 | 3 | This module contains Dataset object with the triples information 4 | represented as (ui, pi, ni), where each is an identifier. 
5 | To this triplet, we append the item image: (ui, pi, ni, ii) 6 | """ 7 | import torch 8 | import errno 9 | import os 10 | 11 | import numpy as np 12 | import pandas as pd 13 | from skimage import io, transform 14 | from PIL import Image 15 | from torch.utils.data import Dataset 16 | from torchvision import transforms 17 | 18 | 19 | class UserModeImgDataset(Dataset): 20 | """Represents the Dataset as a PyTorch Dataset that yields tuples 21 | of 5 items: (ui, pi, ni, pimg, nimg). 22 | This mode represents users as an id. 23 | 24 | Attributes: 25 | ui, pi, ni, pimg, nimg: Dataset tuples (in different arrays). 26 | transform: Transforms for each sample. 27 | """ 28 | 29 | def __init__(self, csv_file, img_path, id2index, index2fn, transform=None, img_size=224): 30 | """Inits a Dataset. 31 | 32 | Args: 33 | csv_file: Path (string) to the triplets file. 34 | img_path: Path (string) to the images 35 | id2index: Dict. Keys are img name, values are indexes 36 | index2fn: Dict. Keys are indexes, values are file names 37 | transform: Optional. Torchvision like transforms. 38 | 39 | """ 40 | # Data sources 41 | if not os.path.isfile(csv_file): 42 | raise FileNotFoundError( 43 | errno.ENOENT, os.strerror(errno.ENOENT), csv_file, 44 | ) 45 | if not os.path.isdir(img_path): 46 | raise NotADirectoryError( 47 | errno.ENOENT, os.strerror(errno.ENOENT), img_path 48 | ) 49 | 50 | self.__source_file = csv_file 51 | self.__images_path = img_path 52 | self.id2index = id2index 53 | self.index2fn = index2fn 54 | 55 | # Load triples from dataframe 56 | triples = pd.read_csv(self.__source_file) 57 | 58 | # Keep important attributes 59 | self.ui = triples["ui"].to_numpy(copy=True) 60 | self.pi = triples["pi"].to_numpy(copy=True) 61 | self.ni = triples["ni"].to_numpy(copy=True) 62 | # Common setup 63 | if transform is None: 64 | self.transform = TransformTuple(img_size) 65 | else: 66 | self.transform = transform 67 | 68 | def __len__(self): 69 | return len(self.ui) 70 | 71 | def __getitem__(self, idx): 72 | pimgpath = os.path.join(self.__images_path, self.index2fn[self.pi[idx]]) 73 | pimg = Image.open(pimgpath) 74 | 75 | nimgpath = os.path.join(self.__images_path, self.index2fn[self.ni[idx]]) 76 | nimg = Image.open(nimgpath) 77 | tuple = self.transform(self.ui[idx], pimg, nimg, self.pi[idx], self.ni[idx]) 78 | 79 | if tuple[1] is not None and tuple[2] is not None: 80 | return tuple 81 | 82 | 83 | class TransformTuple(object): 84 | def __init__(self, img_size): 85 | assert isinstance(img_size, (int, tuple)) 86 | self.to_tensor = ToTensor() 87 | 88 | def __call__(self, ui, pimg, nimg, pi, ni): 89 | pimg = self.to_tensor(pimg) 90 | nimg = self.to_tensor(nimg) 91 | return (ui, pimg, nimg, pi, ni) 92 | 93 | 94 | 95 | class Rescale(object): 96 | """Rescale the image in a sample to a given size. 97 | output_size (tuple or int): Desired output size. If tuple, output is 98 | matched to output_size. If int, smaller of image edges is matched 99 | to output_size keeping aspect ratio the same. 
100 | """ 101 | def __init__(self, output_size): 102 | self.output_size = output_size 103 | 104 | def __call__(self, image): 105 | h, w = image.shape[:2] 106 | if isinstance(self.output_size, int): 107 | if h > w: 108 | new_h, new_w = self.output_size * h / w, self.output_size 109 | else: 110 | new_h, new_w = self.output_size, self.output_size * w / h 111 | else: 112 | new_h, new_w = self.output_size 113 | 114 | new_h, new_w = int(new_h), int(new_w) 115 | img = transform.resize(image, (new_h, new_w)) 116 | return img 117 | 118 | 119 | class ToTensor(object): 120 | """Convert ndarrays in sample to Tensors.""" 121 | def __call__(self, image): 122 | img = transforms.ToTensor()(image) 123 | return img 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VisRec: A Hands-on Tutorial on Deep Learning for Visual Recommender Systems 2 | 3 | This page hosts the material for the tutorial on **VisRec: A Hands-on Tutorial on Deep Learning for Visual Recommender Systems**, 4 | presented at the 2021 ACM Conference on Intelligent User Interfaces (IUI 2021). 5 | 6 | **Schedule**: Tuesday, April 13th 2021, starting at 1:30pm CDT 7 | 8 | ## Citation 9 | 10 | If you use this material or code and publish something thanks to it, please cite 11 | 12 | Denis Parra, Antonio Ossa-Guerra, Manuel Cartagena, Patricio Cerda-Mardini, and Felipe del Rio. 2021. VisRec: A Hands-on Tutorial on Deep Learning for Visual Recommender Systems. In 26th International Conference on Intelligent User Interfaces (IUI '21). Association for Computing Machinery, New York, NY, USA, 5–6. DOI:https://doi.org/10.1145/3397482.3450620 13 | 14 | 15 | ## Instructors 16 | 17 | * Denis Parra, Associate Professor, PUC Chile 18 | * Antonio Ossa-Guerra, MSc Student, PUC Chile 19 | * Manuel Cartagena, MSc Student, PUC Chile 20 | * Patricio Cerda-Mardini, MSc, PUC Chile & MindsDB 21 | * Felipe del Río, PhD Student, PUC Chile 22 | 23 | ![speakers-visrec](https://user-images.githubusercontent.com/208111/114323807-f818ba80-9af4-11eb-84ef-428517a4fe60.jpg) 24 | 25 | ## Recording 26 | 27 | We have the [recording of the tutorial session](https://drive.google.com/file/d/1HfCs-9PgKwV5XEDMTEo-bMDrp4R9lc4p/view?usp=sharing) 28 | 29 | ## Requisites 30 | 31 | * Python 3.7+ 32 | * Pytorch 1.7 33 | * Torchvision 34 | 35 | ## Program 36 | 37 | * (40 mins) [Session 1](https://github.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/blob/main/slides/Session%201%20VisRec%20Introduction%20.pdf): Introduction to Visual RecSys, datasets and feature extraction with CNNs in Pytorch 38 | * (40 mins) [Session 2](https://github.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/blob/main/slides/Session%202%20Pipeline%20%2B%20VisRank%20%2B%20VBPR.pdf): Pipeline for training and testing visual RecSys in Pytorch, application with VisRank and VBPR 39 | 40 | (10 mins) [BREAK] 41 | 42 | * (25 mins) [Session 3](https://github.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/blob/main/slides/Session%203%20DVBPR.pdf): Dynamic Visual Bayesian Personalized Ranking (DVBPR) in Pytorch 43 | * (25 mins) [Session 4](https://github.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/blob/main/slides/Session%204%20CuratorNet.pdf): CuratorNet in Pytorch 44 | * (25 mins) [Session 5](https://github.com/ialab-puc/VisualRecSys-Tutorial-IUI2021/blob/main/slides/Session%205%20ACF.pdf): Attentive Collaborative Filtering (ACF) in Pytorch 45 | 46 | (5 mins) [BREAK] 47 | 48 | * (10 mins) Conclusion 49 
| 50 | ## Wikimedia Commons Dataset 51 | 52 | Just like you, we have been looking for several years for some datasets to train our models. For instance, the RecSys dataset collection 53 | by Prof. Julian McAuley at UCSD has datasets, but due to copyright issues he only shares embeddings as .npy and in some cases (such as the Amazon datasets) links to image URLs so you can download them on your own. We need images to test if our recommendations are making sense! 54 | 55 | We acknowledge the support of [Diego Saez-Trumper](https://wikimediafoundation.org/profile/diego-saez-trumper/) from the Wikimedia Foundation to collect this dataset. 56 | 57 | ## Benchmark on Wikimedia Commons Dataset 58 | 59 | | | AUC | RR | R@20 | P@20 | nDCG@20 | R@100 | P@100 | nDCG@100 | 60 | |------------|---------|---------|---------|---------|---------|---------|---------|----------| 61 | | [1] CuratorNet | .66931 | .01955 | .03803 | .00190 | .02226 | .07884 | .00078 | .02943 | 62 | | [2] VBPR | .77846 | .02169 | .05565 | .00278 | .02684 | .13821 | .00138 | .04105 | 63 | | [3] DVBPR | .83168 | .04507 | .12152 | .00607 | .05814 | .25695 | .00256 | .08245 | 64 | | [4] ACF | .80409 | .01594 | .05473 | .00273 | .02127 | .14935 | .00149 | .03781 | 65 | 66 | ## References 67 | 68 | [1] Messina, P., Cartagena, M., Cerda, P., del Rio, F., & Parra, D. (2020). CuratorNet: Visually-aware Recommendation of Art Images. arXiv preprint arXiv:2009.04426. 69 | 70 | [2] He, R., & McAuley, J. (2016). VBPR: visual bayesian personalized ranking from implicit feedback. In Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 30, No. 1). 71 | 72 | [3] Kang, W. C., Fang, C., Wang, Z., & McAuley, J. (2017). Visually-aware fashion recommendation and design with generative image models. In 2017 IEEE International Conference on Data Mining (ICDM) (pp. 207-216). IEEE. 73 | 74 | [4] Chen, J., Zhang, H., He, X., Nie, L., Liu, W., & Chua, T. S. (2017). Attentive collaborative filtering: Multimedia recommendation with item- and component-level attention. In Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval (pp. 335-344). 75 | -------------------------------------------------------------------------------- /models/vbpr.py: -------------------------------------------------------------------------------- 1 | """VBPR implementation in PyTorch 2 | """ 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class VBPR(nn.Module): 9 | """VBPR model architecture from 'VBPR: Visual Bayesian 10 | Personalized Ranking from Implicit Feedback'. 11 | """ 12 | 13 | def __init__(self, n_users, n_items, features, dim_gamma, dim_theta): 14 | super().__init__() 15 | 16 | # Image features 17 | self.features = nn.Embedding.from_pretrained(features, freeze=True) 18 | 19 | # Latent factors (gamma) 20 | self.gamma_users = nn.Embedding(n_users, dim_gamma) 21 | self.gamma_items = nn.Embedding(n_items, dim_gamma) 22 | 23 | # Visual factors (theta) 24 | self.theta_users = nn.Embedding(n_users, dim_theta) 25 | self.embedding = nn.Embedding(features.size(1), dim_theta) 26 | 27 | # Biases (beta) 28 | # self.beta_users = nn.Embedding(n_users, 1) 29 | self.beta_items = nn.Embedding(n_items, 1) 30 | self.visual_bias = nn.Embedding(features.size(1), 1) 31 | 32 | # Random weight initialization 33 | self.reset_parameters() 34 | 35 | def forward(self, ui, pi, ni): 36 | """Forward pass of the model. 37 | 38 | Feed forward a given input (batch).
Each object is expected 39 | to be a Tensor. 40 | 41 | Args: 42 | ui: User index, as a Tensor. 43 | pi: Positive item index, as a Tensor. 44 | ni: Negative item index, as a Tensor. 45 | 46 | Returns: 47 | Network output (scalar) for each input. 48 | """ 49 | # User 50 | ui_latent_factors = self.gamma_users(ui) # Latent factors of user u 51 | ui_visual_factors = self.theta_users(ui) # Visual factors of user u 52 | # Items 53 | pi_bias = self.beta_items(pi) # Pos. item bias 54 | ni_bias = self.beta_items(ni) # Neg. item bias 55 | pi_latent_factors = self.gamma_items(pi) # Pos. item visual factors 56 | ni_latent_factors = self.gamma_items(ni) # Neg. item visual factors 57 | pi_features = self.features(pi) # Pos. item visual features 58 | ni_features = self.features(ni) # Neg. item visual features 59 | 60 | # Precompute differences 61 | diff_features = pi_features - ni_features 62 | diff_latent_factors = pi_latent_factors - ni_latent_factors 63 | 64 | # x_uij 65 | x_uij = ( 66 | pi_bias - ni_bias 67 | + (ui_latent_factors * diff_latent_factors).sum(dim=1).unsqueeze(-1) 68 | + (ui_visual_factors * diff_features.mm(self.embedding.weight)).sum(dim=1).unsqueeze(-1) 69 | + diff_features.mm(self.visual_bias.weight) 70 | ) 71 | 72 | return x_uij.unsqueeze(-1) 73 | 74 | def recommend_all(self, user, cache=None, grad_enabled=False): 75 | with torch.set_grad_enabled(grad_enabled): 76 | # User 77 | u_latent_factors = self.gamma_users(user) # Latent factors of user u 78 | u_visual_factors = self.theta_users(user) # Visual factors of user u 79 | 80 | # Items 81 | i_bias = self.beta_items.weight # Items bias 82 | i_latent_factors = self.gamma_items.weight # Items visual factors 83 | i_features = self.features.weight # Items visual features 84 | if cache is not None: 85 | visual_rating_space, opinion_visual_appearance = cache 86 | else: 87 | visual_rating_space = i_features.mm(self.embedding.weight) 88 | opinion_visual_appearance = i_features.mm(self.visual_bias.weight) 89 | 90 | # x_ui 91 | x_ui = ( 92 | i_bias 93 | + (u_latent_factors * i_latent_factors).sum(dim=1).unsqueeze(-1) 94 | + (u_visual_factors * visual_rating_space).sum(dim=1).unsqueeze(-1) 95 | + opinion_visual_appearance 96 | ) 97 | 98 | return x_ui 99 | 100 | 101 | def reset_parameters(self): 102 | """Resets network weights. 103 | 104 | Restart network weights using a Xavier uniform distribution. 
105 | """ 106 | # Latent factors (gamma) 107 | nn.init.xavier_uniform_(self.gamma_users.weight) 108 | nn.init.xavier_uniform_(self.gamma_items.weight) 109 | 110 | # Visual factors (theta) 111 | nn.init.xavier_uniform_(self.theta_users.weight) 112 | nn.init.xavier_uniform_(self.embedding.weight) 113 | 114 | # Biases (beta) 115 | nn.init.xavier_uniform_(self.beta_items.weight) 116 | nn.init.xavier_uniform_(self.visual_bias.weight) 117 | 118 | def generate_cache(self, grad_enabled=False): 119 | with torch.set_grad_enabled(grad_enabled): 120 | i_features = self.features.weight # Items visual features 121 | visual_rating_space = i_features.mm(self.embedding.weight) 122 | opinion_visual_appearance = i_features.mm(self.visual_bias.weight) 123 | return visual_rating_space, opinion_visual_appearance 124 | -------------------------------------------------------------------------------- /models/curatornet.py: -------------------------------------------------------------------------------- 1 | """CuratorNet implementation in PyTorch 2 | """ 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class CuratorNet(nn.Module): 9 | """CuratorNet model architecture from 'CuratorNet: A Neural 10 | Network for Visually-aware Recommendation of Art Images'. 11 | """ 12 | 13 | def __init__(self, embedding, input_size=2048): 14 | super().__init__() 15 | 16 | # Embedding 17 | self.embedding = nn.Embedding.from_pretrained(embedding, freeze=True) 18 | 19 | # Common section 20 | self.selu_common1 = nn.Linear(input_size, 200) 21 | self.selu_common2 = nn.Linear(200, 200) 22 | 23 | # Profile section 24 | self.maxpool = nn.AdaptiveMaxPool2d((1, 200)) 25 | self.avgpool = nn.AdaptiveAvgPool2d((1, 200)) 26 | self.selu_pu1 = nn.Linear(200 + 200, 300) 27 | self.selu_pu2 = nn.Linear(300, 300) 28 | self.selu_pu3 = nn.Linear(300, 200) 29 | 30 | # Random weight initialization 31 | self.reset_parameters() 32 | 33 | def forward(self, profile, pi, ni): 34 | """Forward pass of the model. 35 | 36 | Feed forward a given input (batch). Each object is expected 37 | to be a Tensor. 38 | 39 | Args: 40 | profile: User profile items embeddings, as a Tensor. 41 | pi: Positive item embedding, as a Tensor. 42 | ni: Negative item embedding, as a Tensor. 43 | 44 | Returns: 45 | Network output (scalar) for each input. 
46 | """ 47 | # Load embedding data 48 | profile = self.embedding(profile) 49 | pi = self.embedding(pi) 50 | ni = self.embedding(ni) 51 | 52 | # Positive item 53 | pi = F.selu(self.selu_common1(pi)) 54 | pi = F.selu(self.selu_common2(pi)) 55 | 56 | # Negative item 57 | ni = F.selu(self.selu_common1(ni)) 58 | ni = F.selu(self.selu_common2(ni)) 59 | 60 | # User profile 61 | profile = F.selu(self.selu_common1(profile)) 62 | profile = F.selu(self.selu_common2(profile)) 63 | profile = torch.cat( 64 | (self.maxpool(profile), self.avgpool(profile)), dim=-1 65 | ) 66 | profile = F.selu(self.selu_pu1(profile)) 67 | profile = F.selu(self.selu_pu2(profile)) 68 | profile = F.selu(self.selu_pu3(profile)) 69 | 70 | # x_ui > x_uj 71 | x_ui = torch.bmm(profile, pi.unsqueeze(-1)) 72 | x_uj = torch.bmm(profile, ni.unsqueeze(-1)) 73 | 74 | return x_ui - x_uj 75 | 76 | def recommend_all(self, profile, cache=None, grad_enabled=False): 77 | with torch.set_grad_enabled(grad_enabled): 78 | # Load embedding data 79 | profile = self.embedding(profile) 80 | 81 | # Items 82 | if cache is not None: 83 | items = cache[0] 84 | else: 85 | items = self.embedding.weight.unsqueeze(0) 86 | items = F.selu(self.selu_common1(items)) 87 | items = F.selu(self.selu_common2(items)) 88 | items = items.transpose(-1, -2) 89 | 90 | # User profile 91 | profile = F.selu(self.selu_common1(profile)) 92 | profile = F.selu(self.selu_common2(profile)) 93 | profile = torch.cat( 94 | (self.maxpool(profile), self.avgpool(profile)), dim=-1 95 | ) 96 | profile = F.selu(self.selu_pu1(profile)) 97 | profile = F.selu(self.selu_pu2(profile)) 98 | profile = F.selu(self.selu_pu3(profile)) 99 | 100 | # x_ui 101 | x_ui = torch.bmm(profile, items).squeeze() 102 | 103 | return x_ui 104 | 105 | def recommend(self, profile, items=None, grad_enabled=False): 106 | with torch.set_grad_enabled(grad_enabled): 107 | # Load embedding data 108 | profile = self.embedding(profile) 109 | 110 | # Items 111 | items = self.embedding(items) 112 | items = F.selu(self.selu_common1(items)) 113 | items = F.selu(self.selu_common2(items)) 114 | items = items.transpose(-1, -2) 115 | 116 | # User profile 117 | profile = F.selu(self.selu_common1(profile)) 118 | profile = F.selu(self.selu_common2(profile)) 119 | profile = torch.cat( 120 | (self.maxpool(profile), self.avgpool(profile)), dim=-1 121 | ) 122 | profile = F.selu(self.selu_pu1(profile)) 123 | profile = F.selu(self.selu_pu2(profile)) 124 | profile = F.selu(self.selu_pu3(profile)) 125 | 126 | # x_ui 127 | x_ui = torch.bmm(profile, items).squeeze() 128 | 129 | return x_ui 130 | 131 | def reset_parameters(self): 132 | """Resets network weights. 133 | 134 | Restart network weights using a Xavier uniform distribution. 
135 | """ 136 | # Common section 137 | nn.init.xavier_uniform_(self.selu_common1.weight) 138 | nn.init.xavier_uniform_(self.selu_common2.weight) 139 | # Profile section 140 | nn.init.xavier_uniform_(self.selu_pu1.weight) 141 | nn.init.xavier_uniform_(self.selu_pu2.weight) 142 | nn.init.xavier_uniform_(self.selu_pu3.weight) 143 | 144 | def generate_cache(self, grad_enabled=False): 145 | with torch.set_grad_enabled(grad_enabled): 146 | # Items 147 | items = self.embedding.weight.unsqueeze(0) 148 | items = F.selu(self.selu_common1(items)) 149 | items = F.selu(self.selu_common2(items)) 150 | items = items.transpose(-1, -2) 151 | return (items,) 152 | -------------------------------------------------------------------------------- /3 - (ACF) Training procedure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2021-03-26T01:41:47.880863Z", 9 | "start_time": "2021-03-26T01:41:46.868570Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import random\n", 16 | "import time\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "import torch\n", 20 | "import torch.nn as nn\n", 21 | "import torch.optim as optim\n", 22 | "\n", 23 | "from datasets.user_profile_mode import UserProfileModeDataset\n", 24 | "from models.acf import ACF\n", 25 | "from trainers.acf_trainer import ACFTrainer\n", 26 | "from trainers.losses import warp_loss" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# Dataset\n", 36 | "DATASET = \"UGallery\"\n", 37 | "assert DATASET in [\"UGallery\", \"Wikimedia\"]" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "ExecuteTime": { 45 | "end_time": "2021-03-26T01:41:50.055518Z", 46 | "start_time": "2021-03-26T01:41:50.052454Z" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# Parameters\n", 52 | "RNG_SEED = 0\n", 53 | "EMBEDDING_PATH = os.path.join(\"data\", DATASET, \"embedding-resnet50-layer4.npy\")\n", 54 | "TRAINING_PATH = os.path.join(\"data\", DATASET, \"naive-user-train.csv\")\n", 55 | "VALIDATION_PATH = os.path.join(\"data\", DATASET, \"naive-user-validation.csv\")\n", 56 | "CHECKPOINTS_DIR = os.path.join(\"checkpoints\")\n", 57 | "USE_GPU = True\n", 58 | "\n", 59 | "# Parameters (training)\n", 60 | "SETTINGS = {\n", 61 | " \"batch_sampler:batch_size\": 128,\n", 62 | " \"optimizer:lr\": 1e-3,\n", 63 | " \"optimizer:weight_decay\": 1e-5,\n", 64 | " \"train:max_epochs\": 10,\n", 65 | " \"model:model_dim\": 128,\n", 66 | "}" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "ExecuteTime": { 74 | "end_time": "2021-03-26T01:41:59.213466Z", 75 | "start_time": "2021-03-26T01:41:51.073361Z" 76 | } 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "%%time\n", 81 | "# Freezing RNG seed if needed\n", 82 | "if RNG_SEED is not None:\n", 83 | " print(f\"\\nUsing random seed...\")\n", 84 | " random.seed(RNG_SEED)\n", 85 | " torch.manual_seed(RNG_SEED)\n", 86 | " np.random.seed(RNG_SEED)\n", 87 | "\n", 88 | "# Training DataLoader\n", 89 | "train_dataset = UserProfileModeDataset(\n", 90 | " csv_file=TRAINING_PATH,\n", 91 | ")\n", 92 | "print(f\">> Training dataset: {len(train_dataset)}\")\n", 93 | "\n", 94 | "# Validation DataLoader\n", 95 | "valid_dataset = UserProfileModeDataset(\n", 96 | " 
csv_file=VALIDATION_PATH,\n", 97 | ")\n", 98 | "print(f\">> Validation dataset: {len(valid_dataset)}\")\n", 99 | "\n", 100 | "# Model initialization\n", 101 | "print(\"\\nInitialize model\")\n", 102 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() and USE_GPU else \"cpu\")\n", 103 | "if torch.cuda.is_available() != USE_GPU:\n", 104 | " print((f\"\\nNotice: Not using GPU - \"\n", 105 | " f\"Cuda available ({torch.cuda.is_available()}) \"\n", 106 | " f\"does not match USE_GPU ({USE_GPU})\"\n", 107 | " ))\n", 108 | "model = ACF(\n", 109 | " train_dataset.users,\n", 110 | " train_dataset.items,\n", 111 | " feature_path=EMBEDDING_PATH,\n", 112 | " model_dim=SETTINGS[\"model:model_dim\"],\n", 113 | " device=device\n", 114 | ").to(device)\n", 115 | "\n", 116 | "# Training setup\n", 117 | "print(\"\\nSetting up training\")\n", 118 | "optimizer = optim.Adam(\n", 119 | " model.parameters(),\n", 120 | " lr=SETTINGS[\"optimizer:lr\"],\n", 121 | " weight_decay=SETTINGS[\"optimizer:weight_decay\"],\n", 122 | ")\n", 123 | "criterion = warp_loss\n", 124 | "\n", 125 | "# Training\n", 126 | "print(\"\\nTraining\")\n" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "ExecuteTime": { 134 | "end_time": "2021-03-26T02:07:15.586369Z", 135 | "start_time": "2021-03-26T01:42:00.462845Z" 136 | } 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "%%time\n", 141 | "# Training\n", 142 | "version = (\n", 143 | " f\"{model.__class__.__name__}_\"\n", 144 | " f\"{DATASET.lower()}\"\n", 145 | ")\n", 146 | "\n", 147 | "datasets = train_dataset, valid_dataset\n", 148 | "trainer = ACFTrainer(\n", 149 | " model, datasets, criterion, optimizer, version,\n", 150 | " device=device, batch_size=SETTINGS[\"batch_sampler:batch_size\"],\n", 151 | " checkpoint_dir=CHECKPOINTS_DIR\n", 152 | ")\n", 153 | "\n", 154 | "trainer.fit(SETTINGS[\"train:max_epochs\"])" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.7.10" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 4 186 | } 187 | -------------------------------------------------------------------------------- /dvbpr_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from PIL import Image 4 | 5 | import numpy as np 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | import torch.multiprocessing 9 | from torch.utils.data import DataLoader, Subset 10 | from torch.utils.data.sampler import RandomSampler, SequentialSampler 11 | 12 | from datasets import UserModeImgDataset, UserModeDataset, UserModeFeatDataset 13 | from models import DVBPR 14 | from trainers import ImgTrainer 15 | from trainers.losses import bpr_loss 16 | from utils.data import extract_embedding 17 | 18 | if __name__ == '__main__': 19 | # Parameters 20 | RNG_SEED = 0 21 | BASE_PATH = '/home/pcerdam/VisualRecSys-Tutorial-IUI2021/' 22 | TRAINING_PATH = os.path.join(BASE_PATH, "data", "naive-user-train.csv") 23 | EMBEDDING_PATH 
= os.path.join(BASE_PATH, "data", "embedding-resnet50.npy") 24 | VALIDATION_PATH = os.path.join(BASE_PATH, "data", "naive-user-validation.csv") 25 | IMAGES_PATH = os.path.join('/mnt/data2/wikimedia/mini-images-224-224-v2') 26 | CHECKPOINTS_DIR = os.path.join(BASE_PATH, "checkpoints") 27 | version = f"DVBPR_wikimedia_resnetEmbTable" 28 | USE_GPU = True # False # 29 | version = 'DVBPR_wikimediaAlexNet_notPretrained_100_wLatent' 30 | 31 | # Parameters (training) 32 | SETTINGS = { 33 | "dataloader:batch_size": 128, # 256, # 512, # 64, # 64, # 24, # 42_000,128, # x 34 | "dataloader:num_workers": 4, # os.cpu_count(), # 1, # 35 | "prev_checkpoint": False, # 'DVBPR_wikimediaAlexNetBig204_5epochs', 36 | "model:dim_visual": 100, #2048, 37 | "optimizer:lr": 0.001, 38 | "optimizer:weight_decay": 0.0001, 39 | "scheduler:factor": 0.6, 40 | "scheduler:patience": 2, 41 | "train:max_epochs": 5, # 1, # 5, # 150, 42 | "train:max_lrs": 5, 43 | "train:non_blocking": True, 44 | "train:train_per_valid_times": 1 # 0 45 | 46 | } 47 | 48 | # ================================================ 49 | 50 | # Freezing RNG seed if needed 51 | if RNG_SEED is not None: 52 | print(f"\nUsing random seed...") 53 | random.seed(RNG_SEED) 54 | torch.manual_seed(RNG_SEED) 55 | np.random.seed(RNG_SEED) 56 | 57 | # Load embedding from file 58 | print(f"\nLoading embedding from file... ({EMBEDDING_PATH})") 59 | embedding = np.load(EMBEDDING_PATH, allow_pickle=True) 60 | 61 | # Extract features and "id2index" mapping 62 | print("\nExtracting data into variables...") 63 | embedding, id2index, index2fn = extract_embedding(embedding, verbose=True) 64 | print(f">> Features shape: {embedding.shape}") 65 | 66 | # DataLoaders initialization 67 | print("\nInitialize DataLoaders") 68 | # Training DataLoader 69 | train_dataset = UserModeImgDataset( # UserModeDataset( # 70 | csv_file=TRAINING_PATH, 71 | img_path=IMAGES_PATH, 72 | id2index=id2index, 73 | index2fn=index2fn 74 | ) 75 | print(f">> Training dataset: {len(train_dataset)}") 76 | train_sampler = RandomSampler(train_dataset) 77 | train_dataloader = DataLoader( 78 | train_dataset, 79 | #Subset(train_dataset, list(range(10000))), # subset for faster tests 80 | batch_size=SETTINGS["dataloader:batch_size"], 81 | num_workers=SETTINGS["dataloader:num_workers"], 82 | shuffle=True, 83 | pin_memory=True, 84 | ) 85 | print(f">> Training dataloader: {len(train_dataloader)}") 86 | # Validation DataLoader 87 | valid_dataset = UserModeImgDataset( # UserModeDataset( # 88 | csv_file=VALIDATION_PATH, 89 | img_path=IMAGES_PATH, 90 | id2index=id2index, 91 | index2fn=index2fn 92 | ) 93 | print(f">> Validation dataset: {len(valid_dataset)}") 94 | valid_sampler = SequentialSampler(valid_dataset) 95 | valid_dataloader = DataLoader( 96 | #Subset(valid_dataset, list(range(10000))), # subset for faster tests 97 | valid_dataset, 98 | batch_size=SETTINGS["dataloader:batch_size"], 99 | num_workers=SETTINGS["dataloader:num_workers"], 100 | shuffle=True, 101 | pin_memory=True, 102 | ) 103 | print(f">> Validation dataloader: {len(valid_dataloader)}") 104 | # Model initialization 105 | print("\nInitialize model") 106 | device = torch.device("cuda:0" if torch.cuda.is_available() and USE_GPU else "cpu") 107 | if torch.cuda.is_available() != USE_GPU: 108 | print((f"\nNotice: Not using GPU - " 109 | f"Cuda available ({torch.cuda.is_available()}) " 110 | f"does not match USE_GPU ({USE_GPU})" 111 | )) 112 | N_USERS = len(set(train_dataset.ui)) 113 | N_ITEMS = len(embedding) 114 | print(f">> N_USERS = {N_USERS} | N_ITEMS = 
{N_ITEMS}") 115 | print(torch.Tensor(embedding).shape) 116 | model = DVBPR( 117 | N_USERS, # Number of users and items 118 | N_ITEMS, 119 | embedding, # experiments for debugging 120 | SETTINGS["model:dim_visual"], # Size of visual spaces 121 | ).to(device) 122 | 123 | print(model) 124 | 125 | # Training setup 126 | print("\nSetting up training") 127 | optimizer = optim.Adam( 128 | model.parameters(), 129 | lr=SETTINGS["optimizer:lr"], 130 | weight_decay=SETTINGS["optimizer:weight_decay"], 131 | ) 132 | criterion = nn.BCEWithLogitsLoss(reduction="sum") # bpr_loss # # # nn.MarginRankingLoss(reduction="mean") 133 | scheduler = optim.lr_scheduler.ReduceLROnPlateau( 134 | optimizer, mode="max", factor=SETTINGS["scheduler:factor"], 135 | patience=SETTINGS["scheduler:patience"], verbose=True, 136 | ) 137 | 138 | # ================================================ 139 | 140 | # Training 141 | trainer = ImgTrainer( 142 | model, device, criterion, optimizer, scheduler, 143 | checkpoint_dir=CHECKPOINTS_DIR, 144 | version=version, 145 | ) 146 | best_model, best_acc, best_loss, best_epoch = trainer.run( 147 | SETTINGS["train:max_epochs"], SETTINGS["train:max_lrs"], 148 | {"train": train_dataloader, "validation": valid_dataloader}, 149 | train_valid_loops=SETTINGS["train:train_per_valid_times"], 150 | use_checkpoint=SETTINGS["prev_checkpoint"] 151 | ) 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /trainers/acf_trainer.py: -------------------------------------------------------------------------------- 1 | # The following code is a derivative from the one published 2 | # at https://github.com/Darel13712/acf_pytorch by Darel13712 3 | 4 | import os 5 | from typing import Sequence 6 | 7 | import torch 8 | from torch.utils.data import DataLoader 9 | from torch import tensor 10 | import numpy as np 11 | from tqdm.auto import tqdm 12 | 13 | from utils.logger import Log 14 | 15 | 16 | def generate_collate_fn(max_profile_size, pad_token=0): 17 | def pad_profile(profile, max_size): 18 | result = np.full((max_size,), pad_token) 19 | result[:len(profile)] = profile 20 | return result 21 | 22 | def collate_fn(batch): 23 | users, profiles, pos, neg = zip(*batch) 24 | users, pos, neg = torch.tensor(users), torch.tensor(pos), torch.tensor(neg) 25 | max_size = max(len(p) for p in profiles) 26 | max_size = min(max_profile_size, max_size) 27 | profiles = [pad_profile(profile, max_size) for profile in profiles] 28 | profiles = torch.tensor(profiles) 29 | return users, profiles, pos, neg 30 | 31 | return collate_fn 32 | 33 | 34 | def get_device(device=None): 35 | if device is None: 36 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 37 | elif isinstance(device, int): 38 | device = torch.device(f'cuda:{device}') 39 | else: 40 | device = torch.device(device) 41 | return device 42 | 43 | 44 | class ACFTrainer(): 45 | """ 46 | Handles training process 47 | """ 48 | def __init__(self, model, datasets, loss, optimizer, version, batch_size=100, device=None, 49 | max_profile_size=9, checkpoint_dir=None): 50 | """ 51 | Parameters 52 | ---------- 53 | model: initialized UserNet 54 | dataset: initialized MovieLens 55 | loss: one of the warp functions 56 | optimizer: torch.optim 57 | run_name: directory to save results 58 | batch_size: number of samples to process for one update 59 | device: gpu or cpu 60 | """ 61 | self.pad_token = 0 62 | 63 | self.version = version 64 | self.epoch = 0 65 | self.best_loss = np.inf 66 | self.loss = 
loss 67 | self.optimizer = optimizer 68 | self.batch_size = batch_size 69 | 70 | self.device = get_device(device) 71 | self.model = model 72 | self.model = self.model.to(self.device) 73 | 74 | self.train, self.test = datasets 75 | self.all_items = self.preprocess_inputs(self.train.items, to_tensor=True) 76 | 77 | self.train_loader = DataLoader(self.train, batch_size=batch_size, shuffle=True, 78 | collate_fn=generate_collate_fn(max_profile_size), num_workers=8) 79 | self.test_loader = DataLoader(self.test, batch_size=batch_size, shuffle=True, 80 | collate_fn=generate_collate_fn(max_profile_size), num_workers=1) 81 | 82 | if checkpoint_dir is None: 83 | checkpoint_dir = os.path.join("checkpoints") 84 | assert os.path.isdir(checkpoint_dir) 85 | self.checkpoint_dir = checkpoint_dir 86 | self.logger = Log(self.version, checkpoint_dir=self.checkpoint_dir) 87 | 88 | @property 89 | def state(self): 90 | state = { 91 | "epoch": self.epoch, 92 | "loss": self.best_loss, 93 | "model_args": self.model.args(), 94 | "state_dict": self.model.state_dict(), 95 | "optimizer": self.optimizer.state_dict(), 96 | } 97 | return state 98 | 99 | def fit(self, num_epochs, k=10): 100 | num_train_batches = len(self.train) / self.batch_size 101 | num_test_batches = len(self.test) / self.batch_size 102 | for epoch in tqdm(range(num_epochs)): 103 | self.epoch = epoch 104 | for phase in ['train', 'val']: 105 | self.logger.epoch(epoch, phase) 106 | self.model.train(phase == 'train') 107 | loss = 0 108 | cur_step = 0 109 | if phase == 'train': 110 | t = tqdm(self.train_loader) 111 | for batch in t: 112 | self.optimizer.zero_grad() 113 | cur_loss = self.training_step(batch) 114 | self.optimizer.step() 115 | loss += cur_loss 116 | cur_step += 1 117 | avg_loss = loss / cur_step 118 | 119 | t.set_description(f"Average Loss {avg_loss:.4f}") 120 | t.refresh() 121 | 122 | loss /= num_train_batches 123 | self.logger.metrics(loss, 0, epoch, phase) 124 | else: 125 | with torch.no_grad(): 126 | for batch in tqdm(self.test_loader): 127 | cur_loss = self.validation_step(batch) 128 | loss += cur_loss 129 | loss /= num_test_batches 130 | # self.logger.metrics(loss, self.score(k=k), epoch, phase) 131 | self.logger.metrics(loss, 0.0, epoch, phase) 132 | 133 | if loss < self.best_loss: 134 | self.best_loss = loss 135 | self.logger.save(self.state, epoch) 136 | 137 | def get_profile_mask(self, profile_ids): 138 | return (profile_ids != self.pad_token).to(self.device) 139 | 140 | def training_step(self, batch): 141 | user_id, profile_ids, pos, neg = self.preprocess_inputs(*batch) 142 | profile_mask = self.get_profile_mask(profile_ids) 143 | pos_pred, neg_pred = self.model(user_id, profile_ids, pos, neg, profile_mask) 144 | 145 | loss = self.loss(pos_pred, neg_pred) 146 | loss.backward() 147 | return loss.item() 148 | 149 | def validation_step(self, batch): 150 | user_id, profile_ids, pos, neg = self.preprocess_inputs(*batch) 151 | profile_mask = self.get_profile_mask(profile_ids) 152 | pos_pred, neg_pred = self.model(user_id, profile_ids, pos, neg, profile_mask) 153 | 154 | loss = self.loss(pos_pred, neg_pred) 155 | return loss.item() 156 | 157 | def preprocess_inputs(self, *inputs, to_tensor=False): 158 | if to_tensor: 159 | inputs = tuple(torch.tensor(input_) for input_ in inputs) 160 | 161 | inputs = tuple(input_.long() for input_ in inputs) 162 | inputs = tuple(input_.to(self.device) for input_ in inputs) 163 | return inputs 164 | -------------------------------------------------------------------------------- /models/dvbpr.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from tqdm.auto import tqdm 6 | import torchvision.models as models 7 | 8 | """ 9 | DVBPR -- PyTorch port 10 | Paper: http://cseweb.ucsd.edu/~jmcauley/pdfs/icdm17.pdf 11 | Original implementation: https://github.com/kang205/DVBPR/blob/master/DVBPR/main.py 12 | 13 | Note that we do not consider the GAN element of the paper in this work. 14 | """ 15 | 16 | 17 | class CNNF(nn.Module): 18 | """CNN-F network""" 19 | def __init__(self, hidden_dim=2048, fc_dim=64, weights=None, dropout=0.5): 20 | super(CNNF, self).__init__() 21 | self.hidden_dim = hidden_dim 22 | 23 | if weights is None: 24 | weights = { 25 | # conv layers: ((c_in, c_out, kernel_size), custom stride) 26 | 'cnn': [([3, 64, 11], [1, 4]), 27 | ([64, 256, 5], None), 28 | ([256, 256, 3], None), 29 | ([256, 256, 3], None), 30 | ([256, 256, 3], None)], 31 | 32 | # fc layers: n_in, n_out 33 | 'fc': [[256*22*2, fc_dim], # original: 256*7*7 -> 4096 34 | [fc_dim, fc_dim], 35 | [fc_dim, self.hidden_dim]] 36 | } 37 | 38 | self.convs = nn.ModuleList([nn.Conv2d(*params, padding_mode='replicate', stride=stride if stride else 1) 39 | for params, stride in weights['cnn']]) 40 | 41 | self.fcs = nn.ModuleList([nn.Linear(*params) for params in weights['fc']]) 42 | self.maxpool2d = nn.MaxPool2d(2) 43 | self.maxpool_idxs = [True, True, False, False, True] # CNN layers to maxpool 44 | self.dropout = nn.Dropout(p=dropout) 45 | self.layer_params = weights 46 | 47 | def forward(self, x): 48 | x = torch.reshape(x, shape=[-1, 3, 224, 224]) 49 | 50 | # convolutional layers 51 | for cnn_layer, apply_maxpool in zip(self.convs, self.maxpool_idxs): 52 | x = F.relu(cnn_layer(x)) 53 | # notable difference: original TF implementation has "SAME" padding 54 | x = self.maxpool2d(x) if apply_maxpool else x 55 | 56 | # fully connected layers 57 | x = torch.reshape(x, shape=[-1, self.layer_params['fc'][0][0]]) 58 | for fc_layer in self.fcs: 59 | x = F.relu(fc_layer(x)) 60 | x = self.dropout(x) 61 | 62 | return x 63 | 64 | def reset_parameters(self): 65 | for conv in self.convs: 66 | nn.init.xavier_uniform_(conv.weight) 67 | for fc in self.fcs: 68 | nn.init.xavier_uniform_(fc.weight) 69 | 70 | 71 | class DVBPR(nn.Module): 72 | def __init__(self, n_users, n_items, K=2048, use_cnnf=False): 73 | super().__init__() 74 | self.cache = None 75 | 76 | # CNN for learned image features 77 | if use_cnnf: 78 | self.cnn = CNNF(hidden_dim=K) # CNN-F is a smaller CNN 79 | else: 80 | alexnet = models.alexnet(pretrained=False) 81 | final_len = alexnet.classifier[-1].weight.shape[1] 82 | alexnet.classifier[-1] = nn.Linear(final_len, K) 83 | self.cnn = alexnet 84 | 85 | # Visual latent preference (theta) 86 | self.theta_users = nn.Embedding(n_users, K) 87 | 88 | # Latent factors (gamma) 89 | self.gamma_users = nn.Embedding(n_users, 100) 90 | self.gamma_items = nn.Embedding(n_items, 100) 91 | 92 | # Random weight initialization 93 | self.reset_parameters() 94 | 95 | def forward(self, ui, pimg, nimg, pi, ni): 96 | """Forward pass of the model. 97 | 98 | Feed forward a given input (batch). Each object is expected 99 | to be a Tensor. 100 | 101 | Args: 102 | ui: User index, as a Tensor. 103 | pimg: Positive item image, as a Tensor. 104 | nimg: Negative item image, as a Tensor. 105 | pi: Positive item index, as a Tensor. 106 | ni: Negative item index, as a Tensor. 
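Both image tensors are expected to be preprocessed batches the backbone CNN can consume (e.g. float tensors of shape (batch, 3, 224, 224), matching the reshape in CNNF.forward).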
107 | 108 | Returns: 109 | Network output (scalar) for each input. 110 | """ 111 | 112 | # User 113 | ui_visual_factors = self.theta_users(ui) # Visual factors of user u 114 | ui_latent_factors = self.gamma_users(ui) # Latent factors of user u 115 | 116 | # Items 117 | pi_features = self.cnn(pimg) # Pos. item visual features 118 | ni_features = self.cnn(nimg) # Neg. item visual features 119 | 120 | pi_latent_factors = self.gamma_items(pi) # Pos. item visual factors 121 | ni_latent_factors = self.gamma_items(ni) # Neg. item visual factors 122 | 123 | x_ui = (ui_visual_factors * pi_features).sum(1) + (pi_latent_factors * ui_latent_factors).sum(1) 124 | x_uj = (ui_visual_factors * ni_features).sum(1) + (ni_latent_factors * ui_latent_factors).sum(1) 125 | 126 | return x_ui, x_uj 127 | 128 | def recommend_all(self, user, img_list, cache=None, grad_enabled=False): 129 | with torch.set_grad_enabled(grad_enabled): 130 | # User 131 | u_visual_factors = self.theta_users(user) # Visual factors of user u 132 | ui_latent_factors = self.gamma_users(user) 133 | 134 | # Items 135 | i_latent_factors = self.gamma_items.weight # Items visual factors 136 | 137 | if cache is not None: 138 | visual_rating_space = cache 139 | elif self.cache is not None: 140 | visual_rating_space = self.cache 141 | else: 142 | visual_rating_space = self.generate_cache(img_list) 143 | 144 | x_ui = ((i_latent_factors*ui_latent_factors).sum(dim=1).squeeze() + \ 145 | (u_visual_factors * visual_rating_space).sum(dim=2).squeeze()) 146 | 147 | return x_ui 148 | 149 | def reset_parameters(self): 150 | """ Restart network weights using a Xavier uniform distribution. """ 151 | if isinstance(self.cnn, CNNF): 152 | self.cnn.reset_parameters() 153 | nn.init.uniform_(self.theta_users.weight) # Visual factors (theta) 154 | nn.init.uniform_(self.gamma_users.weight) # Visual factors (theta) 155 | nn.init.uniform_(self.gamma_items.weight) # Visual factors (theta) 156 | 157 | def generate_cache(self, imgs, grad_enabled=False, device='cpu'): 158 | cache = [] 159 | with torch.set_grad_enabled(grad_enabled): 160 | for img_idx in tqdm(imgs.keys()): 161 | img = imgs[img_idx] 162 | img = img.to(device).unsqueeze(0) 163 | cache.append(self.cnn(img)) 164 | self.cache = torch.stack(cache).to(device) 165 | return self.cache 166 | -------------------------------------------------------------------------------- /1 - Create image embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "import numpy as np\n", 12 | "import torch\n", 13 | "import torchvision\n", 14 | "from torch.utils.data import DataLoader\n", 15 | "from torchvision import transforms\n", 16 | "from tqdm.auto import tqdm\n", 17 | "\n", 18 | "from datasets.utils import PreprocessingDataset\n", 19 | "from models.utils import get_model_by_name\n", 20 | "from utils.environment import modified_environ" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Create image embeddings" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# Dataset\n", 37 | "DATASET = \"UGallery\"\n", 38 | "assert DATASET in [\"UGallery\", \"Wikimedia\"]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | 
"source": [ 47 | "# Parameters\n", 48 | "BATCH_SIZE, NUM_WORKERS = 8, 4\n", 49 | "IMAGES_EXT = [\"*.gif\", \"*.jpg\", \"*.jpeg\", \"*.png\", \"*.webp\"]\n", 50 | "USE_GPU = True\n", 51 | "\n", 52 | "# Model\n", 53 | "MODEL = \"resnet50\"\n", 54 | "LAYER = \"\" # if not defined the last layer, before the classification, output will be extracted\n", 55 | "assert MODEL in [\"alexnet\", \"vgg16\", \"resnet50\"]\n", 56 | "\n", 57 | "# Images path\n", 58 | "IMAGES_DIR = None\n", 59 | "if DATASET == \"Wikimedia\":\n", 60 | " IMAGES_DIR = os.path.join(\"/\", \"mnt\", \"data2\", \"wikimedia\", \"imagenes_tarea\")\n", 61 | "elif DATASET == \"UGallery\":\n", 62 | " IMAGES_DIR = os.path.join(\"/\", \"mnt\", \"workspace\", \"Ugallery\", \"mini-images-224-224-v2\")\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# Paths (output)\n", 72 | "LAYERED_OUTPUT = f\"-{LAYER}\" if LAYER else \"\"\n", 73 | "OUTPUT_EMBEDDING_PATH = os.path.join(\"data\", DATASET, f\"embedding-{MODEL}{LAYERED_OUTPUT}.npy\")\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "import PIL\n", 83 | "from PIL import ImageFile\n", 84 | "\n", 85 | "\n", 86 | "# Needed for some images in the Wikimedia dataset\n", 87 | "PIL.Image.MAX_IMAGE_PIXELS = 3_000_000_000\n", 88 | "# Some images are \"broken\" in Wikimedia dataset\n", 89 | "ImageFile.LOAD_TRUNCATED_IMAGES = True\n" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "%%time\n", 99 | "# Setting up torch device (useful if GPU available)\n", 100 | "print(\"\\nCreating device...\")\n", 101 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() and USE_GPU else \"cpu\")\n", 102 | "if torch.cuda.is_available() != USE_GPU:\n", 103 | " print((f\"\\nNotice: Not using GPU - \"\n", 104 | " f\"Cuda available ({torch.cuda.is_available()}) \"\n", 105 | " f\"does not match USE_GPU ({USE_GPU})\"\n", 106 | " ))\n", 107 | "\n", 108 | "# Downloading models for feature extraction\n", 109 | "print(\"\\nDownloading model...\")\n", 110 | "with modified_environ(TORCH_HOME=\".\"):\n", 111 | " print(f\"Model: {MODEL} (pretrained on imagenet)\")\n", 112 | " model = get_model_by_name(MODEL, output_layer=LAYER).eval().to(device)\n", 113 | "\n", 114 | "# Setting up transforms and dataset\n", 115 | "print(\"\\nSetting up transforms and dataset...\")\n", 116 | "images_transforms = transforms.Compose([\n", 117 | " transforms.Resize(256),\n", 118 | " transforms.CenterCrop(224),\n", 119 | " transforms.ToTensor(),\n", 120 | " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n", 121 | "])\n", 122 | "image_dataset = PreprocessingDataset(\n", 123 | " IMAGES_DIR,\n", 124 | " extensions=IMAGES_EXT,\n", 125 | " transform=images_transforms,\n", 126 | ")\n", 127 | "image_dataloader = DataLoader(image_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)\n", 128 | "print(f\">> Images dataset: {len(image_dataset)}\")\n", 129 | "\n", 130 | "# Calculate embedding dimension size\n", 131 | "dummy_input = torch.ones(1, *image_dataset[0][\"image\"].size()).to(device)\n", 132 | "dummy_output = model(dummy_input)\n", 133 | "emb_dim = dummy_output.shape[1:] if LAYER else dummy_output.size(1)\n", 134 | "print(f\">> Embedding dimension size: {emb_dim}\")\n", 135 | "\n", 136 | "# Feature 
extraction phase\n", 137 | "print(f\"\\nFeature extraction...\")\n", 138 | "output_ids = np.empty(len(image_dataset), dtype=object)\n", 139 | "if LAYER:\n", 140 | " output_embedding = torch.zeros((len(image_dataset), *emb_dim), dtype=torch.float32, device=device)\n", 141 | "else:\n", 142 | " output_embedding = torch.zeros((len(image_dataset), emb_dim), dtype=torch.float32, device=device)\n", 143 | "\n", 144 | "with torch.no_grad():\n", 145 | " for batch_i, sample in enumerate(tqdm(image_dataloader, desc=\"Feature extraction\")):\n", 146 | " item_image = sample[\"image\"].to(device)\n", 147 | " item_idx = sample[\"idx\"]\n", 148 | " output_ids[[*item_idx]] = sample[\"id\"]\n", 149 | " output_embedding[item_idx] = model(item_image).squeeze(-1).squeeze(-1)\n", 150 | "\n", 151 | "output_embedding = output_embedding.cpu().numpy()\n", 152 | "\n", 153 | "# Fill output embedding\n", 154 | "embedding = np.ndarray(\n", 155 | " shape=(len(image_dataset), 2),\n", 156 | " dtype=object,\n", 157 | ")\n", 158 | "for i in range(len(image_dataset)):\n", 159 | " embedding[i] = np.asarray([output_ids[i], output_embedding[i]])\n", 160 | "print(f\">> Embedding shape: {embedding.shape}\")\n", 161 | "\n", 162 | "# Save embedding to file\n", 163 | "print(f\"\\nSaving embedding to file... ({OUTPUT_EMBEDDING_PATH})\")\n", 164 | "np.save(OUTPUT_EMBEDDING_PATH, embedding, allow_pickle=True)\n", 165 | "\n", 166 | "# Free some memory\n", 167 | "if USE_GPU:\n", 168 | " print(f\"\\nCleaning GPU cache...\")\n", 169 | " model = model.to(torch.device(\"cpu\"))\n", 170 | " torch.cuda.empty_cache()\n", 171 | "\n", 172 | "# Finished\n", 173 | "print(\"\\nDone\")\n" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "Python 3", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.7.10" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 4 198 | } 199 | -------------------------------------------------------------------------------- /3 - (VBPR) Training procedure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2020-04-06T15:20:53.542195Z", 9 | "start_time": "2020-04-06T15:20:53.367196Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import random\n", 16 | "\n", 17 | "import numpy as np\n", 18 | "import torch\n", 19 | "import torch.nn as nn\n", 20 | "import torch.optim as optim\n", 21 | "from torch.utils.data import DataLoader\n", 22 | "from torch.utils.data.sampler import RandomSampler, SequentialSampler\n", 23 | "\n", 24 | "from datasets import UserModeDataset\n", 25 | "from models import VBPR\n", 26 | "from trainers import Trainer\n", 27 | "from utils.data import extract_embedding\n", 28 | "\n", 29 | "\n", 30 | "# Dataset\n", 31 | "DATASET = \"UGallery\"\n", 32 | "assert DATASET in [\"UGallery\", \"Wikimedia\"]\n", 33 | "\n", 34 | "# Parameters\n", 35 | "RNG_SEED = 0\n", 36 | "EMBEDDING_PATH = os.path.join(\"data\", DATASET, \"embedding-resnet50.npy\")\n", 37 | "TRAINING_PATH = os.path.join(\"data\", DATASET, \"naive-user-train.csv\")\n", 38 | "VALIDATION_PATH = os.path.join(\"data\", 
DATASET, \"naive-user-validation.csv\")\n", 39 | "CHECKPOINTS_DIR = os.path.join(\"checkpoints\")\n", 40 | "USE_GPU = True\n", 41 | "\n", 42 | "# Parameters (training)\n", 43 | "SETTINGS = {\n", 44 | " \"dataloader:batch_size\": 42_000,\n", 45 | " \"dataloader:num_workers\": os.cpu_count(),\n", 46 | " \"model:dim_latent\": 200,\n", 47 | " \"model:dim_visual\": 100,\n", 48 | " \"optimizer:lr\": 0.001,\n", 49 | " \"optimizer:weight_decay\": 0.0001,\n", 50 | " \"scheduler:factor\": 0.6,\n", 51 | " \"scheduler:patience\": 2,\n", 52 | " \"train:max_epochs\": 150,\n", 53 | " \"train:max_lrs\": 5,\n", 54 | " \"train:non_blocking\": True,\n", 55 | " \"train:train_per_valid_times\": 1,\n", 56 | "}\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "%%time\n", 66 | "# Freezing RNG seed if needed\n", 67 | "if RNG_SEED is not None:\n", 68 | " print(f\"\\nUsing random seed...\")\n", 69 | " random.seed(RNG_SEED)\n", 70 | " torch.manual_seed(RNG_SEED)\n", 71 | " np.random.seed(RNG_SEED)\n", 72 | "\n", 73 | "# Load embedding from file\n", 74 | "print(f\"\\nLoading embedding from file... ({EMBEDDING_PATH})\")\n", 75 | "embedding = np.load(EMBEDDING_PATH, allow_pickle=True)\n", 76 | "\n", 77 | "# Extract features and \"id2index\" mapping\n", 78 | "print(\"\\nExtracting data into variables...\")\n", 79 | "embedding, _, _ = extract_embedding(embedding, verbose=True)\n", 80 | "print(f\">> Features shape: {embedding.shape}\")\n", 81 | "\n", 82 | "# DataLoaders initialization\n", 83 | "print(\"\\nInitialize DataLoaders\")\n", 84 | "# Training DataLoader\n", 85 | "train_dataset = UserModeDataset(\n", 86 | " csv_file=TRAINING_PATH,\n", 87 | ")\n", 88 | "print(f\">> Training dataset: {len(train_dataset)}\")\n", 89 | "train_sampler = RandomSampler(train_dataset)\n", 90 | "train_dataloader = DataLoader(\n", 91 | " train_dataset,\n", 92 | " batch_size=SETTINGS[\"dataloader:batch_size\"],\n", 93 | " num_workers=SETTINGS[\"dataloader:num_workers\"],\n", 94 | " pin_memory=True,\n", 95 | ")\n", 96 | "print(f\">> Training dataloader: {len(train_dataloader)}\")\n", 97 | "# Validation DataLoader\n", 98 | "valid_dataset = UserModeDataset(\n", 99 | " csv_file=VALIDATION_PATH,\n", 100 | ")\n", 101 | "print(f\">> Validation dataset: {len(valid_dataset)}\")\n", 102 | "valid_sampler = SequentialSampler(valid_dataset)\n", 103 | "valid_dataloader = DataLoader(\n", 104 | " valid_dataset,\n", 105 | " batch_size=SETTINGS[\"dataloader:batch_size\"],\n", 106 | " num_workers=SETTINGS[\"dataloader:num_workers\"],\n", 107 | " pin_memory=True,\n", 108 | ")\n", 109 | "print(f\">> Validation dataloader: {len(valid_dataloader)}\")\n", 110 | "# Model initialization\n", 111 | "print(\"\\nInitialize model\")\n", 112 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() and USE_GPU else \"cpu\")\n", 113 | "if torch.cuda.is_available() != USE_GPU:\n", 114 | " print((f\"\\nNotice: Not using GPU - \"\n", 115 | " f\"Cuda available ({torch.cuda.is_available()}) \"\n", 116 | " f\"does not match USE_GPU ({USE_GPU})\"\n", 117 | " ))\n", 118 | "N_USERS = len(set(train_dataset.ui))\n", 119 | "N_ITEMS = len(embedding)\n", 120 | "print(f\">> N_USERS = {N_USERS} | N_ITEMS = {N_ITEMS}\")\n", 121 | "model = VBPR(\n", 122 | " N_USERS, N_ITEMS, # Number of users and items\n", 123 | " torch.Tensor(embedding), # Pretrained visual features\n", 124 | " SETTINGS[\"model:dim_latent\"], SETTINGS[\"model:dim_visual\"], # Size of internal spaces\n", 125 | 
").to(device)\n", 126 | "\n", 127 | "# Training setup\n", 128 | "print(\"\\nSetting up training\")\n", 129 | "optimizer = optim.Adam(\n", 130 | " model.parameters(),\n", 131 | " lr=SETTINGS[\"optimizer:lr\"],\n", 132 | " weight_decay=SETTINGS[\"optimizer:weight_decay\"],\n", 133 | ")\n", 134 | "criterion = nn.BCEWithLogitsLoss(reduction=\"sum\")\n", 135 | "scheduler = optim.lr_scheduler.ReduceLROnPlateau(\n", 136 | " optimizer, mode=\"max\", factor=SETTINGS[\"scheduler:factor\"],\n", 137 | " patience=SETTINGS[\"scheduler:patience\"], verbose=True,\n", 138 | ")\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "%%time\n", 148 | "# Training\n", 149 | "version = f\"VBPR_{DATASET.lower()}\"\n", 150 | "trainer = Trainer(\n", 151 | " model, device, criterion, optimizer, scheduler,\n", 152 | " checkpoint_dir=CHECKPOINTS_DIR,\n", 153 | " version=version,\n", 154 | ")\n", 155 | "best_model, best_acc, best_loss, best_epoch = trainer.run(\n", 156 | " SETTINGS[\"train:max_epochs\"], SETTINGS[\"train:max_lrs\"],\n", 157 | " {\"train\": train_dataloader, \"validation\": valid_dataloader},\n", 158 | " train_valid_loops=SETTINGS[\"train:train_per_valid_times\"],\n", 159 | ")\n" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# Final result\n", 169 | "print(f\"\\nBest ACC {best_acc} reached at epoch {best_epoch}\")\n", 170 | "print(best_model)\n" 171 | ] 172 | } 173 | ], 174 | "metadata": { 175 | "kernelspec": { 176 | "display_name": "Python 3", 177 | "language": "python", 178 | "name": "python3" 179 | }, 180 | "language_info": { 181 | "codemirror_mode": { 182 | "name": "ipython", 183 | "version": 3 184 | }, 185 | "file_extension": ".py", 186 | "mimetype": "text/x-python", 187 | "name": "python", 188 | "nbconvert_exporter": "python", 189 | "pygments_lexer": "ipython3", 190 | "version": "3.7.10" 191 | } 192 | }, 193 | "nbformat": 4, 194 | "nbformat_minor": 4 195 | } 196 | -------------------------------------------------------------------------------- /3 - (DVBPR) Training procedure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2020-04-06T15:20:53.542195Z", 9 | "start_time": "2020-04-06T15:20:53.367196Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import random\n", 16 | "from PIL import Image\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "import torch.nn as nn\n", 20 | "import torch.optim as optim\n", 21 | "import torch.multiprocessing\n", 22 | "from torch.utils.data import DataLoader, Subset\n", 23 | "from torch.utils.data.sampler import RandomSampler, SequentialSampler\n", 24 | "\n", 25 | "from datasets import UserModeImgDataset, UserModeDataset, UserModeFeatDataset\n", 26 | "from models import DVBPR\n", 27 | "from trainers import ImgTrainer\n", 28 | "from utils.data import extract_embedding\n", 29 | "\n", 30 | "\n", 31 | "# Dataset\n", 32 | "DATASET = \"UGallery\"\n", 33 | "assert DATASET in [\"UGallery\", \"Wikimedia\"]\n", 34 | "\n", 35 | "# Parameters\n", 36 | "RNG_SEED = 0\n", 37 | "USE_GPU = True\n", 38 | "BASE_PATH = \"/home/pcerdam/VisualRecSys-Tutorial-IUI2021/\"\n", 39 | "\n", 40 | "TRAINING_PATH = os.path.join(BASE_PATH, \"data\", DATASET, \"naive-user-train.csv\")\n", 41 | 
"EMBEDDING_PATH = os.path.join(BASE_PATH, \"data\", DATASET, \"embedding-resnet50.npy\")\n", 42 | "VALIDATION_PATH = os.path.join(BASE_PATH, \"data\", DATASET, \"naive-user-validation.csv\")\n", 43 | "\n", 44 | "IMAGES_PATH = None\n", 45 | "if DATASET == \"Wikimedia\":\n", 46 | " IMAGES_PATH = os.path.join(\"/\", \"mnt\", \"data2\", \"wikimedia\", \"mini-images-224-224-v2\")\n", 47 | "elif DATASET == \"UGallery\":\n", 48 | " IMAGES_DIR = os.path.join(\"/\", \"mnt\", \"workspace\", \"Ugallery\", \"mini-images-224-224-v2\")\n", 49 | "\n", 50 | "CHECKPOINTS_DIR = os.path.join(BASE_PATH, \"checkpoints\")\n", 51 | "version = f\"DVBPR_{DATASET.lower()}\"\n", 52 | "\n", 53 | "# Parameters (training)\n", 54 | "SETTINGS = {\n", 55 | " \"dataloader:batch_size\": 128,\n", 56 | " \"dataloader:num_workers\": os.cpu_count(),\n", 57 | " \"prev_checkpoint\": False,\n", 58 | " \"model:dim_visual\": 100,\n", 59 | " \"optimizer:lr\": 0.001,\n", 60 | " \"optimizer:weight_decay\": 0.0001,\n", 61 | " \"scheduler:factor\": 0.6,\n", 62 | " \"scheduler:patience\": 2,\n", 63 | " \"train:max_epochs\": 5,\n", 64 | " \"train:max_lrs\": 5,\n", 65 | " \"train:non_blocking\": True,\n", 66 | " \"train:train_per_valid_times\": 1\n", 67 | "}" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "%%time\n", 77 | "# Freezing RNG seed if needed\n", 78 | "if RNG_SEED is not None:\n", 79 | " print(f\"\\nUsing random seed...\")\n", 80 | " random.seed(RNG_SEED)\n", 81 | " torch.manual_seed(RNG_SEED)\n", 82 | " np.random.seed(RNG_SEED)\n", 83 | "\n", 84 | "# Load embedding from file\n", 85 | "print(f\"\\nLoading embedding from file... ({EMBEDDING_PATH})\")\n", 86 | "embedding = np.load(EMBEDDING_PATH, allow_pickle=True)\n", 87 | "\n", 88 | "# Extract features and \"id2index\" mapping\n", 89 | "print(\"\\nExtracting data into variables...\")\n", 90 | "embedding, id2index, index2fn = extract_embedding(embedding, verbose=True)\n", 91 | "print(f\">> Features shape: {embedding.shape}\")" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "# DataLoaders initialization\n", 101 | "print(\"\\nInitialize DataLoaders\")\n", 102 | "\n", 103 | "# Training DataLoader\n", 104 | "train_dataset = UserModeImgDataset(\n", 105 | " csv_file=TRAINING_PATH,\n", 106 | " img_path=IMAGES_PATH,\n", 107 | " id2index=id2index,\n", 108 | " index2fn=index2fn\n", 109 | ")\n", 110 | "print(f\">> Training dataset: {len(train_dataset)}\")\n", 111 | "train_sampler = RandomSampler(train_dataset)\n", 112 | "train_dataloader = DataLoader(\n", 113 | " train_dataset,\n", 114 | " batch_size=SETTINGS[\"dataloader:batch_size\"],\n", 115 | " num_workers=SETTINGS[\"dataloader:num_workers\"],\n", 116 | " shuffle=True,\n", 117 | " pin_memory=True,\n", 118 | ")\n", 119 | "print(f\">> Training dataloader: {len(train_dataloader)}\")\n", 120 | "\n", 121 | "# Validation DataLoader\n", 122 | "valid_dataset = UserModeImgDataset(\n", 123 | " csv_file=VALIDATION_PATH,\n", 124 | " img_path=IMAGES_PATH,\n", 125 | " id2index=id2index,\n", 126 | " index2fn=index2fn\n", 127 | ")\n", 128 | "print(f\">> Validation dataset: {len(valid_dataset)}\")\n", 129 | "valid_sampler = SequentialSampler(valid_dataset)\n", 130 | "valid_dataloader = DataLoader(\n", 131 | " valid_dataset,\n", 132 | " batch_size=SETTINGS[\"dataloader:batch_size\"],\n", 133 | " num_workers=SETTINGS[\"dataloader:num_workers\"],\n", 134 | " 
shuffle=True,\n", 135 | " pin_memory=True,\n", 136 | ")\n", 137 | "print(f\">> Validation dataloader: {len(valid_dataloader)}\")" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "# Model initialization\n", 147 | "print(\"\\nInitialize model\")\n", 148 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() and USE_GPU else \"cpu\")\n", 149 | "if torch.cuda.is_available() != USE_GPU:\n", 150 | " print((f\"\\nNotice: Not using GPU - \"\n", 151 | " f\"Cuda available ({torch.cuda.is_available()}) \"\n", 152 | " f\"does not match USE_GPU ({USE_GPU})\"\n", 153 | " ))\n", 154 | "N_USERS = len(set(train_dataset.ui))\n", 155 | "N_ITEMS = len(embedding)\n", 156 | "print(f\">> N_USERS = {N_USERS} | N_ITEMS = {N_ITEMS}\")\n", 157 | "print(torch.Tensor(embedding).shape)\n", 158 | "model = DVBPR(\n", 159 | " N_USERS, # Number of users and items\n", 160 | " N_ITEMS,\n", 161 | " SETTINGS[\"model:dim_visual\"], # Size of visual spaces\n", 162 | ").to(device)\n", 163 | "\n", 164 | "print(model)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "# Training setup\n", 174 | "print(\"\\nSetting up training\")\n", 175 | "optimizer = optim.Adam(\n", 176 | " model.parameters(),\n", 177 | " lr=SETTINGS[\"optimizer:lr\"],\n", 178 | " weight_decay=SETTINGS[\"optimizer:weight_decay\"],\n", 179 | ")\n", 180 | "criterion = nn.BCEWithLogitsLoss(reduction=\"sum\")\n", 181 | "scheduler = optim.lr_scheduler.ReduceLROnPlateau(\n", 182 | " optimizer, mode=\"max\", factor=SETTINGS[\"scheduler:factor\"],\n", 183 | " patience=SETTINGS[\"scheduler:patience\"], verbose=True,\n", 184 | ")" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "%%time\n", 194 | "# Training\n", 195 | "trainer = ImgTrainer(\n", 196 | " model, device, criterion, optimizer, scheduler,\n", 197 | " checkpoint_dir=CHECKPOINTS_DIR,\n", 198 | " version=version,\n", 199 | ")\n", 200 | "best_model, best_acc, best_loss, best_epoch = trainer.run(\n", 201 | " SETTINGS[\"train:max_epochs\"], SETTINGS[\"train:max_lrs\"],\n", 202 | " {\"train\": train_dataloader, \"validation\": valid_dataloader},\n", 203 | " train_valid_loops=SETTINGS[\"train:train_per_valid_times\"],\n", 204 | " use_checkpoint=SETTINGS[\"prev_checkpoint\"]\n", 205 | ")" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "# Final result\n", 215 | "print(f\"\\nBest ACC {best_acc} reached at epoch {best_epoch}\")\n", 216 | "print(best_model)" 217 | ] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "Python 3", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.7.10" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 4 241 | } 242 | -------------------------------------------------------------------------------- /3 - (CuratorNet) Training procedure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2021-03-26T01:41:47.880863Z", 9 | "start_time": "2021-03-26T01:41:46.868570Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import random\n", 16 | "import time\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "import torch\n", 20 | "import torch.nn as nn\n", 21 | "import torch.optim as optim\n", 22 | "from torch.utils.data import DataLoader\n", 23 | "from torch.utils.data.sampler import RandomSampler, SequentialSampler\n", 24 | "\n", 25 | "from datasets.profile_mode import ProfileModeDataset\n", 26 | "from models.curatornet import CuratorNet\n", 27 | "from utils.curatornet_sampler import SameProfileSizeBatchSampler\n", 28 | "from trainers import Trainer\n", 29 | "from utils.data import extract_embedding" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Dataset\n", 39 | "DATASET = \"UGallery\"\n", 40 | "assert DATASET in [\"UGallery\", \"Wikimedia\"]" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "ExecuteTime": { 48 | "end_time": "2021-03-26T01:41:50.055518Z", 49 | "start_time": "2021-03-26T01:41:50.052454Z" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# Parameters\n", 55 | "RNG_SEED = 0\n", 56 | "EMBEDDING_PATH = os.path.join(\"data\", DATASET, \"embedding-resnet50.npy\")\n", 57 | "TRAINING_PATH = os.path.join(\"data\", DATASET, \"naive-profile-train.csv\")\n", 58 | "VALIDATION_PATH = os.path.join(\"data\", DATASET, \"naive-profile-validation.csv\")\n", 59 | "CHECKPOINTS_DIR = os.path.join(\"checkpoints\")\n", 60 | "USE_GPU = True\n", 61 | "\n", 62 | "# Parameters (training)\n", 63 | "SETTINGS = {\n", 64 | " \"batch_sampler:batch_size\": 128,\n", 65 | " \"batch_sampler:profile_items_per_batch\": 60_000,\n", 66 | " \"dataloader:num_workers\": os.cpu_count(),\n", 67 | " \"dataloader:pin_memory\": True,\n", 68 | " \"optimizer:lr\": 0.0001,\n", 69 | " \"optimizer:weight_decay\": 0.0001,\n", 70 | " \"scheduler:factor\": 0.6,\n", 71 | " \"scheduler:patience\": 2,\n", 72 | " \"scheduler:threshold\": 1e-4,\n", 73 | " \"train:max_epochs\": 10,\n", 74 | " \"train:max_lrs\": 10,\n", 75 | " \"train:non_blocking\": True,\n", 76 | " \"train:train_per_valid_times\": 1,\n", 77 | "}" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "ExecuteTime": { 85 | "end_time": "2021-03-26T01:41:59.213466Z", 86 | "start_time": "2021-03-26T01:41:51.073361Z" 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "%%time\n", 92 | "# Freezing RNG seed if needed\n", 93 | "if RNG_SEED is not None:\n", 94 | " print(f\"\\nUsing random seed...\")\n", 95 | " random.seed(RNG_SEED)\n", 96 | " torch.manual_seed(RNG_SEED)\n", 97 | " np.random.seed(RNG_SEED)\n", 98 | "\n", 99 | "# Load embedding from file\n", 100 | "print(f\"\\nLoading embedding from file... 
({EMBEDDING_PATH})\")\n", 101 | "embedding = np.load(EMBEDDING_PATH, allow_pickle=True)\n", 102 | "\n", 103 | "# Extract features and \"id2index\" mapping\n", 104 | "print(\"\\nExtracting data into variables...\")\n", 105 | "embedding, _, _ = extract_embedding(embedding, verbose=True)\n", 106 | "print(f\">> Features shape: {embedding.shape}\")\n", 107 | "\n", 108 | "# DataLoaders initialization\n", 109 | "print(\"\\nInitialize DataLoaders\")\n", 110 | "# Training DataLoader\n", 111 | "train_dataset = ProfileModeDataset(\n", 112 | " csv_file=TRAINING_PATH,\n", 113 | ")\n", 114 | "print(f\">> Training dataset: {len(train_dataset)}\")\n", 115 | "train_sampler = RandomSampler(train_dataset)\n", 116 | "train_batch_sampler = SameProfileSizeBatchSampler(\n", 117 | " sampler=train_sampler,\n", 118 | " batch_size=SETTINGS[\"batch_sampler:batch_size\"],\n", 119 | " profile_items_per_batch=SETTINGS[\"batch_sampler:profile_items_per_batch\"],\n", 120 | ")\n", 121 | "train_dataloader = DataLoader(\n", 122 | " train_dataset,\n", 123 | " sampler=train_batch_sampler,\n", 124 | " num_workers=SETTINGS[\"dataloader:num_workers\"],\n", 125 | " pin_memory=SETTINGS[\"dataloader:pin_memory\"],\n", 126 | ")\n", 127 | "print(f\">> Training dataloader: {len(train_dataloader)}\")\n", 128 | "# Validation DataLoader\n", 129 | "valid_dataset = ProfileModeDataset(\n", 130 | " csv_file=VALIDATION_PATH,\n", 131 | ")\n", 132 | "print(f\">> Validation dataset: {len(valid_dataset)}\")\n", 133 | "valid_sampler = SequentialSampler(valid_dataset)\n", 134 | "valid_batch_sampler = SameProfileSizeBatchSampler(\n", 135 | " sampler=valid_sampler,\n", 136 | " batch_size=SETTINGS[\"batch_sampler:batch_size\"],\n", 137 | " profile_items_per_batch=SETTINGS[\"batch_sampler:profile_items_per_batch\"],\n", 138 | ")\n", 139 | "valid_dataloader = DataLoader(\n", 140 | " valid_dataset,\n", 141 | " sampler=valid_batch_sampler,\n", 142 | " num_workers=SETTINGS[\"dataloader:num_workers\"],\n", 143 | " pin_memory=SETTINGS[\"dataloader:pin_memory\"],\n", 144 | ")\n", 145 | "print(f\">> Validation dataloader: {len(valid_dataloader)}\")\n", 146 | "# Model initialization\n", 147 | "print(\"\\nInitialize model\")\n", 148 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() and USE_GPU else \"cpu\")\n", 149 | "if torch.cuda.is_available() != USE_GPU:\n", 150 | " print((f\"\\nNotice: Not using GPU - \"\n", 151 | " f\"Cuda available ({torch.cuda.is_available()}) \"\n", 152 | " f\"does not match USE_GPU ({USE_GPU})\"\n", 153 | " ))\n", 154 | "model = CuratorNet(\n", 155 | " torch.Tensor(embedding),\n", 156 | " input_size=embedding.shape[1],\n", 157 | ").to(device)\n", 158 | "\n", 159 | "# Training setup\n", 160 | "print(\"\\nSetting up training\")\n", 161 | "optimizer = optim.Adam(\n", 162 | " model.parameters(),\n", 163 | " lr=SETTINGS[\"optimizer:lr\"],\n", 164 | " weight_decay=SETTINGS[\"optimizer:weight_decay\"],\n", 165 | ")\n", 166 | "criterion = nn.BCEWithLogitsLoss(reduction=\"sum\")\n", 167 | "scheduler = optim.lr_scheduler.ReduceLROnPlateau(\n", 168 | " optimizer, mode=\"max\", factor=SETTINGS[\"scheduler:factor\"],\n", 169 | " patience=SETTINGS[\"scheduler:patience\"], verbose=True,\n", 170 | " threshold=SETTINGS[\"scheduler:threshold\"],\n", 171 | ")\n", 172 | "\n", 173 | "# Training\n", 174 | "print(\"\\nTraining\")\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "ExecuteTime": { 182 | "end_time": "2021-03-26T02:07:15.586369Z", 183 | "start_time": 
"2021-03-26T01:42:00.462845Z" 184 | } 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "%%time\n", 189 | "# Training\n", 190 | "version = (\n", 191 | " f\"{model.__class__.__name__}_\"\n", 192 | " f\"{DATASET.lower()}\"\n", 193 | " # f\"_resnet50_\"\n", 194 | " # f\"{time.strftime('%Y-%m-%d-%H-%M-%S')}\"\n", 195 | ")\n", 196 | "trainer = Trainer(\n", 197 | " model, device, criterion, optimizer, scheduler,\n", 198 | " checkpoint_dir=CHECKPOINTS_DIR,\n", 199 | " version=version,\n", 200 | ")\n", 201 | "best_model, best_acc, best_loss, best_epoch = trainer.run(\n", 202 | " SETTINGS[\"train:max_epochs\"], SETTINGS[\"train:max_lrs\"],\n", 203 | " {\"train\": train_dataloader, \"validation\": valid_dataloader},\n", 204 | " train_valid_loops=SETTINGS[\"train:train_per_valid_times\"],\n", 205 | ")\n", 206 | "\n", 207 | "# Final result\n", 208 | "print(f\"\\nBest ACC {best_acc} reached at epoch {best_epoch}\")\n", 209 | "print(best_model)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "Python 3", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.7.10" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 4 241 | } 242 | -------------------------------------------------------------------------------- /trainers/trainer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import time 4 | 5 | import torch 6 | from torch.utils.data import DataLoader 7 | from tqdm.auto import tqdm 8 | 9 | from models.utils import get_cpu_copy, save_checkpoint 10 | 11 | 12 | class Trainer: 13 | 14 | def __init__( 15 | self, model, device, criterion, optimizer, scheduler, 16 | checkpoint_dir=None, writer_dir=None, version=None, 17 | ): 18 | # Important objects for training 19 | self.model = model 20 | self.criterion = criterion 21 | self.optimizer = optimizer 22 | self.scheduler = scheduler 23 | 24 | # Device 25 | if device is None: 26 | device = torch.device("cpu") 27 | print(f">> Device is none... default: {device}") 28 | self.device = device 29 | self.model = self.model.to(self.device) 30 | 31 | # Version 32 | if version is None: 33 | version = ( 34 | f"{model.__class__.__name__}_" 35 | f"{time.strftime('%Y-%m-%d-%H-%M-%S')}" 36 | ) 37 | self.version = version 38 | print(f">> Model version: {self.version}") 39 | 40 | # Checkpoints 41 | if checkpoint_dir is None: 42 | checkpoint_dir = os.path.join("checkpoints") 43 | assert os.path.isdir(checkpoint_dir) 44 | self.checkpoint_dst = os.path.join(checkpoint_dir, f"{self.version}.tar") 45 | print(f">> Checkpoints stored at... 
{self.checkpoint_dst}") 46 | 47 | def run( 48 | self, max_epochs, max_learning_rates, dataloaders, 49 | non_blocking=True, train_valid_loops=1, save_last_model=False, 50 | ): 51 | # Prepare model 52 | self.model = self.model.to(self.device) 53 | 54 | # Save first checkpoint 55 | save_checkpoint( 56 | # Base values 57 | self.checkpoint_dst, model=self.model, 58 | criterion=self.criterion, 59 | optimizer=self.optimizer, 60 | scheduler=self.scheduler, 61 | # Epoch values 62 | epoch=None, accuracy=None, loss=None, 63 | ) 64 | 65 | # Starting values 66 | best_validation_acc = 0.0 67 | best_validation_loss = float("inf") 68 | used_lrs = [self.optimizer.param_groups[0]["lr"]] 69 | 70 | # Measure elapsed time 71 | start = time.time() 72 | 73 | # Progress bars 74 | assert all(key in dataloaders for key in ["train", "validation"]) 75 | pbar_epochs = tqdm( 76 | total=max_epochs, 77 | desc="Epoch", unit="epoch", 78 | postfix={ 79 | "current_lr": used_lrs[0], 80 | "used_lrs": len(used_lrs), 81 | }, 82 | ) 83 | pbar_train = tqdm( 84 | total=train_valid_loops * len(dataloaders["train"]), 85 | desc="Train", 86 | postfix={ 87 | "last_acc": None, 88 | "last_lostt": None, 89 | }, 90 | ) 91 | pbar_valid = tqdm( 92 | total=len(dataloaders["validation"]), 93 | desc="Valid", 94 | postfix={ 95 | "best_acc": None, 96 | "best_loss": None, 97 | "best_epoch": None, 98 | "bad_epochs": f"{self.scheduler.num_bad_epochs}", 99 | }, 100 | ) 101 | 102 | # Training loop 103 | for epoch in range(1, max_epochs + 1): 104 | # Each epoch has a training and a validation phase 105 | for phase in ["train", "validation"]: 106 | # Update model mode and progress bar 107 | if phase == "train": 108 | self.model.train() 109 | pbar_train.reset() 110 | pbar_valid.reset() 111 | elif phase == "validation": 112 | self.model.eval() 113 | 114 | # Value accumulators 115 | running_acc = torch.tensor(0, dtype=int, device=self.device) 116 | running_loss = torch.tensor(0.0, dtype=torch.double, device=self.device) 117 | 118 | # Iterate over data 119 | dataset = dataloaders[phase].dataset 120 | loop_times = train_valid_loops if phase == "train" else 1 121 | for _ in range(loop_times): 122 | for i_batch, data in enumerate(dataloaders[phase]): 123 | profile = data[0].to(self.device, non_blocking=non_blocking).squeeze(dim=0) 124 | pi = data[1].to(self.device, non_blocking=non_blocking).squeeze(dim=0) 125 | ni = data[2].to(self.device, non_blocking=non_blocking).squeeze(dim=0) 126 | target = torch.ones(pi.size(0), 1, 1, device=self.device) 127 | 128 | # Restart params gradients 129 | self.optimizer.zero_grad() 130 | 131 | # Forward pass 132 | with torch.set_grad_enabled(phase == "train"): 133 | output = self.model(profile, pi, ni) 134 | loss = self.criterion(output, target) 135 | # Backward pass 136 | if phase == "train": 137 | loss.backward() 138 | self.optimizer.step() 139 | 140 | # Statistics 141 | running_acc.add_((output > 0).sum()) 142 | running_loss.add_(loss.detach() * output.size(0)) 143 | 144 | # Update progress bar 145 | if phase == "train": 146 | pbar_train.update() 147 | else: 148 | pbar_valid.update() 149 | 150 | # Synchronize GPU (debugging) 151 | # torch.cuda.synchronize() 152 | 153 | # Aggregate statistics 154 | dataset_size = loop_times * len(dataset) 155 | epoch_acc = running_acc.item() / dataset_size 156 | epoch_loss = running_loss.item() / dataset_size 157 | # tqdm.write(f">> Epoch {epoch} ({phase.title()}) | ACC {100 * epoch_acc:.3f} - Loss {epoch_loss:.6f}") 158 | 159 | if phase == "train": 160 | # Update progress bar 161 | 
pbar_train.set_postfix({ 162 | "last_acc": f"{100 * epoch_acc:.3f}", 163 | "last_loss": f"{epoch_loss:.6f}", 164 | }) 165 | elif phase == "validation": 166 | new_optimal = False 167 | if self.scheduler.mode == "max": 168 | # Is this a new best accuracy? 169 | new_optimal = epoch_acc > best_validation_acc 170 | else: 171 | # Is this a new best loss? 172 | new_optimal = epoch_loss < best_validation_loss 173 | if new_optimal: 174 | # Save best model 175 | best_validation_acc = epoch_acc 176 | best_validation_loss = epoch_loss 177 | save_checkpoint( 178 | # Base values 179 | self.checkpoint_dst, model=get_cpu_copy(self.model), 180 | criterion=self.criterion, 181 | optimizer=self.optimizer, 182 | scheduler=self.scheduler, 183 | # Epoch values 184 | epoch=self.scheduler.last_epoch, 185 | accuracy=best_validation_acc, 186 | loss=best_validation_loss, 187 | ) 188 | # tqdm.write(f">> New best model (Epoch: {epoch}) | ACC {100 * epoch_acc:.3f} ({epoch_acc})") 189 | # Scheduler step 190 | if self.scheduler.mode == "max": 191 | self.scheduler.step(epoch_acc) 192 | else: 193 | self.scheduler.step(epoch_loss) 194 | next_lr = self.optimizer.param_groups[0]["lr"] 195 | if next_lr not in used_lrs: 196 | # tqdm.write(f">> Next lr: {next_lr} (Already used {used_lrs})") 197 | used_lrs.append(next_lr) 198 | pbar_epochs.set_postfix({ 199 | "used_lrs": len(used_lrs), 200 | "current_lr": next_lr, 201 | }) 202 | # Update progress bar 203 | pbar_valid.set_postfix({ 204 | "best_acc": f"{100 * best_validation_acc:.3f}", 205 | "best_loss": f"{best_validation_loss:.6f}", 206 | "best_epoch": f"{epoch}", 207 | "bad_epochs": f"{self.scheduler.num_bad_epochs}", 208 | }) 209 | 210 | # Update epochs pbar at the end 211 | pbar_epochs.update() 212 | # tqdm.write("\n") 213 | 214 | # Check if used all available learning rates 215 | if len(used_lrs) > max_learning_rates: 216 | print(f">> Reached max different lrs ({max_learning_rates}: {used_lrs})") 217 | break 218 | 219 | # Complete progress bars 220 | pbar_epochs.close() 221 | pbar_train.close() 222 | pbar_valid.close() 223 | 224 | # Report status 225 | elapsed = time.time() - start 226 | print(f">> Training completed in {elapsed // 60:.0f}m {elapsed % 60:.0f}s") 227 | print(f">> Best validation accuracy: ~{100 * best_validation_acc:.3f}%") 228 | print(f">> Best validation loss: ~{best_validation_loss:.6f}") 229 | 230 | if save_last_model: 231 | # Copy last model weights 232 | print(">> Copy last model") 233 | last_model_weights = copy.deepcopy(get_cpu_copy(self.model)) 234 | else: 235 | epoch_acc = None 236 | epoch_loss = None 237 | last_model_weights = None 238 | 239 | # Load best model weights 240 | print(">> Load best model") 241 | best_checkpoint = torch.load(self.checkpoint_dst, map_location=torch.device("cpu")) 242 | self.model.load_state_dict(best_checkpoint["model"]) 243 | 244 | # Move model back to device 245 | self.model.to(self.device) 246 | 247 | # Save last state 248 | print(">> Save last state") 249 | save_checkpoint( 250 | # Base values 251 | self.checkpoint_dst, model=get_cpu_copy(self.model), 252 | criterion=self.criterion, 253 | optimizer=self.optimizer, 254 | scheduler=self.scheduler, 255 | # Epoch values 256 | epoch=self.scheduler.last_epoch, 257 | accuracy=best_validation_acc, 258 | loss=best_validation_loss, 259 | # Last values 260 | last_model=last_model_weights, 261 | last_accuracy=epoch_acc, 262 | last_loss=epoch_loss, 263 | ) 264 | 265 | return self.model, best_checkpoint["accuracy"], best_checkpoint["loss"], best_checkpoint["epoch"] 266 | 
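# ---------------------------------------------------------------------------
# Usage sketch (minimal and illustrative only): how this Trainer is typically
# wired together, mirroring the CuratorNet training notebook earlier in this
# document. `embedding` (the ResNet50 feature matrix), `train_dataloader` and
# `valid_dataloader` are assumed to already exist (the notebook builds them
# with ProfileModeDataset and SameProfileSizeBatchSampler); the hyperparameters
# shown are simply the values used there.
#
#   import torch
#   import torch.nn as nn
#   import torch.optim as optim
#
#   from models.curatornet import CuratorNet
#   from trainers import Trainer
#
#   device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#   model = CuratorNet(torch.Tensor(embedding), input_size=embedding.shape[1]).to(device)
#
#   criterion = nn.BCEWithLogitsLoss(reduction="sum")
#   optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
#   scheduler = optim.lr_scheduler.ReduceLROnPlateau(
#       optimizer, mode="max", factor=0.6, patience=2, threshold=1e-4,
#   )
#
#   trainer = Trainer(
#       model, device, criterion, optimizer, scheduler,
#       checkpoint_dir="checkpoints", version="CuratorNet_ugallery",
#   )
#   best_model, best_acc, best_loss, best_epoch = trainer.run(
#       max_epochs=10, max_learning_rates=10,
#       dataloaders={"train": train_dataloader, "validation": valid_dataloader},
#   )
# ---------------------------------------------------------------------------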
-------------------------------------------------------------------------------- /models/acf.py: -------------------------------------------------------------------------------- 1 | # The following code is a derivative from the one published 2 | # at https://github.com/Darel13712/acf_pytorch by Darel13712 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class ACFFeatureNet(nn.Module): 11 | """ 12 | Process auxiliary item features into latent space. 13 | All items for user can be processed in batch. 14 | """ 15 | def __init__(self, emb_dim, input_feature_dim, feature_dim, hidden_dim=None, output_dim=None): 16 | super().__init__() 17 | 18 | if not hidden_dim: 19 | hidden_dim = emb_dim 20 | 21 | if not output_dim: 22 | output_dim = emb_dim 23 | 24 | # e.g. 2048 => 128 25 | self.dim_reductor = nn.Linear(input_feature_dim, feature_dim) 26 | 27 | self.w_x = nn.Linear(feature_dim, hidden_dim) 28 | self.w_u = nn.Linear(emb_dim, hidden_dim) 29 | 30 | self.w = nn.Linear(hidden_dim, 1) 31 | 32 | self._kaiming_(self.w_x) 33 | self._kaiming_(self.w_u) 34 | self._kaiming_(self.w) 35 | 36 | def _kaiming_(self, layer): 37 | nn.init.kaiming_normal_(layer.weight, nonlinearity='relu') 38 | torch.nn.init.zeros_(layer.bias) 39 | 40 | def forward(self, user, components, profile_mask, return_attentions=False): 41 | x = self.dim_reductor(components) # Add 42 | x = x.movedim(0, -2) # BxPxHxD => PxHxBxD 43 | 44 | x_tilde = self.w_x(x) 45 | user = self.w_u(user) 46 | 47 | beta = F.relu(x_tilde + user) 48 | beta = self.w(beta) 49 | 50 | beta = F.softmax(beta, dim=1) 51 | 52 | x = (beta * x).sum(dim=1) 53 | x = x.movedim(-2, 0) # PxBxD => BxPxD 54 | 55 | feature_dim = x.shape[-1] 56 | profile_mask = profile_mask.float() 57 | profile_mask = profile_mask.unsqueeze(-1).expand((*profile_mask.shape, feature_dim)) 58 | 59 | x = profile_mask * x 60 | output = {'pooled_features': x} 61 | if return_attentions: 62 | output['attentions'] = beta.squeeze(-1).squeeze(-1) 63 | return output 64 | 65 | 66 | class ACFUserNet(nn.Module): 67 | """ 68 | Get user embedding accounting to surpassed items 69 | """ 70 | 71 | def __init__(self, users, items, emb_dim=128, input_feature_dim=0, profile_embedding=None, device=None): 72 | super().__init__() 73 | self.pad_token = 0 74 | 75 | self.emb_dim = emb_dim 76 | num_users = max(users) + 1 77 | num_items = max(items) + 1 78 | 79 | reduced_feature_dim = emb_dim 80 | self.feats = ACFFeatureNet(emb_dim, input_feature_dim, reduced_feature_dim) if input_feature_dim > 0 else None 81 | 82 | self.user_embedding = nn.Embedding(num_users, emb_dim) 83 | if not profile_embedding: 84 | self.profile_embedding = nn.Embedding(num_items, emb_dim, padding_idx=self.pad_token) 85 | else: 86 | self.profile_embedding = profile_embedding 87 | 88 | f = 1 if self.feats is not None else 0 89 | self.w_u = nn.Linear(emb_dim, emb_dim) 90 | self.w_v = nn.Linear(emb_dim, emb_dim) 91 | self.w_p = nn.Linear(emb_dim, emb_dim) 92 | self.w_x = nn.Linear(emb_dim, emb_dim) 93 | self.w = nn.Linear(emb_dim, 1) 94 | 95 | self._kaiming_(self.w_u) 96 | self._kaiming_(self.w_v) 97 | self._kaiming_(self.w_p) 98 | self._kaiming_(self.w_x) 99 | self._kaiming_(self.w) 100 | 101 | if device is None: 102 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 103 | 104 | self.device = device 105 | 106 | def _kaiming_(self, layer): 107 | nn.init.kaiming_normal_(layer.weight, nonlinearity='relu') 108 | torch.nn.init.zeros_(layer.bias) 109 | 110 | 
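    # Builds the attended user representation:
    #  1. Look up the user embedding and the embeddings of the items in the profile
    #     (a zero-sized tensor is used when the profile is empty).
    #  2. If component features are available, ACFFeatureNet pools each item's
    #     spatial components into a single vector via user-conditioned attention.
    #  3. Item-level attention: score each profile item from the projected user,
    #     profile and component vectors, mask padded positions with -inf, and
    #     softmax over the profile dimension (NaN-safe when a profile is fully masked).
    #  4. The attention-weighted sum of profile embeddings is added to the user
    #     embedding; attention weights can optionally be returned for inspection.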
def forward(self, user_ids, profile_ids, features, profile_mask, return_component_attentions=False, 111 | return_profile_attentions=False, return_attentions=False): 112 | return_component_attentions = return_component_attentions or return_attentions 113 | return_profile_attentions = return_profile_attentions or return_attentions 114 | 115 | batch_size = user_ids.nelement() 116 | user = self.user_embedding(user_ids) 117 | 118 | if profile_ids.nelement() != 0: 119 | profile = self.profile_embedding(profile_ids) 120 | else: 121 | profile = torch.zeros((batch_size, 0, self.emb_dim), device=self.device) 122 | 123 | if self.feats is not None: 124 | features = features.flatten(start_dim=2, end_dim=3) # Add 125 | feat_output = self.feats(user, features, profile_mask, return_attentions=return_component_attentions) 126 | components = feat_output['pooled_features'] 127 | else: 128 | components = torch.tensor([], device=self.device) 129 | 130 | user = self.w_u(user) 131 | profile_query = self.w_p(profile) 132 | components = self.w_x(components) 133 | 134 | profile_query = profile_query.permute((1,0,2)) 135 | components = components.permute((1,0,2)) 136 | 137 | alpha = F.relu(user + profile_query + components) # TODO: + item, Add curent_item emb (?) 138 | alpha = self.w(alpha) 139 | 140 | profile_mask = profile_mask.permute((1,0)) 141 | profile_mask = profile_mask.unsqueeze(-1) 142 | alpha = alpha.masked_fill(torch.logical_not(profile_mask), float('-inf')) 143 | alpha = F.softmax(alpha, dim=0) 144 | 145 | is_nan = torch.isnan(alpha) 146 | if is_nan.any(): 147 | # softmax is nan when all elements in dim 0 are -infinity or infinity 148 | alpha = alpha.masked_fill(is_nan, 0.0) 149 | 150 | alpha = alpha.permute((1,0,2)) 151 | user_profile = (alpha * profile).sum(dim=1) 152 | 153 | user = user + user_profile 154 | output = {'user': user} 155 | if return_component_attentions: 156 | output['component_attentions'] = feat_output['attentions'] 157 | if return_profile_attentions: 158 | output['profile_attentions'] = alpha.squeeze(-1) 159 | 160 | return output 161 | 162 | @property 163 | def params(self): 164 | params_to_update = [] 165 | for name, param in self.named_parameters(): 166 | if param.requires_grad == True: 167 | params_to_update.append(param) 168 | return params_to_update 169 | 170 | 171 | class ACF(nn.Module): 172 | def __init__(self, 173 | users, 174 | items, 175 | feature_path, 176 | model_dim=128, 177 | input_feature_dim=0, 178 | tied_item_embedding=True, 179 | device=None): 180 | 181 | super().__init__() 182 | self.pad_token = 0 183 | self.device = device 184 | 185 | # Should be moved to an ACFRecommender 186 | self.users = users 187 | self.items = items 188 | self.feature_path = feature_path 189 | self.model_dim = model_dim 190 | self.input_feature_dim = input_feature_dim 191 | 192 | self.all_items = torch.tensor(items) 193 | self.all_items = self.all_items + 1 if self.all_items.min() == 0 else self.all_items 194 | self.feature_data = self.load_feature_data(feature_path) 195 | num_items = max(self.all_items) + 1 196 | 197 | input_feature_dim = self.feature_data.shape[-1] 198 | self.item_model = nn.Embedding(num_items, self.model_dim, padding_idx=self.pad_token) 199 | self.user_model = ( 200 | ACFUserNet( 201 | users, 202 | items, 203 | emb_dim=self.model_dim, 204 | input_feature_dim=input_feature_dim, 205 | profile_embedding=self.item_model, 206 | device=self.device) 207 | if tied_item_embedding else 208 | ACFUserNet( 209 | users, 210 | items, 211 | emb_dim=self.model_dim, 212 | 
input_feature_dim=input_feature_dim, 213 | device=self.device) 214 | ) 215 | 216 | def forward(self, user_id, profile_ids, pos, neg, profile_mask): 217 | profile_features = self.get_features(profile_ids).to(self.device) 218 | 219 | user_output = self.user_model(user_id, profile_ids, profile_features, profile_mask) 220 | user = user_output['user'] 221 | 222 | pos_pred = self.get_predictions(user, pos) 223 | neg_pred = self.get_predictions(user, neg) 224 | 225 | return pos_pred, neg_pred 226 | 227 | def get_predictions(self, user, items): 228 | item_embeddings = self.item_model(items) 229 | prediction = self.score(user, item_embeddings) 230 | return prediction 231 | 232 | def score(self, user, items): 233 | return (user * items).sum(1) / self.model_dim 234 | 235 | def recommend_all(self, user_id, profile_ids, return_attentions=False): 236 | # TODO: Improve 237 | profile_mask = (profile_ids != 0).to(self.device) 238 | profile_features = self.get_features(profile_ids).to(self.device) 239 | 240 | user_output = self.user_model(user_id, profile_ids, profile_features, profile_mask, return_attentions=return_attentions) 241 | user = user_output['user'] 242 | 243 | all_items = self.all_items.to(self.device) 244 | item_embeddings = self.item_model(all_items) 245 | scores = self.score(user, item_embeddings) 246 | 247 | if return_attentions: 248 | component_attentions = user_output['component_attentions'] 249 | profile_attentions = user_output['profile_attentions'] 250 | return scores, component_attentions, profile_attentions 251 | 252 | return scores 253 | 254 | def load_feature_data(self, feature_path): 255 | with open(feature_path, 'rb') as fp: 256 | feature_data = np.load(fp, allow_pickle=True) 257 | feature_data = feature_data[:,1].tolist() 258 | feature_data = np.array(feature_data) # Faster when transformed to numpy first 259 | feature_data = torch.tensor(feature_data) 260 | feature_data = feature_data.permute((0,2,3,1)) # TODO: Hack: by default d should be last dimension 261 | feature_data = self.append_default_features(feature_data) 262 | return feature_data 263 | 264 | def append_default_features(self, feature_data): 265 | feature_dims = feature_data.shape[1:] 266 | default_features = torch.zeros((1, *feature_dims)) 267 | feature_data = torch.cat((default_features, feature_data), dim=0) 268 | return feature_data 269 | 270 | def get_features(self, ids): 271 | if isinstance(ids, int): 272 | ids = torch.tensor([ids]) 273 | if isinstance(ids, list): 274 | ids = torch.tensor(ids) 275 | 276 | return self.feature_data[ids] 277 | 278 | def args(self): 279 | return { 280 | 'users': self.users, 281 | 'items': self.items, 282 | 'feature_path': self.feature_path, 283 | 'model_dim': self.model_dim, 284 | 'input_feature_dim': self.input_feature_dim, 285 | } 286 | 287 | @classmethod 288 | def from_checkpoint(cls, checkpoint, device=None): 289 | args = checkpoint['model_args'] 290 | model = cls( 291 | users=args['users'], 292 | items=args['items'], 293 | feature_path=args['feature_path'], 294 | model_dim=args['model_dim'], 295 | input_feature_dim=args['input_feature_dim'], 296 | device=device, 297 | ) 298 | model.load_state_dict(checkpoint['state_dict']) 299 | if device: 300 | model = model.to(device) 301 | 302 | return model 303 | -------------------------------------------------------------------------------- /trainers/img_trainer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import time 4 | 5 | import torch 6 | from torch.utils.data 
import DataLoader 7 | from tqdm import tqdm 8 | 9 | from models.utils import get_cpu_copy, save_checkpoint 10 | 11 | 12 | class ImgTrainer: 13 | 14 | def __init__( 15 | self, model, device, criterion, optimizer, scheduler, 16 | checkpoint_dir=None, writer_dir=None, version=None, 17 | ): 18 | # Important objects for training 19 | self.model = model 20 | self.criterion = criterion 21 | self.optimizer = optimizer 22 | self.scheduler = scheduler 23 | 24 | # Device 25 | if device is None: 26 | device = torch.device("cpu") 27 | print(f">> Device is none... default: {device}") 28 | self.device = device 29 | self.model = self.model.to(self.device) 30 | 31 | # Version 32 | if version is None: 33 | version = ( 34 | f"{model.__class__.__name__}_" 35 | f"{time.strftime('%Y-%m-%d-%H-%M-%S')}" 36 | ) 37 | self.version = version 38 | print(f">> Model version: {self.version}") 39 | 40 | # Checkpoints 41 | if checkpoint_dir is None: 42 | checkpoint_dir = os.path.join("checkpoints") 43 | assert os.path.isdir(checkpoint_dir) 44 | self.checkpoint_dst = os.path.join(checkpoint_dir, f"{self.version}.tar") 45 | print(f">> Checkpoints stored at... {self.checkpoint_dst}") 46 | 47 | def run( 48 | self, max_epochs, max_learning_rates, dataloaders, 49 | non_blocking=True, train_valid_loops=1, save_last_model=False, use_checkpoint=None 50 | ): 51 | # Prepare model 52 | self.model = self.model.to(self.device) 53 | 54 | # Save first checkpoint 55 | if not use_checkpoint: 56 | save_checkpoint( 57 | # Base values 58 | self.checkpoint_dst, model=self.model, 59 | criterion=self.criterion, 60 | optimizer=self.optimizer, 61 | scheduler=self.scheduler, 62 | # Epoch values 63 | epoch=None, accuracy=None, loss=None, 64 | ) 65 | 66 | # Starting values 67 | best_validation_acc = 0.0 68 | best_validation_loss = float("inf") 69 | else: 70 | print(">> Loading checkpoint...") 71 | checkpoint_dst = os.path.join('/'.join(self.checkpoint_dst.split('/')[:-1]), f"{use_checkpoint}.tar") 72 | best_checkpoint = torch.load(checkpoint_dst, map_location=torch.device("cpu")) 73 | self.model.load_state_dict(best_checkpoint["model"]) 74 | 75 | # Move model back to device 76 | self.model.to(self.device) 77 | 78 | # Starting values 79 | best_validation_acc = best_checkpoint['accuracy'] 80 | best_validation_loss = best_checkpoint['loss'] 81 | 82 | used_lrs = [self.optimizer.param_groups[0]["lr"]] 83 | 84 | # Measure elapsed time 85 | start = time.time() 86 | 87 | # Progress bars 88 | assert all(key in dataloaders for key in ["train", "validation"]) 89 | pbar_epochs = tqdm( 90 | total=max_epochs, 91 | desc="Epoch", unit="epoch", 92 | postfix={ 93 | "current_lr": used_lrs[0], 94 | "used_lrs": len(used_lrs), 95 | }, 96 | ) 97 | pbar_train = tqdm( 98 | total=train_valid_loops * len(dataloaders["train"]), 99 | desc="Train", 100 | postfix={ 101 | "last_acc": None, 102 | "last_lostt": None, 103 | }, 104 | ) 105 | pbar_valid = tqdm( 106 | total=len(dataloaders["validation"]), 107 | desc="Valid", 108 | postfix={ 109 | "best_acc": None, 110 | "best_loss": None, 111 | "best_epoch": None, 112 | "bad_epochs": f"{self.scheduler.num_bad_epochs}", 113 | }, 114 | ) 115 | 116 | # Training loop 117 | 118 | for epoch in range(1, max_epochs + 1): 119 | # Each epoch has a training and a validation phase 120 | for phase in ["train", "validation"]: 121 | # Update model mode and progress bar 122 | if phase == "train": 123 | self.model.train() 124 | pbar_train.reset() 125 | pbar_valid.reset() 126 | elif phase == "validation": 127 | self.model.eval() 128 | 129 | # Value 
accumulators 130 | running_acc = torch.tensor(0, dtype=int, device=self.device) 131 | running_loss = torch.tensor(0.0, dtype=torch.double, device=self.device) 132 | 133 | # Iterate over data 134 | dataset = dataloaders[phase].dataset 135 | loop_times = train_valid_loops if phase == "train" else 1 136 | for _ in range(loop_times): 137 | for i_batch, data in enumerate(dataloaders[phase]): 138 | profile = data[0].to(self.device, non_blocking=non_blocking).squeeze(dim=0) 139 | pimg = data[1].to(self.device, non_blocking=non_blocking).squeeze(dim=0) 140 | nimg = data[2].to(self.device, non_blocking=non_blocking).squeeze(dim=0) 141 | pi = data[3].to(self.device, non_blocking=non_blocking).squeeze(dim=0) 142 | ni = data[4].to(self.device, non_blocking=non_blocking).squeeze(dim=0) 143 | target = torch.ones(pimg.size(0), device=self.device) 144 | 145 | # Restart params gradients 146 | self.optimizer.zero_grad() 147 | 148 | # Forward pass 149 | with torch.set_grad_enabled(phase == "train"): 150 | pos, neg = self.model(profile, pimg, nimg, pi, ni) 151 | output = pos-neg 152 | loss = self.criterion(output, target) 153 | loss += (1.0 * torch.norm(self.model.theta_users.weight)) 154 | 155 | # Backward pass 156 | if phase == "train": 157 | loss.backward() 158 | self.optimizer.step() 159 | 160 | # Statistics 161 | running_acc.add_((output > 0).sum()) 162 | running_loss.add_(loss.detach() * output.size(0)) 163 | 164 | # Update progress bar 165 | if phase == "train": 166 | pbar_train.update() 167 | pbar_train.set_postfix(last_lostt=loss.item()) 168 | else: 169 | pbar_valid.update() 170 | pbar_valid.set_postfix(last_lostt=loss.item()) 171 | 172 | # Synchronize GPU (debugging) 173 | # torch.cuda.synchronize() 174 | 175 | # Aggregate statistics 176 | dataset_size = loop_times * len(dataset) 177 | epoch_acc = running_acc.item() / dataset_size 178 | epoch_loss = running_loss.item() / dataset_size 179 | # tqdm.write(f">> Epoch {epoch} ({phase.title()}) | ACC {100 * epoch_acc:.3f} - Loss {epoch_loss:.6f}") 180 | 181 | if phase == "train": 182 | # Update progress bar 183 | pbar_train.set_postfix({ 184 | "last_acc": f"{100 * epoch_acc:.3f}", 185 | "last_loss": f"{epoch_loss:.6f}", 186 | }) 187 | elif phase == "validation": 188 | new_optimal = False 189 | if self.scheduler.mode == "max": 190 | # Is this a new best accuracy? 191 | new_optimal = epoch_acc > best_validation_acc 192 | else: 193 | # Is this a new best loss? 
194 | new_optimal = epoch_loss < best_validation_loss 195 | if new_optimal: 196 | # Save best model 197 | best_validation_acc = epoch_acc 198 | best_validation_loss = epoch_loss 199 | save_checkpoint( 200 | # Base values 201 | self.checkpoint_dst, model=get_cpu_copy(self.model), 202 | criterion=self.criterion, 203 | optimizer=self.optimizer, 204 | scheduler=self.scheduler, 205 | # Epoch values 206 | epoch=self.scheduler.last_epoch, 207 | accuracy=best_validation_acc, 208 | loss=best_validation_loss, 209 | ) 210 | # tqdm.write(f">> New best model (Epoch: {epoch}) | ACC {100 * epoch_acc:.3f} ({epoch_acc})") 211 | # Scheduler step 212 | if self.scheduler.mode == "max": 213 | self.scheduler.step(epoch_acc) 214 | else: 215 | self.scheduler.step(epoch_loss) 216 | next_lr = self.optimizer.param_groups[0]["lr"] 217 | if next_lr not in used_lrs: 218 | # tqdm.write(f">> Next lr: {next_lr} (Already used {used_lrs})") 219 | used_lrs.append(next_lr) 220 | pbar_epochs.set_postfix({ 221 | "used_lrs": len(used_lrs), 222 | "current_lr": next_lr, 223 | }) 224 | # Update progress bar 225 | pbar_valid.set_postfix({ 226 | "best_acc": f"{100 * best_validation_acc:.3f}", 227 | "best_loss": f"{best_validation_loss:.6f}", 228 | "best_epoch": f"{epoch}", 229 | "bad_epochs": f"{self.scheduler.num_bad_epochs}", 230 | }) 231 | 232 | # Update epochs pbar at the end 233 | pbar_epochs.update() 234 | # tqdm.write("\n") 235 | 236 | # Check if used all available learning rates 237 | if len(used_lrs) > max_learning_rates: 238 | print(f">> Reached max different lrs ({max_learning_rates}: {used_lrs})") 239 | break 240 | 241 | # Complete progress bars 242 | pbar_epochs.close() 243 | pbar_train.close() 244 | pbar_valid.close() 245 | 246 | # Report status 247 | elapsed = time.time() - start 248 | print(f">> Training completed in {elapsed // 60:.0f}m {elapsed % 60:.0f}s") 249 | print(f">> Best validation accuracy: ~{100 * best_validation_acc:.3f}%") 250 | print(f">> Best validation loss: ~{best_validation_loss:.6f}") 251 | 252 | if save_last_model: 253 | # Copy last model weights 254 | print(">> Copy last model") 255 | last_model_weights = copy.deepcopy(get_cpu_copy(self.model)) 256 | else: 257 | epoch_acc = None 258 | epoch_loss = None 259 | last_model_weights = None 260 | 261 | # Load best model weights 262 | print(">> Load best model") 263 | best_checkpoint = torch.load(self.checkpoint_dst, map_location=torch.device("cpu")) 264 | self.model.load_state_dict(best_checkpoint["model"]) 265 | 266 | # Move model back to device 267 | self.model.to(self.device) 268 | 269 | # Save last state 270 | print(">> Save last state") 271 | save_checkpoint( 272 | # Base values 273 | self.checkpoint_dst, model=get_cpu_copy(self.model), 274 | criterion=self.criterion, 275 | optimizer=self.optimizer, 276 | scheduler=self.scheduler, 277 | # Epoch values 278 | epoch=self.scheduler.last_epoch, 279 | accuracy=best_validation_acc, 280 | loss=best_validation_loss, 281 | # Last values 282 | last_model=last_model_weights, 283 | last_accuracy=epoch_acc, 284 | last_loss=epoch_loss, 285 | ) 286 | 287 | return self.model, best_checkpoint["accuracy"], best_checkpoint["loss"], best_checkpoint["epoch"] 288 | -------------------------------------------------------------------------------- /3.5 - (VisRank) Evaluation procedure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 
10 | "\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.metrics.pairwise import cosine_similarity\n", 14 | "from tqdm.auto import tqdm\n", 15 | "\n", 16 | "from models import VisRank\n", 17 | "from utils.data import extract_embedding\n", 18 | "from utils.metrics import (\n", 19 | " auc_exact,\n", 20 | " nDCG,\n", 21 | " precision,\n", 22 | " recall,\n", 23 | " reciprocal_rank,\n", 24 | ")\n", 25 | "\n", 26 | "\n", 27 | "# Dataset\n", 28 | "DATASET = \"UGallery\"\n", 29 | "assert DATASET in [\"UGallery\", \"Wikimedia\"]\n", 30 | "\n", 31 | "# Parameters\n", 32 | "FEATURE_EXTRACTOR = \"resnet50\"\n", 33 | "assert FEATURE_EXTRACTOR in [\"resnet50\"]\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# Mode\n", 43 | "MODE_PROFILE = \"user\"\n", 44 | "\n", 45 | "# Paths (general)\n", 46 | "EMBEDDING_PATH = os.path.join(\"data\", DATASET, f\"embedding-{FEATURE_EXTRACTOR}.npy\")\n", 47 | "EVALUATION_PATH = os.path.join(\"data\", DATASET, f\"naive-{MODE_PROFILE}-evaluation.csv\")\n", 48 | "\n", 49 | "# Paths (images)\n", 50 | "IMAGES_DIR = None\n", 51 | "if DATASET == \"Wikimedia\":\n", 52 | " IMAGES_DIR = os.path.join(\"/\", \"mnt\", \"data2\", \"wikimedia\", \"imagenes_tarea\")\n", 53 | "elif DATASET == \"UGallery\":\n", 54 | " IMAGES_DIR = os.path.join(\"/\", \"mnt\", \"workspace\", \"Ugallery\", \"mini-images-224-224-v2\")\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# Load embedding from file\n", 64 | "print(f\"\\nLoading embedding from file... ({EMBEDDING_PATH})\")\n", 65 | "embedding = np.load(EMBEDDING_PATH, allow_pickle=True)\n", 66 | "\n", 67 | "# Extract features and \"id2index\" mapping\n", 68 | "print(\"\\nExtracting data into variables...\")\n", 69 | "features, _, item_index2fn = extract_embedding(embedding, verbose=True)\n", 70 | "print(f\">> Features shape: {features.shape}\")\n", 71 | "del embedding # Release some memory\n", 72 | "\n", 73 | "# Fallback for explicit_features\n", 74 | "explicit_features = np.copy(features)\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# Load evaluation dataframe\n", 84 | "print(\"\\nLoad evaluation dataframe\")\n", 85 | "evaluation_df = pd.read_csv(EVALUATION_PATH)\n", 86 | "# Transform lists from str to int\n", 87 | "string_to_list = lambda s: list(map(int, s.split()))\n", 88 | "evaluation_df[\"profile\"] = evaluation_df[\"profile\"].apply(\n", 89 | " lambda s: string_to_list(s) if isinstance(s, str) else s,\n", 90 | ")\n", 91 | "evaluation_df[\"predict\"] = evaluation_df[\"predict\"].apply(\n", 92 | " lambda s: string_to_list(s) if isinstance(s, str) else s,\n", 93 | ")\n", 94 | "# Group evaluations by profile and user\n", 95 | "evaluation_df[\"profile\"] = evaluation_df[\"profile\"].map(tuple)\n", 96 | "evaluation_df = evaluation_df.groupby([\"profile\", \"user_id\"]).agg({\"predict\": sum}).reset_index()\n", 97 | "evaluation_df[\"profile\"] = evaluation_df[\"profile\"].map(list)\n", 98 | "print(f\">> Evaluation: {evaluation_df.shape}\")\n" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "# Model initialization\n", 108 | "print(\"\\nModel initialization\")\n", 109 | "model = VisRank(\n", 110 | " features, # Embedding\n", 111 | 
" similarity_method=cosine_similarity, # Similarity measure\n", 112 | ")\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# Predict all\n", 122 | "# If True, ranks every item including already consumed items\n", 123 | "# If False, ranks ALL - PROFILE (consumed) + PREDICT (ground truth)\n", 124 | "PREDICT_ALL = False\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "import matplotlib.image as mpimg\n", 134 | "import matplotlib.pyplot as plt\n", 135 | "import torch\n" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# %%time\n", 145 | "# Metrics\n", 146 | "N_EVALS = len(evaluation_df.index)\n", 147 | "# Area Under the Curve (AUC)\n", 148 | "AUC = np.zeros(N_EVALS, dtype=float)\n", 149 | "# Reciprocal Rank (RR)\n", 150 | "RR = np.zeros(N_EVALS, dtype=float)\n", 151 | "# Recall\n", 152 | "R20 = np.zeros(N_EVALS, dtype=float)\n", 153 | "R100 = np.zeros(N_EVALS, dtype=float)\n", 154 | "R200 = np.zeros(N_EVALS, dtype=float)\n", 155 | "# Precision\n", 156 | "P20 = np.zeros(N_EVALS, dtype=float)\n", 157 | "P100 = np.zeros(N_EVALS, dtype=float)\n", 158 | "P200 = np.zeros(N_EVALS, dtype=float)\n", 159 | "# Normalized discounted cumulative gain (nDCG)\n", 160 | "N20 = np.zeros(N_EVALS, dtype=float)\n", 161 | "N100 = np.zeros(N_EVALS, dtype=float)\n", 162 | "N200 = np.zeros(N_EVALS, dtype=float)\n", 163 | "PROFILE_SIZES = np.zeros(N_EVALS, dtype=int)\n", 164 | "N_ITEMS = len(features)\n", 165 | "\n", 166 | "\n", 167 | "evaluation_df[\"profile\"] = evaluation_df[\"profile\"].map(tuple)\n", 168 | "grouped_evals = evaluation_df.groupby([\"profile\", \"user_id\"]).agg({\"predict\": sum}).reset_index()\n", 169 | "for i, row in tqdm(enumerate(evaluation_df.itertuples()), total=len(evaluation_df.index)):\n", 170 | " # Load data into tensors\n", 171 | " profile = np.array(row.profile)\n", 172 | " user_id = int(row.user_id)\n", 173 | " predict = row.predict\n", 174 | " # Prediction\n", 175 | " indexes, _ = model.most_similar_to_profile(profile, k=None, method=\"maximum\", include_consumed=True)\n", 176 | " if not PREDICT_ALL:\n", 177 | " indexes = np.delete(\n", 178 | " indexes,\n", 179 | " np.where(np.isin(indexes, profile) & ~np.isin(indexes, predict)),\n", 180 | " )\n", 181 | " # Ranking\n", 182 | " pos_of_evals = torch.Tensor(np.where(np.isin(indexes, predict))).flatten()\n", 183 | " # Store metrics\n", 184 | " AUC[i] = auc_exact(pos_of_evals, N_ITEMS)\n", 185 | " RR[i] = reciprocal_rank(pos_of_evals)\n", 186 | " R20[i] = recall(pos_of_evals, 20)\n", 187 | " P20[i] = precision(pos_of_evals, 20)\n", 188 | " N20[i] = nDCG(pos_of_evals, 20)\n", 189 | " R100[i] = recall(pos_of_evals, 100)\n", 190 | " P100[i] = precision(pos_of_evals, 100)\n", 191 | " N100[i] = nDCG(pos_of_evals, 100)\n", 192 | " R200[i] = recall(pos_of_evals, 200)\n", 193 | " P200[i] = precision(pos_of_evals, 200)\n", 194 | " N200[i] = nDCG(pos_of_evals, 200)\n", 195 | " PROFILE_SIZES[i] = len(row.profile)\n" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "# Display stats\n", 205 | "print(f\"AVG AUC = {AUC.mean()}\")\n", 206 | "print(f\"AVG RR = {RR.mean()}\")\n", 207 | "print(f\"AVG R20 = {R20.mean()}\")\n", 208 | "print(f\"AVG P20 = 
{P20.mean()}\")\n", 209 | "print(f\"AVG NDCG20 = {N20.mean()}\")\n", 210 | "print(f\"AVG R100 = {R100.mean()}\")\n", 211 | "print(f\"AVG P100 = {P100.mean()}\")\n", 212 | "print(f\"AVG NDCG100 = {N100.mean()}\")\n", 213 | "print(f\"AVG R200 = {R200.mean()}\")\n", 214 | "print(f\"AVG P200 = {P200.mean()}\")\n", 215 | "print(f\"AVG NDCG200 = {N200.mean()}\")\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "## Results inspection" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "USER_ROW = 1\n", 232 | "\n", 233 | "assert 0 <= USER_ROW < len(evaluation_df)\n", 234 | "\n", 235 | "\n", 236 | "# Row in evaluation dataframe\n", 237 | "row = evaluation_df.iloc[USER_ROW]\n", 238 | "\n", 239 | "# Load data into tensors\n", 240 | "profile = np.array(row.profile, ndmin=1)\n", 241 | "user_id = int(row.user_id)\n", 242 | "predict = np.array(row.predict, ndmin=1)\n", 243 | "# Prediction\n", 244 | "indexes, _ = model.most_similar_to_profile(profile, k=None, method=\"maximum\", include_consumed=True)\n", 245 | "if not PREDICT_ALL:\n", 246 | " indexes = np.delete(\n", 247 | " indexes,\n", 248 | " np.where(np.isin(indexes, profile) & ~np.isin(indexes, predict)),\n", 249 | " )\n", 250 | "# Ranking\n", 251 | "pos_of_evals = torch.Tensor(np.where(np.isin(indexes, predict))).flatten()\n", 252 | "\n", 253 | "# Display metrics\n", 254 | "print(f\"| {'-' * 15} | {'-' * 7} |\")\n", 255 | "print(f\"| {'Metric':^15} | {'Score':^7} |\")\n", 256 | "print(f\"| {'-' * 15} | {'-' * 7} |\")\n", 257 | "print(f\"| {'AUC':^15} | {auc_exact(pos_of_evals, N_ITEMS):.5f} |\")\n", 258 | "print(f\"| {'RR':^15} | {reciprocal_rank(pos_of_evals):.5f} |\")\n", 259 | "for k in [20, 100, 500]:\n", 260 | " print(f\"| {'-' * 15} | {'-' * 7} |\")\n", 261 | " print(f\"| {f'Recall@{k}':^15} | {recall(pos_of_evals, k):.5f} |\")\n", 262 | " print(f\"| {f'Precision@{k}':^15} | {precision(pos_of_evals, k):.5f} |\")\n", 263 | " print(f\"| {f'nDCG@{k}':^15} | {nDCG(pos_of_evals, k):.5f} |\")\n", 264 | "print(f\"| {'-' * 15} | {'-' * 7} |\")\n", 265 | "\n", 266 | "# Ranking\n", 267 | "K = 20\n", 268 | "ranking = indexes\n", 269 | "if not PREDICT_ALL:\n", 270 | " ranking = ranking[(~np.isin(ranking, profile)) | (np.isin(ranking, predict))]\n", 271 | "ranking = ranking[:K]\n", 272 | "print()\n", 273 | "print(f\"Size of profile: {profile.size}\")\n", 274 | "print(f\"Position of actual items: {pos_of_evals.cpu().numpy()}\")\n", 275 | "\n", 276 | "\n", 277 | "\n", 278 | "COLUMNS = 10\n", 279 | "ELEMENTS = {\n", 280 | " \"Consumed\": profile,\n", 281 | " \"Recommendation\": ranking,\n", 282 | " \"Ground truth\": predict,\n", 283 | "}\n", 284 | "SHOW_FILENAME = False\n", 285 | "\n", 286 | "for label, items in ELEMENTS.items():\n", 287 | " n_rows = ((len(items) - 1) // COLUMNS + 1)\n", 288 | " fig = plt.figure(figsize=(COLUMNS * 2, 4 * n_rows))\n", 289 | " plt.title(f\"{label.title()} (n={len(items)})\")\n", 290 | " plt.axis(\"off\")\n", 291 | " for i, img_id in enumerate(items, start=1):\n", 292 | " img_fn = item_index2fn[img_id]\n", 293 | " image = mpimg.imread(os.path.join(IMAGES_DIR, img_fn))\n", 294 | " ax = fig.add_subplot(n_rows, COLUMNS, i)\n", 295 | " if SHOW_FILENAME:\n", 296 | " ax.set_title(img_fn)\n", 297 | " if label == \"Recommendation\":\n", 298 | " if img_id in predict:\n", 299 | " ax.patch.set_edgecolor(\"green\")\n", 300 | " ax.patch.set_linewidth(\"5\")\n", 301 | " if 
SHOW_FILENAME:\n", 302 | " ax.set_title(img_fn, color=\"green\")\n", 303 | " else:\n", 304 | " ax.set_title(\"Ground truth\", color=\"green\")\n", 305 | " elif img_id in profile:\n", 306 | " ax.patch.set_edgecolor(\"red\")\n", 307 | " ax.patch.set_linewidth(\"5\")\n", 308 | " if SHOW_FILENAME:\n", 309 | " ax.set_title(img_fn, color=\"red\")\n", 310 | " else:\n", 311 | " ax.set_title(\"Consumed\", color=\"red\")\n", 312 | " plt.xticks([])\n", 313 | " plt.yticks([])\n", 314 | " plt.imshow(image)\n" 315 | ] 316 | } 317 | ], 318 | "metadata": { 319 | "kernelspec": { 320 | "display_name": "3.8.5", 321 | "language": "python", 322 | "name": "3.8.5" 323 | }, 324 | "language_info": { 325 | "codemirror_mode": { 326 | "name": "ipython", 327 | "version": 3 328 | }, 329 | "file_extension": ".py", 330 | "mimetype": "text/x-python", 331 | "name": "python", 332 | "nbconvert_exporter": "python", 333 | "pygments_lexer": "ipython3", 334 | "version": "3.8.5" 335 | } 336 | }, 337 | "nbformat": 4, 338 | "nbformat_minor": 4 339 | } 340 | -------------------------------------------------------------------------------- /2 - Triplet sampling (Random).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import random\n", 11 | "\n", 12 | "import numpy as np\n", 13 | "import pandas as pd\n", 14 | "from tqdm.auto import tqdm\n", 15 | "\n", 16 | "from utils.data import (\n", 17 | " extract_embedding, get_interactions_dataframe,\n", 18 | " mark_evaluation_rows,\n", 19 | ")\n", 20 | "from utils.hashing import pre_hash, HashesContainer\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Triplet sampling" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# Dataset\n", 37 | "DATASET = \"UGallery\"\n", 38 | "assert DATASET in [\"UGallery\", \"Wikimedia\"]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Mode\n", 48 | "# Use 'MODE_PROFILE = True' for CuratorNet-like training \n", 49 | "# Use 'MODE_PROFILE = False' for VBPR-like training\n", 50 | "MODE_PROFILE = False\n", 51 | "MODE_PROFILE_VERBOSE = \"profile\" if MODE_PROFILE else \"user\"\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# Feature extractor\n", 61 | "FEATURE_EXTRACTOR = \"resnet50\"\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# Paths (general)\n", 71 | "EMBEDDING_PATH = os.path.join(\"data\", DATASET, f\"embedding-{FEATURE_EXTRACTOR}.npy\")\n", 72 | "INTERACTIONS_PATH = os.path.join(\"data\", DATASET, f\"{DATASET.lower()}.csv\")\n", 73 | "OUTPUT_TRAIN_PATH = os.path.join(\"data\", DATASET, f\"naive-{MODE_PROFILE_VERBOSE}-train.csv\")\n", 74 | "OUTPUT_VALID_PATH = os.path.join(\"data\", DATASET, f\"naive-{MODE_PROFILE_VERBOSE}-validation.csv\")\n", 75 | "OUTPUT_EVAL_PATH = os.path.join(\"data\", DATASET, f\"naive-{MODE_PROFILE_VERBOSE}-evaluation.csv\")\n", 76 | "\n", 77 | "# General constants\n", 78 | "RNG_SEED = 0\n", 79 | "\n", 80 | "# Sampling constants\n", 81 | "GROUP_USER_INTERACTIONS_BY_TIMESTAMP = True\n", 82 | "MAX_PROFILE_SIZE = 
10\n", 83 | "TOTAL_SAMPLES_TRAIN = 5_000_000\n", 84 | "TOTAL_SAMPLES_VALID = 500_000\n" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# Freezing RNG seed if needed\n", 94 | "if RNG_SEED is not None:\n", 95 | " print(f\"\\nUsing random seed... ({RNG_SEED})\")\n", 96 | " random.seed(RNG_SEED)\n", 97 | " np.random.seed(RNG_SEED)\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# Load embedding from file\n", 107 | "print(f\"\\nLoading embedding from file... ({EMBEDDING_PATH})\")\n", 108 | "embedding = np.load(EMBEDDING_PATH, allow_pickle=True)\n", 109 | "\n", 110 | "# Extract features and \"id2index\" mapping\n", 111 | "print(\"\\nExtracting data into variables...\")\n", 112 | "features, item_id2index, _ = extract_embedding(embedding, verbose=True)\n", 113 | "print(f\">> Features shape: {features.shape}\")\n", 114 | "del embedding # Release some memory\n" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "# Load interactions CSVs\n", 124 | "print(f\"\\nLoading interactions from files...\")\n", 125 | "interactions_df = get_interactions_dataframe(\n", 126 | " INTERACTIONS_PATH,\n", 127 | " display_stats=True,\n", 128 | ")\n", 129 | "\n", 130 | "# Apply 'item_id2index', to work with indexes only\n", 131 | "print(\"\\nApply 'item_id2index' mapping for items...\")\n", 132 | "interactions_df[\"item_id\"] = interactions_df[\"item_id\"].map(str)\n", 133 | "n_missing_ids = interactions_df[~interactions_df[\"item_id\"].isin(item_id2index)][\"item_id\"].count()\n", 134 | "interactions_df = interactions_df[interactions_df[\"item_id\"].isin(item_id2index)]\n", 135 | "interactions_df[\"item_id\"] = interactions_df[\"item_id\"].map(item_id2index)\n", 136 | "print(f\">> Mapping applied, ({n_missing_ids} values in 'item_id2index')\")\n", 137 | "\n", 138 | "# Store mapping from user_id to index (0-index, no skipping)\n", 139 | "print(\"\\nCreate 'user_id2index' mapping for users...\")\n", 140 | "unique_user_ids = interactions_df[\"user_id\"].unique()\n", 141 | "new_user_ids = np.argsort(unique_user_ids)\n", 142 | "user_id2index = dict(zip(unique_user_ids, new_user_ids))\n", 143 | "\n", 144 | "# Apply 'user_id2index', to work with indexes only\n", 145 | "print(\"\\nApply 'user_id2index' mapping for users...\")\n", 146 | "n_missing_ids = interactions_df[~interactions_df[\"user_id\"].isin(user_id2index)][\"user_id\"].count()\n", 147 | "interactions_df = interactions_df[interactions_df[\"user_id\"].isin(user_id2index)]\n", 148 | "interactions_df[\"user_id\"] = interactions_df[\"user_id\"].map(user_id2index)\n", 149 | "print(f\">> Mapping applied, ({n_missing_ids} values in 'user_id2index')\")\n", 150 | "\n", 151 | "# Mark interactions used for evaluation procedure if needed\n", 152 | "if \"evaluation\" not in interactions_df:\n", 153 | " print(\"\\nApply evaluation split...\")\n", 154 | " interactions_df = mark_evaluation_rows(interactions_df)\n", 155 | " # Check if new column exists and has boolean dtype\n", 156 | " assert interactions_df[\"evaluation\"].dtype.name == \"bool\"\n", 157 | " print(f\">> Interactions: {interactions_df.shape}\")\n", 158 | "\n", 159 | "# Split interactions data according to evaluation column\n", 160 | "evaluation_df = interactions_df[interactions_df[\"evaluation\"]]\n", 161 | "interactions_df 
= interactions_df[~interactions_df[\"evaluation\"]]\n", 162 | "assert not interactions_df.empty\n", 163 | "assert not evaluation_df.empty\n", 164 | "print(f\">> Evaluation: {evaluation_df.shape} | Interactions: {interactions_df.shape}\")\n", 165 | "\n", 166 | "# Form interactions baskets, grouping by timestamp and user_id\n", 167 | "if GROUP_USER_INTERACTIONS_BY_TIMESTAMP:\n", 168 | " print(\"\\nForm interactions groups (baskets), by timestamp and user_id...\")\n", 169 | " interactions_df = interactions_df.groupby([\"timestamp\", \"user_id\"])[\"item_id\"].apply(list)\n", 170 | " interactions_df = interactions_df.reset_index()\n", 171 | " interactions_df = interactions_df.sort_values(\"timestamp\")\n", 172 | " interactions_df = interactions_df.reset_index(drop=True)\n", 173 | "else:\n", 174 | " print(\"\\nInteractions groups (baskets), by timestamp and user_id, skipped\")\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "# Copy interactions dataframe to complete evaluation dataframe\n", 184 | "_idf = interactions_df.sort_values(\"timestamp\").groupby([\"user_id\"])[\"item_id\"].apply(list).reset_index().copy()\n", 185 | "if GROUP_USER_INTERACTIONS_BY_TIMESTAMP:\n", 186 | " # Group and flatten interactions to create user profiles\n", 187 | " evaluation_df[\"profile\"] = evaluation_df[\"user_id\"].apply(\n", 188 | " lambda user_id: [\n", 189 | " item_id\n", 190 | " for row in _idf[_idf[\"user_id\"] == user_id][\"item_id\"]\n", 191 | " for interaction in row\n", 192 | " for item_id in interaction\n", 193 | " ],\n", 194 | " )\n", 195 | "if MAX_PROFILE_SIZE:\n", 196 | " # Reduce size of profiles if needed\n", 197 | " evaluation_df[\"profile\"] = evaluation_df[\"profile\"].apply(lambda profile: profile[-MAX_PROFILE_SIZE:])\n", 198 | "\n", 199 | "# Rename predict column and drop evaluation column\n", 200 | "evaluation_df.rename(columns={\"item_id\": \"predict\"}, inplace=True)\n", 201 | "evaluation_df.drop(columns=[\"evaluation\"], inplace=True)\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "print(\"\\nCreating helpers instances...\")\n", 211 | "# Creating hashes container for duplicates detection\n", 212 | "hashes_container = HashesContainer()\n", 213 | "\n", 214 | "# Sampling constants\n", 215 | "print(\"\\nCalculating important values...\")\n", 216 | "N_USERS = interactions_df[\"user_id\"].nunique()\n", 217 | "N_ITEMS = len(features)\n", 218 | "print(f\">> N_USERS = {N_USERS} | N_ITEMS = {N_ITEMS}\")\n" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "def random_triplet_sampling(samples_per_user, hashes_container, desc=None):\n", 228 | " interactions = interactions_df.copy()\n", 229 | " samples = []\n", 230 | " for ui, group in tqdm(interactions.groupby(\"user_id\"), desc=desc):\n", 231 | " # Get profile artworks\n", 232 | " full_profile = np.hstack(group[\"item_id\"].values).tolist()\n", 233 | " full_profile_set = set(full_profile)\n", 234 | " n = samples_per_user\n", 235 | " while n > 0:\n", 236 | " # Sample positive and negative items\n", 237 | " pi_index = random.randrange(len(full_profile))\n", 238 | " pi = full_profile[pi_index]\n", 239 | " # Get profile\n", 240 | " if MAX_PROFILE_SIZE:\n", 241 | " # \"pi_index + 1\" to include pi in profile\n", 242 | " profile = 
full_profile[max(0, pi_index - MAX_PROFILE_SIZE + 1):pi_index + 1]\n", 243 | " else:\n", 244 | " profile = list(full_profile)\n", 245 | " # (While loop is in the sampling method)\n", 246 | " while True:\n", 247 | " ni = random.randint(0, N_ITEMS - 1)\n", 248 | " if ni not in full_profile_set:\n", 249 | " break\n", 250 | " # If conditions are met, hash and enroll triple\n", 251 | " if MODE_PROFILE:\n", 252 | " triple = (profile, pi, ni)\n", 253 | " else:\n", 254 | " triple = (ui, pi, ni)\n", 255 | " if not hashes_container.enroll(pre_hash(triple, contains_iter=MODE_PROFILE)):\n", 256 | " continue\n", 257 | " # If not seen, store sample\n", 258 | " samples.append((profile, pi, ni, ui))\n", 259 | " n -= 1\n", 260 | " return samples\n" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "samples_training = random_triplet_sampling(\n", 270 | " np.ceil(TOTAL_SAMPLES_TRAIN / N_USERS),\n", 271 | " hashes_container,\n", 272 | " desc=\"Random sampling (training)\",\n", 273 | ")\n", 274 | "samples_testing = random_triplet_sampling(\n", 275 | " np.ceil(TOTAL_SAMPLES_VALID / N_USERS),\n", 276 | " hashes_container,\n", 277 | " desc=\"Random sampling (testing)\"\n", 278 | ")\n", 279 | "\n", 280 | "assert len(samples_training) >= TOTAL_SAMPLES_TRAIN\n", 281 | "assert len(samples_testing) >= TOTAL_SAMPLES_VALID\n", 282 | "\n", 283 | "# Total collected samples\n", 284 | "print(f\"Training samples: {len(samples_training)} ({TOTAL_SAMPLES_TRAIN})\")\n", 285 | "print(f\"Testing samples: {len(samples_testing)} ({TOTAL_SAMPLES_VALID})\")\n", 286 | "\n", 287 | "# Log out detected collisions\n", 288 | "print(f\">> Total hash collisions: {hashes_container.collisions}\")\n" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "# Merge triples into a single list\n", 298 | "print(\"\\nMerging strategies samples into a single list\")\n", 299 | "TRAINING_DATA = samples_training\n", 300 | "print(f\">> Training samples: {len(TRAINING_DATA)}\")\n", 301 | "# Merge strategies samples\n", 302 | "VALIDATION_DATA = samples_testing\n", 303 | "print(f\">> Validation samples: {len(VALIDATION_DATA)}\")\n" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "# Search for duplicated hashes\n", 313 | "print(f\"\\nNaive triples validation and looking for duplicates...\")\n", 314 | "validation_hash_check = HashesContainer()\n", 315 | "all_samples = [\n", 316 | " triple\n", 317 | " for subset in (TRAINING_DATA, VALIDATION_DATA)\n", 318 | " for triple in subset\n", 319 | "]\n", 320 | "user_ids = interactions_df[\"user_id\"].unique()\n", 321 | "user_data = dict()\n", 322 | "for triple in tqdm(all_samples, desc=\"Naive validation\"):\n", 323 | " profile, pi, ni, ui = triple\n", 324 | " if MODE_PROFILE:\n", 325 | " assert validation_hash_check.enroll(pre_hash((profile, pi, ni)))\n", 326 | " else:\n", 327 | " assert validation_hash_check.enroll(pre_hash((ui, pi, ni), contains_iter=False))\n", 328 | " assert 0 <= pi < N_ITEMS\n", 329 | " assert 0 <= ni < N_ITEMS\n", 330 | " assert pi != ni\n", 331 | " if ui == -1:\n", 332 | " continue\n", 333 | " assert ui in user_ids\n", 334 | " if not ui in user_data:\n", 335 | " user = interactions_df[interactions_df[\"user_id\"] == ui]\n", 336 | " user_data[ui] = set(np.hstack(user[\"item_id\"].values))\n", 337 | " 
user_artworks = user_data[ui]\n", 338 | " assert all(i in user_artworks for i in profile)\n", 339 | "print(\">> No duped hashes found\")\n" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "print(\"\\nCreating output files (train and valid)...\")\n", 349 | "# Training dataframe\n", 350 | "df_train = pd.DataFrame(TRAINING_DATA, columns=[\"profile\", \"pi\", \"ni\", \"ui\"])\n", 351 | "df_train[\"profile\"] = df_train[\"profile\"].map(lambda l: \" \".join(map(str, l)))\n", 352 | "print(f\">> Saving training samples ({OUTPUT_TRAIN_PATH})\")\n", 353 | "df_train.to_csv(OUTPUT_TRAIN_PATH, index=False)\n", 354 | "\n", 355 | "# Validation dataframe\n", 356 | "df_validation = pd.DataFrame(VALIDATION_DATA, columns=[\"profile\", \"pi\", \"ni\", \"ui\"])\n", 357 | "df_validation[\"profile\"] = df_validation[\"profile\"].map(lambda l: \" \".join(map(str, l)))\n", 358 | "print(f\">> Saving validation samples ({OUTPUT_VALID_PATH})\")\n", 359 | "df_validation.to_csv(OUTPUT_VALID_PATH, index=False)\n", 360 | "\n", 361 | "# Evaluation dataframe\n", 362 | "df_evaluation = evaluation_df.copy()\n", 363 | "# if GROUP_USER_INTERACTIONS_BY_TIMESTAMP:\n", 364 | "# df_evaluation[\"predict\"] = df_evaluation[\"predict\"].map(lambda l: \" \".join(map(str, l)))\n", 365 | "df_evaluation[\"profile\"] = df_evaluation[\"profile\"].map(lambda l: \" \".join(map(str, l)))\n", 366 | "print(f\">> Saving evaluation data ({OUTPUT_EVAL_PATH})\")\n", 367 | "df_evaluation.to_csv(OUTPUT_EVAL_PATH, index=False)\n" 368 | ] 369 | } 370 | ], 371 | "metadata": { 372 | "kernelspec": { 373 | "display_name": "Python 3", 374 | "language": "python", 375 | "name": "python3" 376 | }, 377 | "language_info": { 378 | "codemirror_mode": { 379 | "name": "ipython", 380 | "version": 3 381 | }, 382 | "file_extension": ".py", 383 | "mimetype": "text/x-python", 384 | "name": "python", 385 | "nbconvert_exporter": "python", 386 | "pygments_lexer": "ipython3", 387 | "version": "3.7.10" 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 4 392 | } 393 | -------------------------------------------------------------------------------- /4 - Evaluation procedure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2021-03-26T02:22:07.014878Z", 9 | "start_time": "2021-03-26T02:22:06.547979Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import torch\n", 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "from skimage import io\n", 19 | "from tqdm.auto import tqdm\n", 20 | "\n", 21 | "from models import VBPR, DVBPR, ACF, CuratorNet\n", 22 | "from datasets.user_mode_img import ToTensor\n", 23 | "from utils.data import extract_embedding\n", 24 | "from utils.metrics import (\n", 25 | " auc_exact,\n", 26 | " nDCG,\n", 27 | " precision,\n", 28 | " recall,\n", 29 | " reciprocal_rank,\n", 30 | ")\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# Evaluation procedure" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "ExecuteTime": { 45 | "end_time": "2021-03-26T02:22:08.555918Z", 46 | "start_time": "2021-03-26T02:22:08.553818Z" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# Dataset\n", 52 | "DATASET = \"UGallery\"\n", 53 | "assert 
DATASET in [\"UGallery\", \"Wikimedia\"]\n", 54 | "\n", 55 | "# Model\n", 56 | "MODEL = \"VBPR\"\n", 57 | "assert MODEL in [\"VBPR\", \"DVBPR\", \"CuratorNet\", \"ACF\"]\n", 58 | "\n", 59 | "FEATURE_EXTRACTOR = \"resnet50\"\n", 60 | "assert FEATURE_EXTRACTOR in [\"alexnet\", \"vgg16\", \"resnet50\"]\n", 61 | "\n", 62 | "FEATURE_LAYER = \"layer4\"\n", 63 | "FEATURE_LAYER = FEATURE_LAYER if MODEL == \"ACF\" else \"\" " 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "ExecuteTime": { 71 | "end_time": "2021-03-26T02:22:10.377057Z", 72 | "start_time": "2021-03-26T02:22:10.375132Z" 73 | } 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "# Mode\n", 78 | "# Use 'MODE_PROFILE = True' for CuratorNet-like training \n", 79 | "# Use 'MODE_PROFILE = False' for VBPR-like training\n", 80 | "MODE_PROFILE = MODEL in [\"CuratorNet\"]\n", 81 | "MODE_PROFILE = \"profile\" if MODE_PROFILE else \"user\"\n", 82 | "\n", 83 | "# Checkpoint (ex. 'VBPR_wikimedia')\n", 84 | "CHECKPOINT = f\"{MODEL}_{DATASET.lower()}\"" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "ExecuteTime": { 92 | "end_time": "2021-03-26T02:22:11.286344Z", 93 | "start_time": "2021-03-26T02:22:11.283714Z" 94 | } 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "# Paths (general)\n", 99 | "CHECKPOINT_EXT = \"pt\" if MODEL == \"ACF\" else \"tar\" \n", 100 | "CHECKPOINT_PATH = os.path.join(\"checkpoints\", f\"{CHECKPOINT}.{CHECKPOINT_EXT}\")\n", 101 | "FEATURE_EXTRACTOR = f\"{FEATURE_EXTRACTOR}-{FEATURE_LAYER}\" if FEATURE_LAYER else FEATURE_EXTRACTOR\n", 102 | "EMBEDDING_PATH = os.path.join(\"data\", DATASET, f\"embedding-{FEATURE_EXTRACTOR}.npy\")\n", 103 | "EVALUATION_PATH = os.path.join(\"data\", DATASET, f\"naive-{MODE_PROFILE}-evaluation.csv\")\n", 104 | "\n", 105 | "# Paths (images)\n", 106 | "IMAGES_DIR = None\n", 107 | "if DATASET == \"Wikimedia\":\n", 108 | " IMAGES_DIR = os.path.join(\"/\", \"mnt\", \"data2\", \"wikimedia\", \"imagenes_tarea\") # IMAGES_DIR = os.path.join(\"data\", \"mini-images-224-224-v2\", \"mini-images-224-224-v2\")\n", 109 | "elif DATASET == \"UGallery\":\n", 110 | " IMAGES_DIR = os.path.join(\"/\", \"mnt\", \"workspace\", \"Ugallery\", \"mini-images-224-224-v2\")\n", 111 | "\n", 112 | "# General constants\n", 113 | "RNG_SEED = 0\n", 114 | "USE_GPU = True" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "ExecuteTime": { 122 | "end_time": "2021-03-26T02:22:12.084777Z", 123 | "start_time": "2021-03-26T02:22:12.082033Z" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "# Freezing RNG seed if needed\n", 129 | "if RNG_SEED is not None:\n", 130 | " print(f\"\\nUsing random seed... ({RNG_SEED})\")\n", 131 | " torch.manual_seed(RNG_SEED)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "ExecuteTime": { 139 | "end_time": "2021-03-26T02:22:13.288058Z", 140 | "start_time": "2021-03-26T02:22:12.722614Z" 141 | } 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "# Load embedding from file\n", 146 | "print(f\"\\nLoading embedding from file... 
({EMBEDDING_PATH})\")\n", 147 | "embedding = np.load(EMBEDDING_PATH, allow_pickle=True)\n", 148 | "\n", 149 | "# Extract features and \"id2index\" mapping\n", 150 | "print(\"\\nExtracting data into variables...\")\n", 151 | "features, id2index, item_index2fn = extract_embedding(embedding, verbose=True)\n", 152 | "print(f\">> Features shape: {features.shape}\")\n", 153 | "del embedding # Release some memory" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "ExecuteTime": { 161 | "end_time": "2021-03-26T02:22:14.006894Z", 162 | "start_time": "2021-03-26T02:22:13.987811Z" 163 | } 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "# Load evaluation dataframe\n", 168 | "print(\"\\nLoad evaluation dataframe\")\n", 169 | "evaluation_df = pd.read_csv(EVALUATION_PATH)\n", 170 | "# Transform lists from str to int\n", 171 | "string_to_list = lambda s: list(map(int, s.split()))\n", 172 | "evaluation_df[\"profile\"] = evaluation_df[\"profile\"].apply(\n", 173 | " lambda s: string_to_list(s) if isinstance(s, str) else s,\n", 174 | ")\n", 175 | "evaluation_df[\"predict\"] = evaluation_df[\"predict\"].apply(\n", 176 | " lambda s: string_to_list(s) if isinstance(s, str) else s,\n", 177 | ")\n", 178 | "# Group evaluations by profile and user\n", 179 | "evaluation_df[\"profile\"] = evaluation_df[\"profile\"].map(tuple)\n", 180 | "evaluation_df = evaluation_df.groupby([\"profile\", \"user_id\"]).agg({\"predict\": sum}).reset_index()\n", 181 | "evaluation_df[\"profile\"] = evaluation_df[\"profile\"].map(list)\n", 182 | "print(f\">> Evaluation: {evaluation_df.shape}\")" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "ExecuteTime": { 190 | "end_time": "2021-03-26T02:25:06.857895Z", 191 | "start_time": "2021-03-26T02:25:04.884129Z" 192 | } 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "# Create device instance\n", 197 | "print(\"\\nDevice initialization\")\n", 198 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() and USE_GPU else \"cpu\")\n", 199 | "if torch.cuda.is_available() != USE_GPU:\n", 200 | " print((f\"\\nNotice: Not using GPU - \"\n", 201 | " f\"Cuda available ({torch.cuda.is_available()}) \"\n", 202 | " f\"does not match USE_GPU ({USE_GPU})\"\n", 203 | " ))\n", 204 | "\n", 205 | "# Loading checkpoint\n", 206 | "if CHECKPOINT is not None:\n", 207 | " print(\"\\nLoading checkpoint\")\n", 208 | " checkpoint = torch.load(CHECKPOINT_PATH, map_location=torch.device(\"cpu\"))\n", 209 | " if 'epoch' in checkpoint and 'accuracy' in checkpoint:\n", 210 | " print(f\">> Best epoch: {checkpoint['epoch']} | Best accuracy: {checkpoint['accuracy']}\")\n", 211 | " elif 'epoch' in checkpoint and 'loss' in checkpoint:\n", 212 | " print(f\">> Best epoch: {checkpoint['epoch']} | Best Loss: {checkpoint['loss']}\")\n", 213 | "\n", 214 | "# Model initialization\n", 215 | "print(\"\\nModel initialization\")\n", 216 | "model = None\n", 217 | "checkpoint_loaded = False\n", 218 | "if MODEL == \"VBPR\":\n", 219 | " n_users = checkpoint[\"model\"][\"gamma_users.weight\"].size(0)\n", 220 | " n_items = checkpoint[\"model\"][\"gamma_items.weight\"].size(0)\n", 221 | " dim_gamma = checkpoint[\"model\"][\"gamma_users.weight\"].size(1)\n", 222 | " dim_theta = checkpoint[\"model\"][\"theta_users.weight\"].size(1)\n", 223 | " model = VBPR(\n", 224 | " n_users, n_items, # Number of users and items\n", 225 | " torch.Tensor(features), # Pretrained visual features\n", 226 | " dim_gamma, dim_theta, 
# Size of internal spaces\n", 227 | " ).to(device)\n", 228 | "elif MODEL == \"CuratorNet\":\n", 229 | " model = CuratorNet(\n", 230 | " torch.Tensor(features),\n", 231 | " input_size=features.shape[1],\n", 232 | " ).to(device)\n", 233 | "elif MODEL == \"ACF\":\n", 234 | " model = ACF.from_checkpoint(checkpoint, device=device)\n", 235 | " checkpoint_loaded = True\n", 236 | "elif MODEL == \"DVBPR\":\n", 237 | " n_users = checkpoint[\"model\"][\"theta_users.weight\"].size(0)\n", 238 | " n_items = checkpoint[\"model\"][\"gamma_items.weight\"].size(0)\n", 239 | " K = checkpoint[\"model\"][\"theta_users.weight\"].size(1)\n", 240 | " model = DVBPR(n_users, n_items, K=K).to(device)\n", 241 | " \n", 242 | "# Load state dict\n", 243 | "if not checkpoint_loaded and CHECKPOINT is not None:\n", 244 | " model.load_state_dict(checkpoint[\"model\"])\n", 245 | " print('loaded')\n", 246 | "\n", 247 | "# Change model mode to eval\n", 248 | "print(\"\\nChanging model mode to eval\")\n", 249 | "model.eval()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "ExecuteTime": { 257 | "end_time": "2021-03-26T02:25:12.155616Z", 258 | "start_time": "2021-03-26T02:25:12.153969Z" 259 | } 260 | }, 261 | "outputs": [], 262 | "source": [ 263 | "# Predict all\n", 264 | "# If True, ranks every item including already consumed items\n", 265 | "# If False, ranks ALL - PROFILE (consumed) + PREDICT (ground truth)\n", 266 | "PREDICT_ALL = False" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "ExecuteTime": { 274 | "end_time": "2021-03-26T02:25:18.043466Z", 275 | "start_time": "2021-03-26T02:25:15.699736Z" 276 | } 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "%%time\n", 281 | "# Metrics\n", 282 | "N_EVALS = len(evaluation_df.index)\n", 283 | "# Area Under the Curve (AUC)\n", 284 | "AUC = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 285 | "# Reciprocal Rank (RR)\n", 286 | "RR = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 287 | "# Recall\n", 288 | "R20 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 289 | "R100 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 290 | "R200 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 291 | "# Precision\n", 292 | "P20 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 293 | "P100 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 294 | "P200 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 295 | "# Normalized discounted cumulative gain (nDCG)\n", 296 | "N20 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 297 | "N100 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 298 | "N200 = torch.zeros([N_EVALS], dtype=torch.float64, device=device)\n", 299 | "PROFILE_SIZES = torch.zeros([N_EVALS], dtype=int, device=device)\n", 300 | "N_ITEMS = len(features)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "ExecuteTime": { 308 | "end_time": "2021-03-26T02:25:18.043466Z", 309 | "start_time": "2021-03-26T02:25:15.699736Z" 310 | } 311 | }, 312 | "outputs": [], 313 | "source": [ 314 | "if MODEL in (\"VBPR\", \"CuratorNet\"):\n", 315 | " cache = model.generate_cache()\n", 316 | "elif MODEL == \"DVBPR\":\n", 317 | " def getimg(path, tensorizer):\n", 318 | " img = io.imread(path)\n", 319 | " return tensorizer(img)\n", 320 | " \n", 321 | " imglist = {}\n", 322 | " for path 
in tqdm(os.listdir(IMAGES_DIR)):\n", 323 | " if path in item_index2fn.values():\n", 324 | " img = getimg(os.path.join(IMAGES_DIR, path), ToTensor()) \n", 325 | " name = path.split('.')[0]\n", 326 | " imglist[id2index[name]] = img\n", 327 | "\n", 328 | " assert len(imglist) == N_ITEMS\n", 329 | " print('images loaded:', N_ITEMS)\n", 330 | " cache = model.generate_cache(imglist, device=device)\n", 331 | " print('generated cache: ', cache.shape)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "ExecuteTime": { 339 | "end_time": "2021-03-26T02:25:18.043466Z", 340 | "start_time": "2021-03-26T02:25:15.699736Z" 341 | } 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "evaluation_df[\"profile\"] = evaluation_df[\"profile\"].map(tuple)\n", 346 | "grouped_evals = evaluation_df.groupby([\"profile\", \"user_id\"]).agg({\"predict\": sum}).reset_index()\n", 347 | "for i, row in tqdm(enumerate(evaluation_df.itertuples()), total=len(evaluation_df.index)):\n", 348 | " # Load data into tensors\n", 349 | " profile = torch.tensor(row.profile).to(device, non_blocking=True).unsqueeze(0)\n", 350 | " user_id = torch.tensor([int(row.user_id)]).to(device, non_blocking=True)\n", 351 | " predict = torch.tensor(row.predict).to(device, non_blocking=True)\n", 352 | " # Prediction\n", 353 | " if MODEL == \"ACF\":\n", 354 | " acf_profile = profile + 1 # In ACF items are indexed starting at 1\n", 355 | " scores = model.recommend_all(user_id, acf_profile).squeeze()\n", 356 | " elif MODEL == 'DVBPR':\n", 357 | " scores = model.recommend_all(user_id, imglist, cache=cache)\n", 358 | " elif MODE_PROFILE == \"profile\":\n", 359 | " scores = model.recommend_all(profile, cache=cache)\n", 360 | " elif MODE_PROFILE == \"user\":\n", 361 | " scores = model.recommend_all(user_id, cache=cache).squeeze()\n", 362 | " \n", 363 | " # Ranking\n", 364 | " pos_of_evals = (torch.argsort(scores, descending=True)[..., None] == predict).any(-1).nonzero().flatten()\n", 365 | " if not PREDICT_ALL:\n", 366 | " pos_of_profi = (torch.argsort(scores, descending=True)[..., None] == profile).any(-1).nonzero().flatten()\n", 367 | " # Relevant dimensions\n", 368 | " _a, _b = pos_of_evals.size(0), pos_of_profi.size(0)\n", 369 | " # Calculate shift for each eval item\n", 370 | " shift = (pos_of_profi.expand(_a, _b) < pos_of_evals.reshape(_a, 1).expand(_a, _b)).sum(1)\n", 371 | " # Apply shift\n", 372 | " pos_of_evals -= shift.squeeze(0)\n", 373 | " # Store metrics\n", 374 | " AUC[i] = auc_exact(pos_of_evals, N_ITEMS)\n", 375 | " RR[i] = reciprocal_rank(pos_of_evals)\n", 376 | " R20[i] = recall(pos_of_evals, 20)\n", 377 | " P20[i] = precision(pos_of_evals, 20)\n", 378 | " N20[i] = nDCG(pos_of_evals, 20)\n", 379 | " R100[i] = recall(pos_of_evals, 100)\n", 380 | " P100[i] = precision(pos_of_evals, 100)\n", 381 | " N100[i] = nDCG(pos_of_evals, 100)\n", 382 | " R200[i] = recall(pos_of_evals, 200)\n", 383 | " P200[i] = precision(pos_of_evals, 200)\n", 384 | " N200[i] = nDCG(pos_of_evals, 200)\n", 385 | " PROFILE_SIZES[i] = len(row.profile)" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": { 392 | "ExecuteTime": { 393 | "end_time": "2021-03-26T02:25:19.777232Z", 394 | "start_time": "2021-03-26T02:25:19.768835Z" 395 | } 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "# Display stats\n", 400 | "print(f\"AVG AUC = {AUC.mean()}\")\n", 401 | "print(f\"AVG RR = {RR.mean()}\")\n", 402 | "print(f\"AVG R20 = {R20.mean()}\")\n", 403 | "print(f\"AVG P20 
= {P20.mean()}\")\n", 404 | "print(f\"AVG NDCG20 = {N20.mean()}\")\n", 405 | "print(f\"AVG R100 = {R100.mean()}\")\n", 406 | "print(f\"AVG P100 = {P100.mean()}\")\n", 407 | "print(f\"AVG NDCG100 = {N100.mean()}\")\n", 408 | "print(f\"AVG R200 = {R200.mean()}\")\n", 409 | "print(f\"AVG P200 = {P200.mean()}\")\n", 410 | "print(f\"AVG NDCG200 = {N200.mean()}\")\n" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "## Relevant plots" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": { 424 | "ExecuteTime": { 425 | "end_time": "2021-03-26T02:25:46.376657Z", 426 | "start_time": "2021-03-26T02:25:46.371906Z" 427 | } 428 | }, 429 | "outputs": [], 430 | "source": [ 431 | "import numpy as np\n", 432 | "\n", 433 | "\n", 434 | "def smart_group(value):\n", 435 | " if value == 0:\n", 436 | " return 0\n", 437 | " digits = int(np.log10(value)) + 1\n", 438 | " return (10**(digits - 1)) * (value // (10**(digits - 1)))" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": { 445 | "ExecuteTime": { 446 | "end_time": "2021-03-26T02:25:46.978857Z", 447 | "start_time": "2021-03-26T02:25:46.880712Z" 448 | } 449 | }, 450 | "outputs": [], 451 | "source": [ 452 | "import pandas as pd\n", 453 | "\n", 454 | "\n", 455 | "metrics_data = [\n", 456 | " [\n", 457 | " PROFILE_SIZES[i].item(), AUC[i].item(), RR[i].item(),\n", 458 | " R20[i].item(), P20[i].item(), N20[i].item(),\n", 459 | " R100[i].item(), P100[i].item(), N100[i].item(),\n", 460 | " ]\n", 461 | " for i in range(N_EVALS)\n", 462 | "]\n", 463 | "metrics_df = pd.DataFrame(metrics_data, columns=[\n", 464 | " \"PROFILE_SIZES\", \"AUC\", \"RR\",\n", 465 | " \"R20\", \"P20\", \"N20\",\n", 466 | " \"R100\", \"P100\", \"N100\",\n", 467 | "])\n", 468 | "metrics_df[\"PROFILE_SIZES_STEPS\"] = metrics_df[\"PROFILE_SIZES\"].map(smart_group)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": { 475 | "ExecuteTime": { 476 | "end_time": "2021-03-26T02:25:48.819523Z", 477 | "start_time": "2021-03-26T02:25:47.492939Z" 478 | } 479 | }, 480 | "outputs": [], 481 | "source": [ 482 | "import matplotlib.pyplot as plt\n", 483 | "import seaborn as sns\n", 484 | "\n", 485 | "\n", 486 | "# Metric\n", 487 | "METRIC = \"AUC\"\n", 488 | "# Profile size range\n", 489 | "metrics_df_plot = metrics_df.copy()\n", 490 | "metrics_df_plot = metrics_df_plot[\n", 491 | " (metrics_df_plot[\"PROFILE_SIZES_STEPS\"] >= 0) & (metrics_df_plot[\"PROFILE_SIZES_STEPS\"] < 100)\n", 492 | "]\n", 493 | "# Plot METRIC distribution across users grouped by profile size\n", 494 | "plt.figure(figsize=(24, 9))\n", 495 | "ax = sns.violinplot(x=\"PROFILE_SIZES_STEPS\", y=METRIC, data=metrics_df_plot, inner=None)\n", 496 | "if DATASET != \"Pinterest\":\n", 497 | " ax = sns.swarmplot(x=\"PROFILE_SIZES_STEPS\", y=METRIC, data=metrics_df_plot, color=\"black\", edgecolor=\"gray\")\n" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": { 504 | "ExecuteTime": { 505 | "end_time": "2021-03-26T02:25:50.930112Z", 506 | "start_time": "2021-03-26T02:25:50.836694Z" 507 | } 508 | }, 509 | "outputs": [], 510 | "source": [ 511 | "# Area Under the Curve distribution across users\n", 512 | "metrics_df[\"AUC\"].plot.box(sym=\"r+\")" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": { 519 | "ExecuteTime": { 520 | "end_time": 
"2021-03-26T02:25:53.368655Z", 521 | "start_time": "2021-03-26T02:25:53.263595Z" 522 | } 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "# First relevant item position (1 / reciprocal_rank) distribution across users\n", 527 | "# Line marks the 10% of the dataset\n", 528 | "graph = (1 / metrics_df[\"RR\"]).plot.box(sym=\"r+\")\n", 529 | "plt.ylim(0, features.shape[0])\n", 530 | "graph.axhline(features.shape[0] / 10, color=\"red\")" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": { 537 | "ExecuteTime": { 538 | "end_time": "2021-03-26T02:25:53.924416Z", 539 | "start_time": "2021-03-26T02:25:53.786511Z" 540 | } 541 | }, 542 | "outputs": [], 543 | "source": [ 544 | "# First relevant item position (1 / reciprocal_rank) histogram\n", 545 | "graph = (1 / metrics_df[\"RR\"]).plot.hist(bins=50)" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "## Results inspection" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": null, 558 | "metadata": { 559 | "ExecuteTime": { 560 | "end_time": "2021-03-26T02:25:54.837633Z", 561 | "start_time": "2021-03-26T02:25:54.834283Z" 562 | } 563 | }, 564 | "outputs": [], 565 | "source": [ 566 | "ROW = 0" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": { 573 | "ExecuteTime": { 574 | "end_time": "2021-03-26T02:25:55.556066Z", 575 | "start_time": "2021-03-26T02:25:55.538691Z" 576 | } 577 | }, 578 | "outputs": [], 579 | "source": [ 580 | "# Row in evaluation dataframe\n", 581 | "row = evaluation_df.iloc[ROW]\n", 582 | "\n", 583 | "# Load data into tensors\n", 584 | "profile = torch.tensor(row.profile).to(device, non_blocking=True).unsqueeze(0)\n", 585 | "user_id = torch.tensor([int(row.user_id)]).to(device, non_blocking=True)\n", 586 | "predict = torch.tensor(row.predict).to(device, non_blocking=True)\n", 587 | "# Prediction\n", 588 | "if MODEL == \"ACF\":\n", 589 | " acf_profile = profile + 1\n", 590 | " scores = model.recommend_all(user_id, acf_profile).squeeze()\n", 591 | "elif MODEL == 'DVBPR':\n", 592 | " scores = model.recommend_all(user_id, imglist, cache=cache).squeeze()\n", 593 | "elif MODE_PROFILE == \"profile\":\n", 594 | " scores = model.recommend_all(profile)\n", 595 | "elif MODE_PROFILE == \"user\":\n", 596 | " scores = model.recommend_all(user_id).squeeze()\n", 597 | "# Ranking\n", 598 | "pos_of_evals = (torch.argsort(scores, descending=True)[..., None] == predict).any(-1).nonzero().flatten()\n", 599 | "if not PREDICT_ALL:\n", 600 | " pos_of_profi = (torch.argsort(scores, descending=True)[..., None] == profile).any(-1).nonzero().flatten()\n", 601 | " pos_of_evals -= (pos_of_profi < pos_of_evals).sum()\n", 602 | "\n", 603 | "# Display metrics\n", 604 | "print(f\"| {'-' * 15} | {'-' * 7} |\")\n", 605 | "print(f\"| {'Metric':^15} | {'Score':^7} |\")\n", 606 | "print(f\"| {'-' * 15} | {'-' * 7} |\")\n", 607 | "print(f\"| {'AUC':^15} | {auc_exact(pos_of_evals, N_ITEMS):.5f} |\")\n", 608 | "print(f\"| {'RR':^15} | {reciprocal_rank(pos_of_evals):.5f} |\")\n", 609 | "for k in [20, 100, 500]:\n", 610 | " print(f\"| {'-' * 15} | {'-' * 7} |\")\n", 611 | " print(f\"| {f'Recall@{k}':^15} | {recall(pos_of_evals, k):.5f} |\")\n", 612 | " print(f\"| {f'Precision@{k}':^15} | {precision(pos_of_evals, k):.5f} |\")\n", 613 | " print(f\"| {f'nDCG@{k}':^15} | {nDCG(pos_of_evals, k):.5f} |\")\n", 614 | "print(f\"| {'-' * 15} | {'-' * 7} |\")\n", 615 | "\n", 616 | "# Profile and prediction\n", 
617 | "profile = profile.cpu().numpy().flatten()\n", 618 | "predict = predict.cpu().numpy().flatten()\n", 619 | "# Ranking\n", 620 | "K = 20\n", 621 | "ranking = torch.argsort(scores, descending=True).cpu().numpy().flatten()\n", 622 | "if not PREDICT_ALL:\n", 623 | " ranking = ranking[(~np.isin(ranking, profile)) | (np.isin(ranking, predict))]\n", 624 | "ranking = ranking[:K]\n", 625 | "print()\n", 626 | "print(f\"Size of profile: {profile.size}\")\n", 627 | "print(f\"Position of actual items: {pos_of_evals.cpu().numpy()}\")\n" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "metadata": { 634 | "ExecuteTime": { 635 | "end_time": "2021-03-26T02:26:02.034249Z", 636 | "start_time": "2021-03-26T02:26:00.729988Z" 637 | }, 638 | "scrolled": false 639 | }, 640 | "outputs": [], 641 | "source": [ 642 | "import matplotlib.image as mpimg\n", 643 | "import matplotlib.pyplot as plt\n", 644 | "\n", 645 | "\n", 646 | "COLUMNS = 10\n", 647 | "ELEMENTS = {\n", 648 | " \"Consumed\": profile,\n", 649 | " \"Recommendation\": ranking,\n", 650 | " \"Ground truth\": predict,\n", 651 | "}\n", 652 | "SHOW_FILENAME = False\n", 653 | "\n", 654 | "for label, items in ELEMENTS.items():\n", 655 | " n_rows = ((len(items) - 1) // COLUMNS + 1)\n", 656 | " fig = plt.figure(figsize=(COLUMNS * 2, 4 * n_rows))\n", 657 | " plt.title(f\"{label.title()} (n={len(items)})\")\n", 658 | " plt.axis(\"off\")\n", 659 | " for i, img_id in enumerate(items, start=1):\n", 660 | " img_fn = item_index2fn[img_id]\n", 661 | " image = mpimg.imread(os.path.join(IMAGES_DIR, img_fn))\n", 662 | " ax = fig.add_subplot(n_rows, COLUMNS, i)\n", 663 | " if SHOW_FILENAME:\n", 664 | " ax.set_title(img_fn)\n", 665 | " if label == \"Recommendation\":\n", 666 | " if img_id in predict:\n", 667 | " ax.patch.set_edgecolor(\"green\")\n", 668 | " ax.patch.set_linewidth(\"5\")\n", 669 | " if SHOW_FILENAME:\n", 670 | " ax.set_title(img_fn, color=\"green\")\n", 671 | " else:\n", 672 | " ax.set_title(\"Ground truth\", color=\"green\")\n", 673 | " elif img_id in profile:\n", 674 | " ax.patch.set_edgecolor(\"red\")\n", 675 | " ax.patch.set_linewidth(\"5\")\n", 676 | " if SHOW_FILENAME:\n", 677 | " ax.set_title(img_fn, color=\"red\")\n", 678 | " else:\n", 679 | " ax.set_title(\"Consumed\", color=\"red\")\n", 680 | " plt.xticks([])\n", 681 | " plt.yticks([])\n", 682 | " plt.imshow(image)\n" 683 | ] 684 | } 685 | ], 686 | "metadata": { 687 | "kernelspec": { 688 | "display_name": "Python 3", 689 | "language": "python", 690 | "name": "python3" 691 | }, 692 | "language_info": { 693 | "codemirror_mode": { 694 | "name": "ipython", 695 | "version": 3 696 | }, 697 | "file_extension": ".py", 698 | "mimetype": "text/x-python", 699 | "name": "python", 700 | "nbconvert_exporter": "python", 701 | "pygments_lexer": "ipython3", 702 | "version": "3.7.10" 703 | } 704 | }, 705 | "nbformat": 4, 706 | "nbformat_minor": 4 707 | } 708 | --------------------------------------------------------------------------------