├── pytorch ├── Masters │ ├── __init__.py │ └── master_gau.py ├── Models │ ├── __init__.py │ └── GAU.py ├── handlers │ ├── __init__.py │ ├── output_handler.py │ ├── sampler.py │ └── data_handlers.py ├── __init__.py ├── images │ ├── temporal.png │ └── topics.png ├── cleaning_text.py ├── layers.py ├── torch_utils.py ├── rank_metrics.py ├── nets.py ├── interactions.py └── losses.py ├── .gitignore └── README.md /pytorch/Masters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytorch/Models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytorch/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = 'v0.1.5' 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .idea/* 3 | *.pyc 4 | pytorch/logs/* -------------------------------------------------------------------------------- /pytorch/images/temporal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nguyenvo09/CombatingFakeNews/HEAD/pytorch/images/temporal.png -------------------------------------------------------------------------------- /pytorch/images/topics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nguyenvo09/CombatingFakeNews/HEAD/pytorch/images/topics.png -------------------------------------------------------------------------------- /pytorch/handlers/output_handler.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class FileHandler(object): 4 | def __init__(self, log_file): 5 | if log_file != None: 6 | self.mylogfile = open(log_file, "w") 7 | self.mylogfile_details = open(log_file + "_best_details.json", "w") 8 | 9 | def myprint(self, message): 10 | print(message) 11 | if self.mylogfile != None: 12 | print(message, file = self.mylogfile) 13 | self.mylogfile.flush() 14 | 15 | def myprint_details(self, message): 16 | # print(message) 17 | if self.mylogfile_details != None: 18 | print(message, file = self.mylogfile_details) 19 | self.mylogfile_details.flush() -------------------------------------------------------------------------------- /pytorch/cleaning_text.py: -------------------------------------------------------------------------------- 1 | import re 2 | from nltk.tokenize import TweetTokenizer 3 | 4 | def remove_special_tokens(text): 5 | text = text.replace("\n", " ") 6 | text = text.replace("\t", " ") 7 | text = text.replace("\r", " ") 8 | text = text.replace("\xa0", " ") 9 | text = text.replace('’', "'") 10 | text = text.replace("”", "\"") 11 | text = text.replace("“", "\"") 12 | text = re.sub(r'[^\x00-\x7F]+', ' ', text) 13 | args = text.split() 14 | text = " ".join(args) 15 | return text 16 | 17 | def clean_dataset(line): 18 | line = line.replace("\n", "") 19 | line = re.sub(r"https?:\S+", "url", line) 20 | line = remove_special_tokens(line) 21 | 22 | tknzr = TweetTokenizer() 23 | line = 
tknzr.tokenize(line) 24 | line = " ".join(line) 25 | 26 | tokens = line.split() 27 | for idx, token in enumerate(tokens): 28 | if token.startswith("@"): 29 | tokens[idx] = "@user" 30 | line = " ".join(tokens) 31 | 32 | return line -------------------------------------------------------------------------------- /pytorch/layers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Embedding layers useful for recommender models. 3 | """ 4 | import torch.nn as nn 5 | 6 | 7 | class ScaledEmbedding(nn.Embedding): 8 | """ 9 | Embedding layer that initialises its values 10 | to using a normal variable scaled by the inverse 11 | of the embedding dimension. 12 | """ 13 | 14 | def reset_parameters(self): 15 | """ 16 | Initialize parameters. 17 | """ 18 | 19 | self.weight.data.normal_(0, 0.01) # std = 0.01 20 | if self.padding_idx is not None: 21 | self.weight.data[self.padding_idx].fill_(0) 22 | 23 | 24 | class ZeroEmbedding(nn.Embedding): 25 | """ 26 | Embedding layer that initialises its values 27 | to using a normal variable scaled by the inverse 28 | of the embedding dimension. 29 | 30 | Used for biases. 31 | """ 32 | 33 | def reset_parameters(self): 34 | """ 35 | Initialize parameters. 36 | """ 37 | 38 | self.weight.data.zero_() 39 | if self.padding_idx is not None: 40 | self.weight.data[self.padding_idx].fill_(0) 41 | 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Recommending Fact-checking Articles to Combat Fake News 2 | This is the repository for the paper "The Rise of Guardians: Fact-checking URL Recommendation to Combat Fake News" SIGIR 2018, 3 | https://arxiv.org/abs/1806.07516 4 | 5 | ## Datasets 6 | - Link to download our full dataset for the paper: You can analyze characteristics of guardians based on this dataset. 7 | https://drive.google.com/file/d/1n2nZT2440BYy7PDwdSwLMpWHKrKRObF1/view 8 | 9 | - Link to download splitted data `Splitted_data.rar` 10 | https://drive.google.com/open?id=1riEsUNP3GHfn7XefuMkH50kW4W2dL0qW 11 | 12 | - The splitted data has training, dev and testing interactions. In each part, there are 12,197 guardians with at least 13 | one interaction for each guardian 14 | 15 | ## Analysis 16 | - Temporal behavior of guardians 17 | ![alt text](https://github.com/nguyenvo09/CombatingFakeNews/blob/master/pytorch/images/temporal.png) 18 | - Topical interests of guardians 19 | ![alt text](https://github.com/nguyenvo09/CombatingFakeNews/blob/master/pytorch/images/topics.png) 20 | 21 | ## How to run this code? 22 | - Download the splitted data and extract it. 
The expected path is `/pytorch/Splitted_data/sigir18/*` 23 | - Then, run the following command with default settings: 24 | ``` 25 | python Masters/master_gau.py 26 | ``` 27 | You could achive following performance: 28 | ``` 29 | |Epoch 11 | Train time: 8 (s) | Train loss: 79212.76166 | Eval time: 30.316 (s) | Vad mapks@10 = 0.06830 | Vad ndcg@10 = 0.08897 | Vad recall@10 = 0.15610 | Test mapks@10 = 0.06879 | Test ndcg@10 = 0.08991 | Test recall@10 = 0.15783 30 | |Epoch 12 | Train time: 8 (s) | Train loss: 75769.19746 | Eval time: 30.028 (s) | Vad mapks@10 = 0.06833 | Vad ndcg@10 = 0.08906 | Vad recall@10 = 0.15635 | Test mapks@10 = 0.06918 | Test ndcg@10 = 0.09030 | Test recall@10 = 0.15832 31 | |Epoch 13 | Train time: 8 (s) | Train loss: 72671.60144 | Eval time: 30.399 (s) | Vad mapks@10 = 0.06876 | Vad ndcg@10 = 0.08946 | Vad recall@10 = 0.15668 | Test mapks@10 = 0.06948 | Test ndcg@10 = 0.09066 | Test recall@10 = 0.15889 32 | |Epoch 14 | Train time: 8 (s) | Train loss: 69873.45222 | Eval time: 29.985 (s) | Vad mapks@10 = 0.06858 | Vad ndcg@10 = 0.08913 | Vad recall@10 = 0.15578 | Test mapks@10 = 0.06952 | Test ndcg@10 = 0.09063 | Test recall@10 = 0.15865 33 | ``` 34 | ## Requirements: 35 | We use PyTorch 0.4.1, Python 3.5. The SPPMI matrices, network and sim matrices are memory-intensive so please run 36 | it on a computer with at least 16GB. 37 | 38 | 39 | Please cite our paper if you find the data and code helpful, thanks: 40 | 41 | ``` 42 | @inproceedings{vo2018guardians, 43 | title={The Rise of Guardians: Fact-checking URL Recommendation to Combat Fake News}, 44 | author={Vo, Nguyen and Lee, Kyumin}, 45 | booktitle={The 41st International ACM SIGIR Conference 46 | on Research and Development in Information Retrieval}, 47 | year={2018} 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /pytorch/torch_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | 5 | 6 | def flatten(x): 7 | ''' 8 | flatten high dimensional tensor x into an array 9 | :param x: shape (B, D1, D2, ...) 
10 | :return: 1 dimensional tensor 11 | ''' 12 | dims = x.size()[1:] # remove the first dimension as it is batch dimension 13 | num_features = 1 14 | for s in dims: num_features *= s 15 | return x.contiguous().view(-1, num_features) 16 | 17 | 18 | def gpu(tensor, gpu=False): 19 | if gpu: return tensor.cuda() 20 | else: return tensor 21 | 22 | 23 | def cpu(tensor): 24 | 25 | if tensor.is_cuda: return tensor.cpu() 26 | else: return tensor 27 | 28 | 29 | def check_symmetric(a, rtol=1e-05, atol=1e-08): 30 | return np.allclose(a, a.T, rtol=rtol, atol=atol) 31 | 32 | 33 | def minibatch(*tensors, **kwargs): 34 | 35 | batch_size = kwargs.get('batch_size', 128) 36 | 37 | if len(tensors) == 1: 38 | tensor = tensors[0] 39 | for i in range(0, len(tensor), batch_size): 40 | yield tensor[i:i + batch_size] 41 | else: 42 | for i in range(0, len(tensors[0]), batch_size): 43 | yield tuple(x[i:i + batch_size] for x in tensors) 44 | 45 | 46 | def shuffle(*arrays, **kwargs): 47 | 48 | random_state = kwargs.get('random_state') 49 | 50 | if len(set(len(x) for x in arrays)) != 1: 51 | raise ValueError('All inputs to shuffle must have ' 52 | 'the same length.') 53 | 54 | if random_state is None: 55 | random_state = np.random.RandomState() 56 | 57 | shuffle_indices = np.arange(len(arrays[0])) 58 | random_state.shuffle(shuffle_indices) 59 | 60 | if len(arrays) == 1: 61 | return arrays[0][shuffle_indices] 62 | else: 63 | return tuple(x[shuffle_indices] for x in arrays) 64 | 65 | 66 | def assert_no_grad(variable): 67 | 68 | if variable.requires_grad: 69 | raise ValueError( 70 | "nn criterions don't compute the gradient w.r.t. targets - please " 71 | "mark these variables as volatile or not requiring gradients" 72 | ) 73 | 74 | def numpy2tensor(x, dtype): 75 | # torch.tensor(torch.from_numpy(var), dtype = torch.int) 76 | return torch.tensor(torch.from_numpy(x), dtype = dtype) 77 | 78 | def tensor2numpy(x): 79 | # return x.numpy() 80 | return cpu(x).numpy() 81 | 82 | def set_seed(seed, cuda=False): 83 | 84 | torch.manual_seed(seed) 85 | 86 | if cuda: 87 | torch.cuda.manual_seed(seed) 88 | 89 | 90 | def _predict_process_ids(user_ids, item_ids, num_items, use_cuda): 91 | """ 92 | 93 | Parameters 94 | ---------- 95 | user_ids 96 | item_ids 97 | num_items 98 | use_cuda 99 | 100 | Returns 101 | ------- 102 | 103 | """ 104 | if item_ids is None: 105 | item_ids = np.arange(num_items, dtype=np.int64) 106 | 107 | if np.isscalar(user_ids): 108 | user_ids = np.array(user_ids, dtype=np.int64) 109 | 110 | user_ids = torch.from_numpy(user_ids.reshape(-1, 1).astype(np.int64)) 111 | item_ids = torch.from_numpy(item_ids.reshape(-1, 1).astype(np.int64)) 112 | 113 | if item_ids.size()[0] != user_ids.size(0): 114 | user_ids = user_ids.expand(item_ids.size()) 115 | 116 | user_var = gpu(user_ids, use_cuda) 117 | item_var = gpu(item_ids, use_cuda) 118 | 119 | return user_var.squeeze(), item_var.squeeze() -------------------------------------------------------------------------------- /pytorch/handlers/sampler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module containing functions for negative item sampling. 
3 | """ 4 | 5 | import numpy as np 6 | from scipy.sparse import csr_matrix 7 | import torch_utils 8 | import time 9 | np.random.seed(123456) 10 | 11 | class Sampler(object): 12 | def __init__(self): 13 | super(Sampler, self).__init__() 14 | self._candidate = dict() # negative candidates 15 | 16 | def set_interactions(self, interactions): 17 | """ 18 | 19 | Parameters 20 | ---------- 21 | interactions: :class:`interactions.Interactions` 22 | 23 | 24 | Returns 25 | ------- 26 | 27 | """ 28 | csr_data = interactions.tocsr() 29 | self.build_neg_dict(csr_data) 30 | 31 | def build_neg_dict(self, csr_data): 32 | #for each user, store the unobserved values into a dict for sampling later. 33 | # csr_data = csr_matrix(csr_data) 34 | # n_users, n_items = csr_data.shape 35 | # user_counts = np.zeros(n_users) 36 | # for u in range(n_users): user_counts = csr_data[u].getnnz() 37 | pass 38 | 39 | def random_sample_items(self, num_items, shape, random_state=None): 40 | """ 41 | Randomly sample a number of items based on shape. 42 | (we need to improve this since it is likely to sample a positive instance) 43 | https://github.com/maciejkula/spotlight/issues/36 44 | https://github.com/graytowne/caser_pytorch/blob/master/train_caser.py 45 | Parameters 46 | ---------- 47 | 48 | num_items: int 49 | Total number of items from which we should sample: 50 | the maximum value of a sampled item id will be smaller 51 | than this. 52 | shape: int or tuple of ints 53 | Shape of the sampled array. 54 | random_state: np.random.RandomState instance, optional 55 | Random state to use for sampling. 56 | 57 | Returns 58 | ------- 59 | 60 | items: np.array of shape [shape] 61 | Sampled item ids. 62 | """ 63 | 64 | if random_state is None: 65 | random_state = np.random.RandomState() 66 | 67 | items = random_state.randint(0, num_items, shape, dtype = np.int64) 68 | # items = random_state.randint(1, num_items, shape, dtype=np.int64) #random from 1 to num_items as 0 is PADDING_IDX 69 | 70 | return items 71 | 72 | # reuse from https://github.com/nguyenvo09/caser_pytorch/blob/master/train_caser.py#L203 73 | def get_train_instances(self, interactions, num_negatives, random_state=None): 74 | """ 75 | Sample negative from a candidate set of each user. 
The 76 | candidate set of each user is defined by: 77 | {All Items} \ {Items Rated by User} 78 | Parameters 79 | ---------- 80 | interactions: :class:`spotlight.interactions.Interactions` 81 | training instances, used for generate candidates 82 | num_negatives: int 83 | total number of negatives to sample for each sequence 84 | """ 85 | if random_state is None: 86 | random_state = np.random.RandomState() 87 | user_ids = interactions.user_ids.astype(np.int64) # may not be unique 88 | item_ids = interactions.item_ids.astype(np.int64) 89 | negative_samples = np.zeros((user_ids.shape[0], num_negatives), np.int64) 90 | if not self._candidate: 91 | all_items = np.arange(interactions.num_items) 92 | train = interactions.tocsr() 93 | for user, row in enumerate(train): 94 | self._candidate[user] = list(set(all_items) - set(row.indices)) 95 | 96 | for i, u in enumerate(user_ids): 97 | for j in range(num_negatives): 98 | x = self._candidate[u] 99 | negative_samples[i, j] = x[ 100 | random_state.randint(len(x))] 101 | 102 | return user_ids, item_ids, negative_samples -------------------------------------------------------------------------------- /pytorch/rank_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def precision_at_k(r, k): 5 | """Score is precision @ k 6 | 7 | Relevance is binary (nonzero is relevant). 8 | 9 | >>> r = [0, 0, 1] 10 | >>> precision_at_k(r, 1) 11 | 0.0 12 | >>> precision_at_k(r, 2) 13 | 0.0 14 | >>> precision_at_k(r, 3) 15 | 0.33333333333333331 16 | >>> precision_at_k(r, 4) 17 | Traceback (most recent call last): 18 | File "", line 1, in ? 19 | ValueError: Relevance score length < k 20 | 21 | 22 | Args: 23 | r: Relevance scores (list or numpy) in rank order 24 | (first element is the first item) 25 | 26 | Returns: 27 | Precision @ k 28 | 29 | Raises: 30 | ValueError: len(r) must be >= k 31 | """ 32 | assert k >= 1 33 | r = np.asarray(r)[:k] != 0 34 | if r.size != k: 35 | raise ValueError('Relevance score length < k') 36 | return np.mean(r) 37 | 38 | 39 | def dcg_at_k(r, k, method=1): 40 | """Score is discounted cumulative gain (dcg) 41 | 42 | Relevance is positive real values. Can use binary 43 | as the previous methods. 44 | 45 | Example from 46 | http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf 47 | >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0] 48 | >>> dcg_at_k(r, 1) 49 | 3.0 50 | >>> dcg_at_k(r, 1, method=1) 51 | 3.0 52 | >>> dcg_at_k(r, 2) 53 | 5.0 54 | >>> dcg_at_k(r, 2, method=1) 55 | 4.2618595071429155 56 | >>> dcg_at_k(r, 10) 57 | 9.6051177391888114 58 | >>> dcg_at_k(r, 11) 59 | 9.6051177391888114 60 | 61 | Args: 62 | r: Relevance scores (list or numpy) in rank order 63 | (first element is the first item) 64 | k: Number of results to consider 65 | method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...] 66 | If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...] 67 | 68 | Returns: 69 | Discounted cumulative gain 70 | """ 71 | r = np.asfarray(r)[:k] 72 | if r.size: 73 | # if method == 0: 74 | # return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1))) 75 | # elif method == 1: 76 | return np.sum(r / np.log2(np.arange(2, r.size + 2))) #chuan roi 77 | # else: 78 | # raise ValueError('method must be 0 or 1.') 79 | return 0. 80 | 81 | 82 | def ndcg_at_k(r, k, method=1): 83 | """Score is normalized discounted cumulative gain (ndcg) 84 | 85 | Relevance is positive real values. Can use binary 86 | as the previous methods. 
87 | 88 | Example from 89 | http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf 90 | >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0] 91 | >>> ndcg_at_k(r, 1) 92 | 1.0 93 | >>> r = [2, 1, 2, 0] 94 | >>> ndcg_at_k(r, 4) 95 | 0.9203032077642922 96 | >>> ndcg_at_k(r, 4, method=1) 97 | 0.96519546960144276 98 | >>> ndcg_at_k([0], 1) 99 | 0.0 100 | >>> ndcg_at_k([1], 2) 101 | 1.0 102 | 103 | Args: 104 | r: Relevance scores (list or numpy) in rank order 105 | (first element is the first item) 106 | k: Number of results to consider 107 | method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...] 108 | If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...] 109 | 110 | Returns: 111 | Normalized discounted cumulative gain 112 | """ 113 | dcg_max = dcg_at_k(sorted(r, reverse=True), k, method) 114 | if not dcg_max: 115 | return 0. 116 | return dcg_at_k(r, k, method) / dcg_max 117 | 118 | 119 | def _compute_precision_recall(rankedList, k): 120 | num_hit = np.sum(rankedList[:k]) 121 | precision = float(num_hit) / float(k) 122 | recall = float(num_hit) / (np.sum(rankedList) + 1e-10) 123 | return precision, recall 124 | 125 | 126 | def average_precision(r): 127 | """Score is average precision (area under PR curve) 128 | Relevance is binary (nonzero is relevant). 129 | >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1] 130 | >>> delta_r = 1. / sum(r) 131 | >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y]) 132 | 0.7833333333333333 133 | >>> average_precision(r) 134 | 0.78333333333333333 135 | Args: 136 | r: Relevance scores (list or numpy) in rank order 137 | (first element is the first item) 138 | Returns: 139 | Average precision 140 | """ 141 | r = np.asarray(r) != 0 142 | out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]] 143 | if not out: 144 | return 0. 
145 | return np.mean(out) 146 | 147 | 148 | if __name__ == "__main__": 149 | import doctest 150 | doctest.testmod() 151 | -------------------------------------------------------------------------------- /pytorch/handlers/data_handlers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import collections 3 | from tqdm import tqdm 4 | import pandas as pd 5 | from scipy import sparse 6 | import torch_utils 7 | from typing import List, Dict 8 | 9 | 10 | # reuse from neural collaborative filtering 11 | def load_rating_file_as_list(filename) -> List[List[int]]: 12 | ratingList = [] 13 | with open(filename, "r") as f: 14 | line = f.readline() 15 | while line != None and line != "": 16 | arr = line.split("\t") 17 | user, item = int(arr[0]), int(arr[1]) 18 | ratingList.append([user, item]) 19 | line = f.readline() 20 | return ratingList 21 | 22 | 23 | def load_rating_file_as_dict(filename): 24 | users = collections.defaultdict(list) 25 | fin = open(filename, "r") 26 | for line in fin: 27 | arr = line.split("\t") 28 | user, item = int(arr[0]), int(arr[1]) 29 | users[user].append(item) 30 | return users 31 | 32 | 33 | def generate_negatives(ratings, removed: List, n_items: int = 4834): 34 | part1, part2 = removed 35 | negatives = collections.defaultdict(list) 36 | all_negs = set(list(range(n_items))) 37 | for user, positive_items in ratings.items(): 38 | positives = set(positive_items) 39 | a = set(part1[user]) 40 | b = set(part2[user]) 41 | negs = all_negs - (positives | a | b) 42 | negs = sorted(list(negs)) 43 | assert len(negs) < n_items 44 | negatives[user] = negs 45 | return negatives 46 | 47 | 48 | # reuse from neural collaborative filtering 49 | def load_negative_file(ratings, filename) -> List[List[int]]: 50 | """ 51 | We need to make sure the consistency of negative samples per interaction. 52 | Parameters 53 | ---------- 54 | ratings: :class:`list[tuple]` list of ratings 55 | filename: :class:`string` filename of negative 56 | samples with format (u,i) neg1 neg2 neg3 57 | Returns 58 | ------- 59 | 60 | """ 61 | negativeList = [] 62 | cnt = 0 63 | with open(filename, "r") as f: 64 | line = f.readline() 65 | while line != None and line != "": 66 | arr = line.split("\t") 67 | point = arr[0][1:-1] 68 | a, b = point.split(",") 69 | point = [int(a), int(b)] 70 | assert ratings[cnt] == point 71 | cnt += 1 72 | negatives = [] 73 | for x in arr[1:]: 74 | negatives.append(int(x)) 75 | negativeList.append(negatives) 76 | line = f.readline() 77 | assert len(ratings) == len(negativeList) 78 | return negativeList 79 | 80 | 81 | def load_network(adj_network): 82 | """ 83 | Loading adjacent network information. 84 | 85 | :param adj_network: 86 | :return: an adjacent network for every vertex (every user) 87 | """ 88 | fin = open(adj_network, "r") 89 | graph = collections.defaultdict(list) 90 | for line in tqdm(fin): 91 | line = line.replace("\n", "") 92 | args = line.split("\t") 93 | node = args[0].replace("(", "") 94 | node = node.replace(")", "") 95 | node = int(node) 96 | assert node not in graph 97 | graph[node] = list(map(float, args[1:])) 98 | return graph 99 | 100 | 101 | def computeSPPMI_matrix(P, shifted_k: int, check_sym = True): 102 | """ 103 | Computing SPPMI matrix. Expected is a symmetric matrix. Diagonal is expected to be all zeros. 104 | :param P: A symmetric square matrix where each element is the frequency of pair (i, j). 
105 | :param shifted_k: `int` number of negative samples 106 | :param check_sym: `bool` checking symmetric or not. 107 | """ 108 | a, b = P.shape 109 | # print P.shape, type(P) 110 | P = np.asarray(P) 111 | # print 'Shape of P: ', P.shape 112 | assert a == b 113 | D = np.sum(P) 114 | cols = np.sum(P, axis = 0) # sum the columns (b,) 115 | rows = np.sum(P, axis = 1, keepdims=True) # sum the row (a,1) 116 | P = P / (cols + 1e-10) 117 | P = P / (rows + 1e-10) 118 | P = P * D 119 | PMI = np.log(P + 1e-10) 120 | S = PMI - np.log(shifted_k) 121 | mask = S > 0 122 | SPPMI = np.multiply(S, mask) 123 | assert np.min(SPPMI) == 0 124 | if check_sym: np.fill_diagonal(SPPMI, 0) # in-place operation 125 | assert torch_utils.check_symmetric(SPPMI) == True 126 | return SPPMI 127 | 128 | 129 | def load_data_item_item_for_sppmi(csvfile, no_items: int, check_sym = True): 130 | ''' 131 | We need to return co-occurrence matrix of item-item and guadians-guardians 132 | :param csvfile: 133 | :param n_guardians: 134 | :param n_urls: 135 | :return: 136 | ''' 137 | tp = pd.read_csv(csvfile) 138 | rows, cols, freqs = np.array(tp['item1']), np.array(tp['item2']), np.array(tp['freq']) 139 | assert freqs.shape == rows.shape and rows.shape == cols.shape 140 | 141 | weights = sparse.csr_matrix((np.array(tp['freq']), (rows, cols)), dtype=np.int16, shape=(no_items, no_items)) 142 | if check_sym: assert torch_utils.check_symmetric(weights.todense()) == True 143 | return weights.todense() 144 | 145 | 146 | def load_sim(csvfile, N): 147 | ''' 148 | We need to return co-occurrence matrix of item-item and guadians-guardians (keke) 149 | :param csvfile: 150 | :param n_guardians: 151 | :param n_urls: 152 | :return: 153 | ''' 154 | tp = pd.read_csv(csvfile, header = None, sep = ",") 155 | rows, cols, freqs = np.array(tp[0]), np.array(tp[1]), np.array(tp[2]) 156 | assert freqs.shape == rows.shape and rows.shape == cols.shape 157 | 158 | weights = sparse.csr_matrix((np.array(tp[2]), (rows, cols)), dtype=np.float32, shape=(N, N)) 159 | 160 | return weights.todense() 161 | 162 | 163 | def load_data_user_user_for_sppmi(csvfile, no_guardians): 164 | ''' 165 | We need to return co-occurrence matrix of item-item and guadians-guardians 166 | :param csvfile: 167 | :param n_guardians: 168 | :param n_urls: 169 | :return: 170 | ''' 171 | tp = pd.read_csv(csvfile) 172 | rows, cols, freqs = np.array(tp['guardian1']), np.array(tp['guardian2']), np.array(tp['freq']) 173 | assert freqs.shape == rows.shape and rows.shape == cols.shape 174 | 175 | weights = sparse.csr_matrix((np.array(tp['freq']), (rows, cols)), dtype=np.int16, shape=(no_guardians, no_guardians)) 176 | return weights.todense() 177 | 178 | 179 | if __name__ == '__main__': 180 | pass 181 | -------------------------------------------------------------------------------- /pytorch/nets.py: -------------------------------------------------------------------------------- 1 | import layers 2 | import torch_utils 3 | from torch import nn 4 | import torch 5 | 6 | 7 | class MF(nn.Module): 8 | def __init__(self, n_users, n_items, embedding_dim, pretrained_user_embeddings = None, pretrained_item_embeddings = None): 9 | super(MF, self).__init__() 10 | self.n_users = n_users 11 | self.n_items = n_items 12 | self.n_factors = embedding_dim 13 | 14 | if pretrained_user_embeddings is not None: 15 | self.user_embeddings = pretrained_user_embeddings 16 | else: 17 | self.user_embeddings = layers.ScaledEmbedding(n_users, embedding_dim) 18 | 19 | if pretrained_item_embeddings is not None: 20 | 
self.item_embeddings = pretrained_item_embeddings 21 | else: 22 | self.item_embeddings = layers.ScaledEmbedding(n_items, embedding_dim) 23 | 24 | self.user_bias = layers.ZeroEmbedding(n_users, 1) 25 | self.item_bias = layers.ZeroEmbedding(n_items, 1) 26 | 27 | def forward(self, uids, iids): 28 | user_embeds = self.user_embeddings(uids) #first dimension is batch size 29 | item_embeds = self.item_embeddings(iids) 30 | 31 | user_embeds = torch_utils.flatten(user_embeds) 32 | item_embeds = torch_utils.flatten(item_embeds) 33 | 34 | user_bias = self.user_bias(uids) 35 | item_bias = self.item_bias(iids) 36 | 37 | user_bias = torch_utils.flatten(user_bias) #bias has size batch_size * 1 38 | item_bias = torch_utils.flatten(item_bias) #bias has size batch_size * 1 39 | 40 | dot_product = (user_embeds * item_embeds).sum(1) #first dimension is batch_size, return dimension (batch_size) 41 | # dot_product = torch.mul(user_embeds, item_embeds).sum(1) # first dimension is batch_size 42 | 43 | return dot_product + user_bias.squeeze() + item_bias.squeeze() 44 | 45 | 46 | class GAU(nn.Module): 47 | def __init__(self, n_users, n_items, embedding_dim): 48 | super(GAU, self).__init__() 49 | self.n_users = n_users 50 | self.n_items = n_items 51 | self.n_factors = embedding_dim 52 | 53 | self.user_embeddings = layers.ScaledEmbedding(n_users, embedding_dim) 54 | self.item_embeddings = layers.ScaledEmbedding(n_items, embedding_dim) 55 | 56 | # addtional parameters in GAU model 57 | self.user_additional = layers.ScaledEmbedding(n_users, embedding_dim) 58 | self.item_additional = layers.ScaledEmbedding(n_items, embedding_dim) 59 | 60 | self.user_bias = layers.ZeroEmbedding(n_users, 1) 61 | self.item_bias = layers.ZeroEmbedding(n_items, 1) 62 | 63 | def forward(self, uids, iids, network = None): 64 | """ 65 | 66 | :param uids: shape (B, ) 67 | :param iids: shape (B, ) 68 | :param network: tuple where first element is a tensor array of shape (X, ), shape (X, total-number-of-user) where X <= B 69 | :return: 70 | """ 71 | user_embeds = self.user_embeddings(uids) #first dimension is batch size 72 | item_embeds = self.item_embeddings(iids) 73 | 74 | user_embeds = torch_utils.flatten(user_embeds) 75 | item_embeds = torch_utils.flatten(item_embeds) 76 | 77 | user_bias = self.user_bias(uids) 78 | item_bias = self.item_bias(iids) 79 | 80 | user_bias = torch_utils.flatten(user_bias) #bias has size batch_size * 1 81 | item_bias = torch_utils.flatten(item_bias) #bias has size batch_size * 1 82 | loss = (user_embeds * item_embeds).sum(1) + \ 83 | user_bias.squeeze() + item_bias.squeeze() # first dimension is batch_size, return dimension (batch_size) 84 | 85 | return loss 86 | 87 | def network_loss(self, network): 88 | target, binary_weights = network 89 | target_embeds = self.user_embeddings(target) # shape (X, D) 90 | W = torch.mm(target_embeds, torch.transpose(self.user_embeddings.weight, 0, 1)) # (X, D) x (n_users x D) 91 | network_loss = ((W - binary_weights) ** 2).sum() 92 | return network_loss 93 | 94 | def user_user_sppmi_loss(self, sppmi): 95 | target, weights = sppmi 96 | target_embeds = self.user_embeddings(target) # shape (X, D) 97 | W = torch.mm(target_embeds, torch.transpose(self.user_additional.weight, 0, 1)) # (X, D) x (n_users x D) 98 | mask = weights > 0 99 | A = mask.type(torch.float) * (W - weights) 100 | sppmi_loss = (A ** 2).sum() 101 | return sppmi_loss 102 | 103 | def item_item_sppmi_loss(self, sppmi): 104 | target, weights = sppmi 105 | target_embeds = self.item_embeddings(target) # shape (X, D) 106 | W = 
torch.mm(target_embeds, torch.transpose(self.item_additional.weight, 0, 1)) # (X, D) x (n_users x D) 107 | mask = weights > 0 108 | A = mask.type(torch.float) * (W - weights) 109 | sppmi_loss = (A ** 2).sum() 110 | return sppmi_loss 111 | 112 | def user_user_sim_loss(self, sim): 113 | target, weights = sim 114 | N, s, D = self.n_users, len(target), self.n_factors 115 | target_embeds = self.user_embeddings(target) # shape (X, D) 116 | B = target_embeds.repeat(N, 1).view(N, s, D).transpose(0, 1) 117 | A = B - self.user_embeddings.weight 118 | A = (A ** 2).sum(dim = -1) 119 | assert A.shape == (s, N) 120 | assert weights.shape == (s, N) 121 | loss = (A * weights).sum() 122 | # print("here, ", loss ) 123 | return loss 124 | 125 | def item_item_sim_loss(self, sim): 126 | target, weights = sim 127 | N, s, D = self.n_items, len(target), self.n_factors 128 | target_embeds = self.item_embeddings(target) # shape (X, D) 129 | B = target_embeds.repeat(N, 1).view(N, s, D).transpose(0, 1) 130 | A = B - self.item_embeddings.weight 131 | # print(A) 132 | A = (A ** 2).sum(dim = -1) 133 | assert A.shape == (s, N) 134 | assert weights.shape == (s, N) 135 | # print(weights) 136 | loss = (A * weights).sum() 137 | # print("item-item-sim, ", loss ) 138 | return loss 139 | 140 | def predict(self, uids, iids): 141 | user_embeds = self.user_embeddings(uids) # first dimension is batch size 142 | item_embeds = self.item_embeddings(iids) 143 | 144 | user_embeds = torch_utils.flatten(user_embeds) 145 | item_embeds = torch_utils.flatten(item_embeds) 146 | 147 | dot_products = (user_embeds * item_embeds).sum(1) 148 | 149 | return dot_products 150 | 151 | -------------------------------------------------------------------------------- /pytorch/Masters/master_gau.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0,'../../pytorch') 3 | import os 4 | from Models import GAU as gau 5 | import argparse 6 | import time 7 | import interactions 8 | from handlers import data_handlers as my_data_handlers 9 | 10 | 11 | def fit_models(args): 12 | train_file = os.path.join(args.path, args.dataset, '%s.train.rating' % args.dataset) 13 | vad_file = os.path.join(args.path, args.dataset, '%s.dev.rating' % args.dataset) 14 | test_file = os.path.join(args.path, args.dataset, '%s.test.rating' % args.dataset) 15 | 16 | network_file = os.path.join(args.path, args.dataset, '%s.adjacency.network' % args.dataset) 17 | user_user_sppmi_raw_file = os.path.join(args.path, args.dataset, '%s.user_user.frequency.csv' % args.dataset) 18 | item_item_sppmi_raw_file = os.path.join(args.path, args.dataset, '%s.item_item.frequency.csv' % args.dataset) 19 | user_user_sim_file = os.path.join(args.path, args.dataset, '%s.user_user_cosine_sim.csv' % args.dataset) 20 | item_item_sim_file = os.path.join(args.path, args.dataset, '%s.url_url_cosine_sim.csv' % args.dataset) 21 | 22 | trainRatings = my_data_handlers.load_rating_file_as_dict(train_file) 23 | vadRatings = my_data_handlers.load_rating_file_as_dict(vad_file) 24 | testRatings = my_data_handlers.load_rating_file_as_dict(test_file) 25 | vadNegatives = my_data_handlers.generate_negatives(vadRatings, removed = [trainRatings, testRatings]) 26 | testNegatives = my_data_handlers.generate_negatives(testRatings, removed = [trainRatings, vadRatings]) 27 | 28 | rec_model = gau.GAU_model(loss = args.loss_type, # 'pointwise, bpr, hinge, adaptive_hinge' 29 | embedding_dim = args.num_factors, 30 | n_iter = args.epochs, 31 | batch_size = args.batch_size, 
32 | reg_l2 = args.reg, # L2 regularization 33 | learning_rate = args.lr, # learning_rate 34 | decay_step = args.decay_step, # step to decay the learning rate 35 | decay_weight = args.decay_weight, # percentage to decay the learning rat. 36 | optimizer_func = None, 37 | use_cuda = args.cuda, 38 | random_state = None, 39 | num_negative_samples = args.num_neg, 40 | trained_net = None, 41 | net_type = args.model, 42 | logfolder = args.log, 43 | full_settings = args) 44 | # t0 = time.time() 45 | t1 = time.time() 46 | print('parsing data') 47 | train_iteractions = interactions.load_data(train_file, dataset = args.dataset) 48 | adjNetwork = my_data_handlers.load_network(network_file) 49 | item_item_mat_freq = my_data_handlers.load_data_item_item_for_sppmi(item_item_sppmi_raw_file, no_items = train_iteractions.num_items) 50 | user_user_mat_freq = my_data_handlers.load_data_user_user_for_sppmi(user_user_sppmi_raw_file, no_guardians = train_iteractions.num_users) 51 | item_item_sppmi = my_data_handlers.computeSPPMI_matrix(item_item_mat_freq, shifted_k = args.shifted_k) 52 | user_user_sppmi = my_data_handlers.computeSPPMI_matrix(user_user_mat_freq, shifted_k = args.shifted_k) 53 | user_user_sim = my_data_handlers.load_sim(user_user_sim_file, train_iteractions.num_users) 54 | item_item_sim = my_data_handlers.load_sim(item_item_sim_file, train_iteractions.num_items) 55 | print('done extracting') 56 | t2 = time.time() 57 | print('loading data time: %d (seconds)' % (t2 - t1)) 58 | 59 | print('building the model') 60 | 61 | try: 62 | rec_model.fit(train_iteractions, 63 | verbose = True, # for printing out evaluation during training 64 | topN = 10, 65 | vadRatings = vadRatings, vadNegatives = vadNegatives, 66 | testRatings = testRatings, testNegatives = testNegatives, 67 | adjNetwork = adjNetwork, 68 | user_user_sppmi = user_user_sppmi, 69 | item_item_sppmi = item_item_sppmi, 70 | user_user_sim = user_user_sim, 71 | item_item_sim = item_item_sim, 72 | alpha_gau = args.alpha_gau, 73 | gamma_gau = args.gamma_gau, 74 | beta_gau = args.beta_gau) 75 | except KeyboardInterrupt: 76 | print('Exiting from training early') 77 | t10 = time.time() 78 | print('Total time: %d (seconds)' % (t10 - t1)) 79 | 80 | 81 | if __name__ == '__main__': 82 | parser = argparse.ArgumentParser("Description: GAU Model") 83 | parser.add_argument('--path', default = '../Splitted_data', help = 'Input data path', type = str) 84 | parser.add_argument('--dataset', default = 'sigir18', help = 'Dataset types', type = str) 85 | parser.add_argument('--epochs', default = 100, help = 'Number of epochs to run', type = int) 86 | parser.add_argument('--batch_size', default = 256, help = 'Batch size', type = int) 87 | parser.add_argument('--num_factors', default = 8, help = 'number of latent factors', type = int) 88 | parser.add_argument('--reg', type=float, default = 1e-6, help = 'Regularization for users and item embeddings') 89 | parser.add_argument('--num_neg', default = 3, type = int, help = 'Number of negative instances for each positive sample') 90 | parser.add_argument('--shifted_k', default = 2, type = int, help = 'shifted_k for computing SPPMI matrices: [1, 2, 5, 10, 50]') 91 | parser.add_argument('--alpha_gau', default = 0.8, type = float, help = 'factor to control contribution of network information') 92 | parser.add_argument('--gamma_gau', default = 0.8, type = float, help = 'factor to control contribution of user similarity') 93 | parser.add_argument('--beta_gau', default = 0.8, type = float, help = 'factor to control contribution of 
item similarity') 94 | parser.add_argument('--lr', default = 0.001, type = float, help = 'Learning rate') 95 | parser.add_argument('--log', default = "../logs/GAU_log", type = str, help = 'folder for logs and saved models') 96 | parser.add_argument('--optimizer', nargs = '?', default = 'adam', help = 'Specify an optimizer: adagrad, adam, rmsprop, sgd') 97 | parser.add_argument('--loss_type', nargs = '?', default = 'single_pointwise_square_loss') 98 | parser.add_argument('--model', default = 'gau', help = 'Selecting the model type [gau]', type = str) 99 | parser.add_argument('--topk', type = int, default = 10, help = 'top K') 100 | parser.add_argument('--cuda', type = int, default = 1, help = 'using cuda or not') 101 | parser.add_argument('--decay_step', type = int, default = 100, help = 'how many steps to decay the learning rate') 102 | parser.add_argument('--decay_weight', type = float, default = 0.0001, help = 'percent of decaying') 103 | 104 | args = parser.parse_args() 105 | fit_models(args) 106 | 107 | -------------------------------------------------------------------------------- /pytorch/interactions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes describing datasets of user-item interactions. Instances of these 3 | are returned by dataset-fetching and dataset-processing functions. 4 | """ 5 | 6 | import numpy as np 7 | 8 | import scipy.sparse as sp 9 | import json 10 | import pandas as pd 11 | 12 | 13 | def _sliding_window(tensor, window_size, step_size=1): 14 | 15 | for i in range(len(tensor), 0, -step_size): 16 | yield tensor[max(i - window_size, 0):i] 17 | 18 | 19 | def _generate_sequences(user_ids, item_ids, 20 | indices, 21 | max_sequence_length, 22 | step_size): 23 | 24 | for i in range(len(indices)): 25 | 26 | start_idx = indices[i] 27 | 28 | if i >= len(indices) - 1: 29 | stop_idx = None 30 | else: 31 | stop_idx = indices[i + 1] 32 | 33 | for seq in _sliding_window(item_ids[start_idx:stop_idx], 34 | max_sequence_length, 35 | step_size): 36 | 37 | yield (user_ids[i], seq) 38 | 39 | def load_data(path, sep = '\t', header=None, dataset=None): 40 | data = pd.read_csv(path, sep=sep, header=header) 41 | user_ids = np.asarray(data[0]) # data type? 42 | item_ids = np.asarray(data[1]) 43 | ratings = None 44 | timestamps = None 45 | if data.shape[1] >= 4: 46 | #contain timestamp 47 | timestamps = data[3] 48 | return Interactions(user_ids, item_ids, ratings, timestamps, dataset=dataset) 49 | 50 | 51 | class Interactions(object): 52 | """ 53 | Interactions object. Contains (at a minimum) pair of user-item 54 | interactions, but can also be enriched with ratings, timestamps, 55 | and interaction weights. 56 | 57 | For *implicit feedback* scenarios, user ids and item ids should 58 | only be provided for user-item pairs where an interaction was 59 | observed. All pairs that are not provided are treated as missing 60 | observations, and often interpreted as (implicit) negative 61 | signals. 62 | 63 | For *explicit feedback* scenarios, user ids, item ids, and 64 | ratings should be provided for all user-item-rating triplets 65 | that were observed in the dataset. 
66 | 67 | Parameters 68 | ---------- 69 | 70 | user_ids: array of np.int32 71 | array of user ids of the user-item pairs 72 | item_ids: array of np.int32 73 | array of item ids of the user-item pairs 74 | ratings: array of np.float32, optional 75 | array of ratings 76 | timestamps: array of np.int32, optional 77 | array of timestamps 78 | weights: array of np.float32, optional 79 | array of weights 80 | num_users: int, optional 81 | Number of distinct users in the dataset. 82 | Must be larger than the maximum user id 83 | in user_ids. 84 | num_items: int, optional 85 | Number of distinct items in the dataset. 86 | Must be larger than the maximum item id 87 | in item_ids. 88 | 89 | Attributes 90 | ---------- 91 | 92 | user_ids: array of np.int32 93 | array of user ids of the user-item pairs 94 | item_ids: array of np.int32 95 | array of item ids of the user-item pairs 96 | ratings: array of np.float32, optional 97 | array of ratings 98 | timestamps: array of np.int32, optional 99 | array of timestamps 100 | weights: array of np.float32, optional 101 | array of weights 102 | num_users: int, optional 103 | Number of distinct users in the dataset. 104 | num_items: int, optional 105 | Number of distinct items in the dataset. 106 | """ 107 | 108 | def __init__(self, user_ids, item_ids, 109 | ratings=None, 110 | timestamps=None, 111 | weights=None, 112 | num_users=None, 113 | num_items=None, 114 | dataset = "sigir18"): 115 | 116 | # doing this is quite risky: if there are 4834 items but item with id 4833 may not appear in train 117 | self.num_users = num_users or int(user_ids.max() + 1) 118 | self.num_items = num_items or int(item_ids.max() + 1) 119 | 120 | 121 | self.user_ids = user_ids 122 | self.item_ids = item_ids 123 | self.ratings = ratings 124 | self.timestamps = timestamps 125 | self.weights = weights 126 | self.used_dataset = dataset 127 | 128 | self._check() 129 | 130 | def __repr__(self): 131 | 132 | return ('' 134 | .format( 135 | num_users=self.num_users, 136 | num_items=self.num_items, 137 | num_interactions=len(self) 138 | )) 139 | 140 | def __len__(self): 141 | 142 | return len(self.user_ids) 143 | 144 | def _check(self): 145 | 146 | if self.user_ids.max() >= self.num_users: 147 | raise ValueError('Maximum user id greater ' 148 | 'than declared number of users.') 149 | if self.item_ids.max() >= self.num_items: 150 | raise ValueError('Maximum item id greater ' 151 | 'than declared number of items.') 152 | 153 | num_interactions = len(self.user_ids) 154 | 155 | for name, value in (('item IDs', self.item_ids), 156 | ('ratings', self.ratings), 157 | ('timestamps', self.timestamps), 158 | ('weights', self.weights)): 159 | 160 | if value is None: 161 | continue 162 | 163 | if len(value) != num_interactions: 164 | raise ValueError('Invalid {} dimensions: length ' 165 | 'must be equal to number of interactions' 166 | .format(name)) 167 | 168 | def tocoo(self): 169 | """ 170 | Transform to a scipy.sparse COO matrix. 171 | """ 172 | 173 | row = self.user_ids 174 | col = self.item_ids 175 | data = self.ratings if self.ratings is not None else np.ones(len(self)) 176 | 177 | return sp.coo_matrix((data, (row, col)), 178 | shape=(self.num_users, self.num_items)) 179 | 180 | def tocsr(self): 181 | """ 182 | Transform to a scipy.sparse CSR matrix. 183 | """ 184 | 185 | return self.tocoo().tocsr() 186 | 187 | def todok(self): 188 | """ 189 | Transform to a scipy.sparse.dok_matrix CSR matrix. 
190 | """ 191 | return self.tocoo().todok() 192 | 193 | def to_sequence(self, max_sequence_length=10, min_sequence_length=None, step_size=None): 194 | """ 195 | Transform to sequence form. 196 | 197 | User-item interaction pairs are sorted by their timestamps, 198 | and sequences of up to max_sequence_length events are arranged 199 | into a (zero-padded from the left) matrix with dimensions 200 | (num_sequences x max_sequence_length). 201 | 202 | Valid subsequences of users' interactions are returned. For 203 | example, if a user interacted with items [1, 2, 3, 4, 5], the 204 | returned interactions matrix at sequence length 5 and step size 205 | 1 will be be given by: 206 | 207 | .. code-block:: python 208 | 209 | [[1, 2, 3, 4, 5], 210 | [0, 1, 2, 3, 4], 211 | [0, 0, 1, 2, 3], 212 | [0, 0, 0, 1, 2], 213 | [0, 0, 0, 0, 1]] 214 | 215 | At step size 2: 216 | 217 | .. code-block:: python 218 | 219 | [[1, 2, 3, 4, 5], 220 | [0, 0, 1, 2, 3], 221 | [0, 0, 0, 0, 1]] 222 | 223 | Parameters 224 | ---------- 225 | 226 | max_sequence_length: int, optional 227 | Maximum sequence length. Subsequences shorter than this 228 | will be left-padded with zeros. 229 | min_sequence_length: int, optional 230 | If set, only sequences with at least min_sequence_length 231 | non-padding elements will be returned. 232 | step-size: int, optional 233 | The returned subsequences are the effect of moving a 234 | a sliding window over the input. This parameter 235 | governs the stride of that window. Increasing it will 236 | result in fewer subsequences being returned. 237 | 238 | Returns 239 | ------- 240 | 241 | sequence interactions: :class:`~SequenceInteractions` 242 | The resulting sequence interactions. 243 | """ 244 | 245 | if self.timestamps is None: 246 | raise ValueError('Cannot convert to sequences, ' 247 | 'timestamps not available.') 248 | 249 | if 0 in self.item_ids: 250 | raise ValueError('0 is used as an item id, conflicting ' 251 | 'with the sequence padding value.') 252 | 253 | if step_size is None: 254 | step_size = max_sequence_length 255 | 256 | # Sort first by user id, then by timestamp 257 | sort_indices = np.lexsort((self.timestamps, 258 | self.user_ids)) 259 | 260 | user_ids = self.user_ids[sort_indices] 261 | item_ids = self.item_ids[sort_indices] 262 | 263 | user_ids, indices, counts = np.unique(user_ids, 264 | return_index=True, 265 | return_counts=True) 266 | 267 | num_subsequences = int(np.ceil(counts / float(step_size)).sum()) 268 | 269 | sequences = np.zeros((num_subsequences, max_sequence_length), 270 | dtype=np.int32) 271 | sequence_users = np.empty(num_subsequences, 272 | dtype=np.int32) 273 | for i, (uid, 274 | seq) in enumerate(_generate_sequences(user_ids, 275 | item_ids, 276 | indices, 277 | max_sequence_length, 278 | step_size)): 279 | sequences[i][-len(seq):] = seq 280 | sequence_users[i] = uid 281 | 282 | if min_sequence_length is not None: 283 | long_enough = sequences[:, -min_sequence_length] != 0 284 | sequences = sequences[long_enough] 285 | sequence_users = sequence_users[long_enough] 286 | 287 | return (SequenceInteractions(sequences, 288 | user_ids=sequence_users, 289 | num_items=self.num_items)) 290 | 291 | 292 | class SequenceInteractions(object): 293 | """ 294 | Interactions encoded as a sequence matrix. 
295 | 296 | Parameters 297 | ---------- 298 | 299 | sequences: array of np.int32 of shape (num_sequences x max_sequence_length) 300 | The interactions sequence matrix, as produced by 301 | :func:`~Interactions.to_sequence` 302 | num_items: int, optional 303 | The number of distinct items in the data 304 | 305 | Attributes 306 | ---------- 307 | 308 | sequences: array of np.int32 of shape (num_sequences x max_sequence_length) 309 | The interactions sequence matrix, as produced by 310 | :func:`~Interactions.to_sequence` 311 | """ 312 | 313 | def __init__(self, 314 | sequences, 315 | user_ids=None, num_items=None): 316 | 317 | self.sequences = sequences 318 | self.user_ids = user_ids 319 | self.max_sequence_length = sequences.shape[1] 320 | 321 | if num_items is None: 322 | self.num_items = sequences.max() + 1 323 | else: 324 | self.num_items = num_items 325 | 326 | def __repr__(self): 327 | 328 | num_sequences, sequence_length = self.sequences.shape 329 | 330 | return ('' 332 | .format( 333 | num_sequences=num_sequences, 334 | sequence_length=sequence_length, 335 | )) 336 | -------------------------------------------------------------------------------- /pytorch/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Loss functions for recommender models. 3 | 4 | The pointwise, BPR, and hinge losses are a good fit for 5 | implicit feedback models trained through negative sampling. 6 | 7 | The regression and Poisson losses are used for explicit feedback 8 | models. 9 | """ 10 | 11 | import torch 12 | import numpy as np 13 | import torch.nn.functional as F 14 | from torch.autograd import Variable 15 | 16 | from torch_utils import assert_no_grad 17 | 18 | 19 | def single_pointwise_square_loss(positive_predictions, mask=None, average=False): 20 | """ 21 | square loss: (y - y_hat)^2 22 | 23 | This is also known as one-class prediction (i.e. no use negative sample at all). 24 | Learned from Neural Collaborative Filtering (Section 3.1.1) 25 | 26 | ref: https://dl.acm.org/citation.cfm?id=3052569 27 | Parameters 28 | ---------- 29 | positive_predictions: :class:`torch.Tensor` 30 | shape (batch_size, ) 31 | mask: :class:`numpy.ndarray` 32 | shape (batch_size, ) 33 | average 34 | 35 | Returns 36 | ------- 37 | 38 | """ 39 | positive_labels = np.ones(positive_predictions.size()).flatten() 40 | is_cuda = positive_predictions.is_cuda 41 | if is_cuda: 42 | positive_labels = Variable(torch.from_numpy(positive_labels)).type(torch.FloatTensor).cuda() # fix expected FloatTensor but got LongTensor 43 | else: 44 | positive_labels = Variable(torch.from_numpy(positive_labels)).type(torch.FloatTensor) #fix expected FloatTensor but got LongTensor 45 | 46 | positive_loss = (positive_predictions - positive_labels) * (positive_predictions - positive_labels) 47 | loss = positive_loss 48 | if mask is not None: 49 | mask = mask.float() 50 | loss = loss * mask 51 | return loss.sum() / mask.sum() 52 | if average: 53 | return loss.mean() 54 | else: 55 | return loss.sum() 56 | 57 | 58 | def single_pointwise_bceloss(positive_predictions, mask=None, average=False): 59 | """ 60 | This is cross entropy without negative sampling. Similar to classification problem where 61 | we only want to predict a score of a data instance in our dataset. 62 | 63 | This is also known as one-class prediction (i.e. no use negative sample at all). 
64 | Learned from Neural Collaborative Filtering (Section 3.1.1) 65 | 66 | ref: https://dl.acm.org/citation.cfm?id=3052569 67 | Parameters 68 | ---------- 69 | positive_predictions: :class:`torch.Tensor` 70 | shape (batch_size, ) 71 | mask 72 | average 73 | 74 | Returns 75 | ------- 76 | 77 | """ 78 | positive_labels = np.ones(positive_predictions.size()).flatten() 79 | is_cuda = positive_predictions.is_cuda 80 | if is_cuda: 81 | positive_labels = Variable(torch.from_numpy(positive_labels)).type(torch.FloatTensor).cuda() # fix expected FloatTensor but got LongTensor 82 | else: 83 | positive_labels = Variable(torch.from_numpy(positive_labels)).type(torch.FloatTensor) #fix expected FloatTensor but got LongTensor 84 | positive_predictions = F.sigmoid(positive_predictions) 85 | positive_loss = F.binary_cross_entropy(positive_predictions, positive_labels) 86 | loss = positive_loss 87 | if mask is not None: 88 | mask = mask.float() 89 | loss = loss * mask 90 | return loss.sum() / mask.sum() 91 | if average: 92 | return loss.mean() 93 | else: 94 | return loss.sum() 95 | 96 | 97 | def pointwise_bceloss(positive_predictions, negative_predictions, mask=None, average=False): 98 | """ 99 | This is cross entropy loss. The difference is that for every positive instance, we also negative sample 100 | a negative instance to compute the loss. 101 | 102 | Learned from Neural Collaborative Filtering (Section 3.1.1) 103 | 104 | ref: https://dl.acm.org/citation.cfm?id=3052569 105 | 106 | Parameters 107 | ---------- 108 | positive_predictions: :class:`torch.Tensor` 109 | shape (batch_size, ) 110 | negative_predictions: :class:`torch.Tensor` 111 | shape (batch_size, ) 112 | mask 113 | average 114 | 115 | Returns 116 | ------- 117 | 118 | """ 119 | positive_labels = np.ones(positive_predictions.size()).flatten() 120 | negative_labels = np.zeros(negative_predictions.size()).flatten() 121 | 122 | is_cuda = positive_predictions.is_cuda 123 | if is_cuda: 124 | positive_labels = Variable(torch.from_numpy(positive_labels)).type(torch.FloatTensor).cuda() # fix expected FloatTensor but got LongTensor 125 | negative_labels = Variable(torch.from_numpy(negative_labels)).type(torch.FloatTensor).cuda() # fix expected FloatTensor but got LongTensor 126 | else: 127 | positive_labels = Variable(torch.from_numpy(positive_labels)).type(torch.FloatTensor) #fix expected FloatTensor but got LongTensor 128 | negative_labels = Variable(torch.from_numpy(negative_labels)).type(torch.FloatTensor) #fix expected FloatTensor but got LongTensor 129 | 130 | positive_predictions = F.sigmoid(positive_predictions) 131 | negative_predictions = F.sigmoid(negative_predictions) 132 | 133 | positive_loss = F.binary_cross_entropy(positive_predictions, positive_labels) 134 | negative_loss = F.binary_cross_entropy(negative_predictions, negative_labels) 135 | 136 | loss = positive_loss + negative_loss 137 | 138 | if mask is not None: 139 | mask = mask.float() 140 | loss = loss * mask 141 | return loss.sum() / mask.sum() 142 | 143 | if average: 144 | return loss.mean() 145 | else: 146 | return loss.sum() 147 | 148 | def pointwise_loss(positive_predictions, negative_predictions, mask=None): 149 | """ 150 | Logistic loss function. 151 | 152 | Parameters 153 | ---------- 154 | 155 | positive_predictions: :class:`torch.Tensor` 156 | Tensor containing predictions for known positive items. 157 | negative_predictions: :class:`torch.Tensor` 158 | Tensor containing predictions for sampled negative items. 
159 | mask: tensor, optional 160 | A binary tensor used to zero the loss from some entries 161 | of the loss tensor. 162 | 163 | Returns 164 | ------- 165 | 166 | loss, float 167 | The mean value of the loss function. 168 | """ 169 | 170 | positives_loss = (1.0 - F.sigmoid(positive_predictions)) 171 | negatives_loss = F.sigmoid(negative_predictions) 172 | 173 | loss = (positives_loss + negatives_loss) 174 | 175 | if mask is not None: 176 | mask = mask.float() 177 | loss = loss * mask 178 | return loss.sum() / mask.sum() 179 | 180 | return loss.mean() 181 | 182 | 183 | def bpr_loss(positive_predictions, negative_predictions, mask=None, average=False): 184 | """ 185 | Bayesian Personalised Ranking [1]_ pairwise loss function. 186 | 187 | Parameters 188 | ---------- 189 | 190 | positive_predictions: tensor 191 | Tensor containing predictions for known positive items. 192 | negative_predictions: tensor 193 | Tensor containing predictions for sampled negative items. 194 | mask: tensor, optional 195 | A binary tensor used to zero the loss from some entries 196 | of the loss tensor. 197 | 198 | Returns 199 | ------- 200 | 201 | loss, float 202 | The mean value of the loss function. 203 | 204 | References 205 | ---------- 206 | 207 | .. [1] Rendle, Steffen, et al. "BPR: Bayesian personalized ranking from 208 | implicit feedback." Proceedings of the twenty-fifth conference on 209 | uncertainty in artificial intelligence. AUAI Press, 2009. 210 | """ 211 | # old code of Spotlight 212 | # loss = (1.0 - F.sigmoid(positive_predictions - negative_predictions)) 213 | 214 | # ben: checked 215 | loss = - torch.log(F.sigmoid(positive_predictions - negative_predictions)) 216 | 217 | if mask is not None: 218 | mask = mask.float() 219 | loss = loss * mask 220 | return loss.sum() / mask.sum() 221 | 222 | if average: 223 | return loss.mean() 224 | else: 225 | return loss.sum() 226 | 227 | 228 | 229 | def hinge_loss(positive_predictions, negative_predictions, mask=None, average=False): 230 | """ 231 | Hinge pairwise loss function. 232 | 233 | Parameters 234 | ---------- 235 | 236 | positive_predictions: tensor 237 | Tensor containing predictions for known positive items. 238 | negative_predictions: tensor 239 | Tensor containing predictions for sampled negative items. 240 | mask: tensor, optional 241 | A binary tensor used to zero the loss from some entries 242 | of the loss tensor. 243 | 244 | Returns 245 | ------- 246 | 247 | loss, float 248 | The mean value of the loss function. 249 | """ 250 | # checked, usually we need to use a threshold as soft-margin (but this function does not have it) 251 | loss = torch.clamp(negative_predictions - 252 | positive_predictions + 253 | 1.0, 0.0) 254 | 255 | if mask is not None: 256 | mask = mask.float() 257 | loss = loss * mask 258 | return loss.sum() / mask.sum() 259 | 260 | if average: 261 | return loss.mean() 262 | else: 263 | return loss.sum() 264 | 265 | 266 | def adaptive_hinge_loss(positive_predictions, negative_predictions, mask=None): 267 | """ 268 | Adaptive hinge pairwise loss function. Takes a set of predictions 269 | for implicitly negative items, and selects those that are highest, 270 | thus sampling those negatives that are closes to violating the 271 | ranking implicit in the pattern of user interactions. 272 | 273 | Approximates the idea of weighted approximate-rank pairwise loss 274 | introduced in [2]_ 275 | 276 | Parameters 277 | ---------- 278 | 279 | positive_predictions: tensor 280 | Tensor containing predictions for known positive items. 
281 | negative_predictions: tensor 282 | Iterable of tensors containing predictions for sampled negative items. 283 | More tensors increase the likelihood of finding ranking-violating 284 | pairs, but risk overfitting. 285 | mask: tensor, optional 286 | A binary tensor used to zero the loss from some entries 287 | of the loss tensor. 288 | 289 | Returns 290 | ------- 291 | 292 | loss, float 293 | The mean value of the loss function. 294 | 295 | References 296 | ---------- 297 | 298 | .. [2] Weston, Jason, Samy Bengio, and Nicolas Usunier. "Wsabie: 299 | Scaling up to large vocabulary image annotation." IJCAI. 300 | Vol. 11. 2011. 301 | """ 302 | # checked 303 | highest_negative_predictions, _ = torch.max(negative_predictions, 0) 304 | 305 | return hinge_loss(positive_predictions, highest_negative_predictions.squeeze(), mask=mask) 306 | 307 | 308 | def regression_loss(observed_ratings, predicted_ratings): 309 | """ 310 | Regression loss. 311 | 312 | Parameters 313 | ---------- 314 | 315 | observed_ratings: tensor 316 | Tensor containing observed ratings. 317 | predicted_ratings: tensor 318 | Tensor containing rating predictions. 319 | 320 | Returns 321 | ------- 322 | 323 | loss, float 324 | The mean value of the loss function. 325 | """ 326 | 327 | assert_no_grad(observed_ratings) 328 | 329 | return ((observed_ratings - predicted_ratings) ** 2).mean() 330 | 331 | 332 | def poisson_loss(observed_ratings, predicted_ratings): 333 | """ 334 | Poisson loss. 335 | 336 | Parameters 337 | ---------- 338 | 339 | observed_ratings: tensor 340 | Tensor containing observed ratings. 341 | predicted_ratings: tensor 342 | Tensor containing rating predictions. 343 | 344 | Returns 345 | ------- 346 | 347 | loss, float 348 | The mean value of the loss function. 349 | """ 350 | 351 | assert_no_grad(observed_ratings) 352 | 353 | return (predicted_ratings - observed_ratings * torch.log(predicted_ratings)).mean() 354 | 355 | 356 | def logistic_loss(observed_ratings, predicted_ratings): 357 | """ 358 | Logistic loss for explicit data. 359 | 360 | Parameters 361 | ---------- 362 | 363 | observed_ratings: tensor 364 | Tensor containing observed ratings which 365 | should be +1 or -1 for this loss function. 366 | predicted_ratings: tensor 367 | Tensor containing rating predictions. 368 | 369 | Returns 370 | ------- 371 | 372 | loss, float 373 | The mean value of the loss function. 374 | """ 375 | 376 | assert_no_grad(observed_ratings) 377 | 378 | # Convert target classes from (-1, 1) to (0, 1) 379 | observed_ratings = torch.clamp(observed_ratings, 0, 1) 380 | 381 | return F.binary_cross_entropy_with_logits(predicted_ratings, 382 | observed_ratings, 383 | size_average=True) 384 | -------------------------------------------------------------------------------- /pytorch/Models/GAU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch_utils 4 | import nets as my_nets 5 | import losses as my_losses 6 | import torch_utils as my_utils 7 | import torch.optim as optim 8 | import time 9 | import os 10 | from handlers import output_handler, sampler 11 | import datetime 12 | import json 13 | from interactions import Interactions 14 | import collections 15 | from typing import Dict, List 16 | import rank_metrics 17 | 18 | 19 | class GAU_model(object): 20 | """ 21 | Model for GAU 22 | 23 | Parameters 24 | ---------- 25 | 26 | loss: string, optional 27 | The loss function for approximating a softmax with negative sampling. 
28 | One of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge', corresponding 29 | to losses from :class:`spotlight.losses`. 30 | 31 | embedding_dim: int, optional 32 | Number of embedding dimensions to use for representing items. 33 | Overridden if representation is an instance of a representation class. 34 | n_iter: int, optional 35 | Number of iterations to run. 36 | batch_size: int, optional 37 | Minibatch size. 38 | l2: float, optional 39 | L2 loss penalty. 40 | learning_rate: float, optional 41 | Initial learning rate. 42 | optimizer_func: function, optional 43 | Function that takes in module parameters as the first argument and 44 | returns an instance of a PyTorch optimizer. Overrides l2 and learning 45 | rate if supplied. If no optimizer supplied, then use ADAM by default. 46 | use_cuda: boolean, optional 47 | Run the model on a GPU. 48 | sparse: boolean, optional 49 | Use sparse gradients for embedding layers. 50 | random_state: instance of numpy.random.RandomState, optional 51 | Random state to use when fitting. 52 | num_negative_samples: int, optional 53 | Number of negative samples to generate for adaptive hinge loss. 54 | 55 | """ 56 | def __init__(self, 57 | loss = 'pointwise', 58 | embedding_dim = 32, 59 | n_iter = 10, 60 | batch_size = 256, 61 | reg_l2 = 1e-6, # L2 norm, followed Caser 62 | learning_rate = 1e-3, # learning rate or step size, followed Caser 63 | decay_step = 500, 64 | decay_weight = 0.1, 65 | layers_size = [64, 32, 16, 8], # layers' size 66 | optimizer_func = None, # e.g. adam 67 | use_cuda = False, 68 | # sparse = False, 69 | random_state = None, 70 | num_negative_samples = 3, 71 | trained_net = None, 72 | net_type = "mf", 73 | logfolder = None, 74 | full_settings = None): 75 | assert loss in ('pointwise', 'bpr', 'hinge', 'adaptive_hinge', "single_pointwise_square_loss") 76 | self._loss = loss 77 | self._embedding_dim = embedding_dim 78 | self._n_iter = n_iter 79 | self._batch_size = batch_size 80 | self._learning_rate = learning_rate 81 | self._reg_l2 = reg_l2 82 | # self._decay_step = decay_step 83 | # self._decay_weight = decay_weight 84 | 85 | self._layers_size = layers_size 86 | self._optimizer_func = optimizer_func 87 | 88 | self._use_cuda = use_cuda 89 | self._random_state = random_state or np.random.RandomState() 90 | self._num_negative_samples = num_negative_samples 91 | 92 | self._n_users, self._n_items = None, None 93 | self._net = None 94 | self._optimizer = None 95 | # self._lr_decay = None 96 | self._loss_func = None 97 | self._net_type = net_type 98 | assert logfolder != "" 99 | self.logfolder = logfolder 100 | if not os.path.exists(logfolder): 101 | os.mkdir(logfolder) 102 | 103 | curr_date = datetime.datetime.now().timestamp() # second 104 | self.logfile_text = os.path.join(logfolder, "%s_result.txt" % int(curr_date)) 105 | self.saved_model = os.path.join(logfolder, "%s_saved_model" % int(curr_date)) 106 | self.output_handler = output_handler.FileHandler(self.logfile_text) 107 | self.output_handler.myprint(json.dumps(full_settings.__dict__, indent=2, sort_keys=True)) 108 | 109 | # for evaluation during training 110 | self._sampler = sampler.Sampler() 111 | self._candidate = dict() 112 | 113 | if trained_net is not None: 114 | self._net = trained_net 115 | 116 | def __repr__(self): 117 | """ Return a string of the model when you want to print""" 118 | return "Vanilla Matrix Factorization Model" 119 | 120 | def _initialized(self): 121 | return self._net is not None 122 | 123 | def _initialize(self, interactions): 124 | """ 125 | 126 | Parameters 
127 | ---------- 128 | interactions: :class:`interactions.Interactions` 129 | Returns 130 | ------- 131 | 132 | """ 133 | self._n_users, self._n_items = interactions.num_users, interactions.num_items 134 | if self._net_type == "gau": 135 | self._net = my_nets.GAU(self._n_users, self._n_items, self._embedding_dim) 136 | 137 | # put the model into cuda if use cuda 138 | self._net = my_utils.gpu(self._net, self._use_cuda) 139 | 140 | if self._optimizer_func is None: 141 | self._optimizer = optim.Adam( 142 | self._net.parameters(), 143 | weight_decay = self._reg_l2, 144 | lr = self._learning_rate) 145 | else: 146 | self._optimizer = self._optimizer_func(self._net.parameters()) 147 | 148 | # losses functions 149 | self._loss_func = my_losses.single_pointwise_square_loss 150 | print("Using: ", self._loss_func) 151 | 152 | def _check_input(self, user_ids, item_ids, allow_items_none=False): 153 | 154 | if isinstance(user_ids, int): user_id_max = user_ids 155 | else: user_id_max = user_ids.max() 156 | 157 | if user_id_max >= self._n_users: 158 | raise ValueError('Maximum user id greater than number of users in model.') 159 | 160 | if allow_items_none and item_ids is None: return 161 | 162 | if isinstance(item_ids, int): item_id_max = item_ids 163 | else: item_id_max = item_ids.max() 164 | 165 | if item_id_max >= self._n_items: 166 | raise ValueError('Maximum item id greater than number of items in model.') 167 | 168 | def fit(self, interactions, 169 | verbose=True, 170 | topN = 10, 171 | vadRatings = None, 172 | vadNegatives = None, 173 | testRatings = None, 174 | testNegatives = None, 175 | adjNetwork = None, 176 | user_user_sppmi = None, 177 | item_item_sppmi = None, 178 | user_user_sim = None, 179 | item_item_sim = None, 180 | alpha_gau: float = None, gamma_gau: float = None, beta_gau: float = None): 181 | """ 182 | Fit the model. 183 | Parameters 184 | ---------- 185 | 186 | interactions: :class:`interactions.Interactions` 187 | The input sequence dataset. 188 | vadRatings: :class:`list[list[int]]` 189 | vadNegatives: :class:`list[list[int]]` 190 | testRatings: :class:`list[list[int]]` 191 | testNegatives: :class:`list[list[int]]` 192 | Negative samples of every pair of (user, item) in testRatings. 
shape (bs, 100) 193 | 100 negative samples 194 | """ 195 | 196 | self._sampler.set_interactions(interactions) 197 | if not self._initialized(): 198 | self._initialize(interactions) 199 | 200 | best_map, best_ndcg, best_epoch, test_ndcg, test_map = 0, 0, 0, 0, 0 201 | test_results_dict = None 202 | 203 | for epoch_num in range(self._n_iter): 204 | user_ids, item_ids, neg_items_ids = self._sampler.get_train_instances(interactions, self._num_negative_samples, random_state = self._random_state) 205 | self._check_input(user_ids, item_ids) 206 | users, items, neg_items = my_utils.shuffle(user_ids, item_ids, neg_items_ids, random_state = self._random_state) 207 | 208 | user_ids_tensor = my_utils.gpu(torch.from_numpy(users), self._use_cuda) 209 | item_ids_tensor = my_utils.gpu(torch.from_numpy(items), self._use_cuda) 210 | neg_item_ids_tensor = my_utils.gpu(torch.from_numpy(neg_items), self._use_cuda) 211 | self._check_shape(user_ids_tensor, item_ids_tensor, neg_item_ids_tensor, self._num_negative_samples) 212 | epoch_loss = 0.0 213 | t1 = time.time() 214 | visited_users = set() 215 | visited_users_sppmi = set() 216 | visited_item_sppmi = set() 217 | visited_user_sim = set() 218 | visited_item_sim = set() 219 | 220 | for (minibatch_num, 221 | (batch_user, batch_item, batch_negatives)) in enumerate(my_utils.minibatch(user_ids_tensor, item_ids_tensor, neg_item_ids_tensor, 222 | batch_size = self._batch_size)): 223 | 224 | # need to duplicate batch_user and batch_item 225 | network = self._prepare_network_input(batch_user, visited_users, adjNetwork) 226 | user_user_sppmi_selected = self._select_user_user_sppmi_input(batch_user, visited_users_sppmi, user_user_sppmi) 227 | item_item_sppmi_selected = self._select_user_user_sppmi_input(batch_item, visited_item_sppmi, item_item_sppmi) 228 | user_user_sim_selected = self._select_user_user_sppmi_input(batch_item, visited_user_sim, user_user_sim) 229 | item_item_sim_selected = self._select_user_user_sppmi_input(batch_item, visited_item_sim, item_item_sim) 230 | self._optimizer.zero_grad() 231 | loss = self._get_loss(batch_user, batch_item, network, user_user_sppmi_selected, 232 | item_item_sppmi_selected, user_user_sim_selected, item_item_sim_selected, alpha_gau, gamma_gau, beta_gau) 233 | 234 | epoch_loss += loss.item() 235 | loss.backward() 236 | self._optimizer.step() 237 | 238 | epoch_loss /= minibatch_num + 1 239 | t2 = time.time() 240 | epoch_train_time = t2 - t1 241 | if verbose: # validation after each epoch 242 | t1 = time.time() 243 | result_val = self.evaluate(vadRatings, vadNegatives, topN) 244 | mapks = result_val["map"] 245 | ndcg = result_val["ndcg"] 246 | recall = result_val["recall"] 247 | 248 | result_test = self.evaluate(testRatings, testNegatives, topN) 249 | maps_test = result_test["map"] 250 | ndcg_test = result_test["ndcg"] 251 | recall_test = result_test["recall"] 252 | 253 | t2 = time.time() 254 | eval_time = t2 - t1 255 | self.output_handler.myprint('|Epoch %d | Train time: %d (s) | Train loss: %.5f | Eval time: %.3f (s) ' 256 | '| Vad mapks@%d = %.5f | Vad ndcg@%d = %.5f | Vad recall@%d = %.5f ' 257 | '| Test mapks@%d = %.5f | Test ndcg@%d = %.5f | Test recall@%d = %.5f' 258 | % (epoch_num, epoch_train_time, epoch_loss, eval_time, topN, mapks, topN, ndcg, 259 | topN, recall, topN, maps_test, topN, ndcg_test, topN, recall_test)) 260 | if ndcg > best_ndcg: 261 | with open(self.saved_model, "wb") as f: 262 | torch.save(self._net, f) 263 | test_results_dict = result_test 264 | best_map, best_ndcg, best_epoch = mapks, ndcg, epoch_num 
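                    # model selection: the checkpoint saved above and the test-set metrics recorded below are refreshed only when the validation NDCG@topN improves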
265 | test_map, test_ndcg = maps_test, ndcg_test 266 | 267 | if np.isnan(epoch_loss) or epoch_loss == 0.0: 268 | raise ValueError('Degenerate epoch loss: {}'.format(epoch_loss)) 269 | 270 | self.output_handler.myprint('Best result: ' 271 | '| vad precisions@%d = %.3f | vad ndcg@%d = %.3f ' 272 | '| test precisions@%d = %.3f | test ndcg@%d = %.3f | epoch = %d' % (topN, best_map, topN, best_ndcg, 273 | topN, test_map, topN, test_ndcg, best_epoch)) 274 | self.output_handler.myprint_details(json.dumps(test_results_dict, sort_keys = True, indent = 2)) 275 | 276 | def _check_shape(self, users, items, neg_items, num_negatives): 277 | assert users.shape == items.shape 278 | assert neg_items.shape == (users.shape[0], num_negatives) # key difference 279 | 280 | def _get_negative_prediction(self, user_ids): 281 | """ Code from Spotlight """ 282 | negative_items = self._sampler.random_sample_items( 283 | self._n_items, 284 | len(user_ids), 285 | random_state=self._random_state) 286 | negative_var = my_utils.gpu(torch.from_numpy(negative_items), self._use_cuda) 287 | negative_prediction = self._net(user_ids, negative_var) 288 | return negative_prediction 289 | 290 | def _get_multiple_negative_predictions_normal(self, user_ids, item_ids, neg_item_ids, n, 291 | network, 292 | user_user_sppmi, 293 | item_item_sppmi, **kargs): 294 | """ 295 | We compute prediction for every pair of (user, neg_item). Since shape of user_ids is (batch_size, ) 296 | and neg_item_ids.shape = (batch_size, n), we need to reshape user_ids a little bit. 297 | 298 | Parameters 299 | ---------- 300 | user_ids: :class:`torch.Tensor` 301 | shape (batch_size, ) 302 | item_ids: :class:`torch.Tensor` 303 | shape (batch_size, ) 304 | neg_item_ids: :class:`torch.Tensor` 305 | shape (batch_size, n) 306 | n: int 307 | 308 | Returns 309 | ------- 310 | 311 | """ 312 | batch_size = user_ids.size(0) 313 | assert neg_item_ids.size() == (batch_size, n) 314 | # needs to check 315 | user_ids_tmp = user_ids.view(batch_size, 1).expand(batch_size, n).reshape(batch_size * n) 316 | 317 | assert user_ids_tmp.size() == (batch_size * n, ) 318 | batch_negatives_tmp = neg_item_ids.view(batch_size * n) 319 | 320 | negative_prediction = self._net(user_ids_tmp, batch_negatives_tmp) 321 | positive_prediction = self._net(user_ids, item_ids) # (batch_size) 322 | positive_prediction = positive_prediction.view(batch_size, 1).expand(batch_size, n).reshape(batch_size * n) 323 | 324 | assert positive_prediction.shape == negative_prediction.shape 325 | # print(self._loss_func) 326 | loss = self._loss_func(positive_prediction, negative_prediction) 327 | 328 | if len(network) != 0: 329 | loss += self._net.network_loss(network) 330 | 331 | # network_loss = 0 332 | if len(user_user_sppmi) != 0: 333 | loss += self._net.user_user_sppmi_loss(user_user_sppmi) 334 | 335 | if len(item_item_sppmi) != 0: 336 | loss += self._net.item_item_sppmi_loss(item_item_sppmi) 337 | 338 | if "user_user_sim" in kargs and "item_item_sim" in kargs: 339 | user_user_sim = kargs["user_user_sim"] 340 | item_item_sim = kargs["item_item_sim"] 341 | gamma_gau = kargs["gamma_gau"] 342 | beta_gau = kargs["beta_gau"] 343 | if len(user_user_sim) != 0: 344 | loss += gamma_gau * self._net.user_user_sim_loss(user_user_sim) 345 | if len(item_item_sim) != 0: 346 | loss += beta_gau * self._net.item_item_sim_loss(item_item_sim) 347 | 348 | return loss 349 | 350 | def _select_user_user_sppmi_input(self, batch_user, done, user_user_sppmi): 351 | """ 352 | 353 | Parameters 354 | ---------- 355 | batch_user: 
:class:`torch.Tensor` shape (batch_size, ) 356 | visited_users: :class:`set` 357 | id of visited users for network information 358 | adjNetwork: :class:`dict` 359 | key: userID `int` and values are adjacent vertices 360 | 361 | Returns 362 | ------- 363 | """ 364 | # network 365 | targets = [] 366 | T = torch_utils.tensor2numpy(batch_user) 367 | for u in T: 368 | if u in done: continue 369 | done.add(u) 370 | targets.append(u) 371 | if len(targets) == 0: 372 | return [] 373 | targets = np.array(targets) 374 | selected = user_user_sppmi[targets] 375 | 376 | user_indices = torch_utils.numpy2tensor(np.array(targets), dtype = torch.long) 377 | selected = torch_utils.numpy2tensor(np.array(selected), dtype = torch.float) 378 | 379 | sppmi = [torch_utils.gpu(user_indices, gpu = self._use_cuda), torch_utils.gpu(selected, self._use_cuda)] 380 | return sppmi 381 | 382 | def _prepare_network_input(self, batch_user, visited_users, adjNetwork): 383 | """ 384 | 385 | Parameters 386 | ---------- 387 | batch_user: :class:`torch.Tensor` shape (batch_size, ) 388 | visited_users: :class:`set` 389 | id of visited users for network information 390 | adjNetwork: :class:`dict` 391 | key: userID `int` and values are adjacent vertices 392 | 393 | Returns 394 | ------- 395 | """ 396 | # network 397 | targets, labels = [], [] 398 | T = torch_utils.tensor2numpy(batch_user) 399 | for u in T: 400 | if u in visited_users: continue 401 | visited_users.add(u) 402 | neighbors = adjNetwork.get(u, [0] * self._n_users) 403 | targets.append(u) 404 | labels.append(neighbors) 405 | user_indices = torch_utils.numpy2tensor(np.array(targets), dtype = torch.long) 406 | labels = torch_utils.numpy2tensor(np.array(labels), dtype = torch.float) 407 | network = [torch_utils.gpu(user_indices, gpu = True), torch_utils.gpu(labels, True)] 408 | return network 409 | 410 | def _get_loss(self, user_ids, item_ids, network, user_user_sppmi, item_item_sppmi, 411 | user_user_sim, item_item_sim, alpha_gau = None, gamma_gau = None, beta_gau = None): 412 | """ 413 | We compute prediction for every pair of (user, neg_item). Since shape of user_ids is (batch_size, ) 414 | and neg_item_ids.shape = (batch_size, n), we need to reshape user_ids a little bit. 415 | 416 | Parameters 417 | ---------- 418 | user_ids: :class:`torch.Tensor` 419 | shape (batch_size, ) 420 | item_ids: :class:`torch.Tensor` 421 | shape (batch_size, ) 422 | 423 | Returns 424 | ------- 425 | 426 | """ 427 | batch_size = user_ids.size(0) 428 | positive_prediction = self._net(user_ids, item_ids) # (batch_size) 429 | loss = self._loss_func(positive_prediction) 430 | if len(network) != 0: 431 | loss += alpha_gau * self._net.network_loss(network) 432 | 433 | if len(user_user_sppmi) != 0: 434 | loss += self._net.user_user_sppmi_loss(user_user_sppmi) 435 | 436 | if len(item_item_sppmi) != 0: 437 | loss += self._net.item_item_sppmi_loss(item_item_sppmi) 438 | 439 | if len(user_user_sim) != 0: 440 | loss += gamma_gau * self._net.user_user_sim_loss(user_user_sim) 441 | 442 | if len(item_item_sim) != 0: 443 | loss += beta_gau * self._net.item_item_sim_loss(item_item_sim) 444 | 445 | return loss 446 | 447 | def predict(self, user_ids, item_ids): 448 | """ 449 | Make predictions: given a sequence of interactions, predict 450 | the next item in the sequence. 451 | 452 | Parameters 453 | ---------- 454 | 455 | sequences: array, (1 x max_sequence_length) 456 | Array containing the indices of the items in the sequence. 
457 | item_ids: array (num_items x 1), optional 458 | Array containing the item ids for which prediction scores 459 | are desired. If not supplied, predictions for all items 460 | will be computed. 461 | 462 | Returns 463 | ------- 464 | 465 | predictions: array 466 | Predicted scores for all items in item_ids. 467 | """ 468 | 469 | self._net.train(False) # very important 470 | 471 | user_ids, item_ids = my_utils._predict_process_ids(user_ids, item_ids, self._n_items, self._use_cuda) 472 | assert user_ids.shape == item_ids.shape 473 | out = self._net.predict(user_ids, item_ids) 474 | 475 | return my_utils.cpu(out).detach().numpy().flatten() 476 | 477 | def evaluate(self, ratings: Dict[int, List[int]], negatives: Dict[int, List[int]], topN: int): 478 | """ 479 | evaluate performance of models 480 | :param ratings: key: user, value: list of positive items 481 | :param negatives: key: user, value: list of negative items 482 | :param topN: int 483 | :return: 484 | """ 485 | ndcgs, apks, recalls = [], [], [] 486 | for user in sorted(ratings.keys()): 487 | pos_items = ratings[user] 488 | neg_items = negatives[user] 489 | assert type(pos_items) == list and type(neg_items) == list 490 | 491 | items = neg_items + pos_items 492 | users = np.full(len(items), user, dtype=np.int64) 493 | items = np.asarray(items) 494 | predictions = self.predict(users, items) 495 | labels = [0.0] * len(neg_items) + [1.0] * len(pos_items) 496 | labels = np.array(labels) 497 | # compute metric here 498 | 499 | indices = np.argsort(-predictions)[:topN] # indices of items with highest scores 500 | ranklist = labels[indices] 501 | ndcg = rank_metrics.ndcg_at_k(ranklist, topN) 502 | _, recall = rank_metrics._compute_precision_recall(ranklist, topN) 503 | apk = rank_metrics.average_precision(ranklist[:topN]) 504 | ndcgs.append(ndcg) 505 | apks.append(apk) 506 | recalls.append(recall) 507 | 508 | results = {} 509 | results["ndcg"] = np.nanmean(ndcgs) 510 | results["ndcg_list"] = ndcgs 511 | results["map"] = np.nanmean(apks) 512 | results["maps_list"] = apks 513 | results["recall"] = np.nanmean(recalls) 514 | results["recalls_list"] = recalls 515 | 516 | return results 517 | --------------------------------------------------------------------------------
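
The loss functions in pytorch/losses.py above all follow the same contract: they take per-example score tensors of shape (batch_size,) for positive and, for the pairwise losses, negative items, plus an optional binary mask that drops selected entries and renormalises by mask.sum(), and an average flag that switches the reduction between mean and sum. The BCE-style and BPR losses apply the sigmoid internally, so raw scores should be passed in. Below is a minimal illustrative sketch of calling them directly; it assumes the interpreter is started from the pytorch/ directory (so the flat `import losses` used throughout the repository resolves) and uses random scores purely for illustration.

    # illustrative sketch only; assumes the working directory is pytorch/
    import torch
    import losses  # pytorch/losses.py from this repository

    batch_size = 8
    positive_scores = torch.randn(batch_size)   # scores for observed (positive) items
    negative_scores = torch.randn(batch_size)   # scores for sampled negative items

    # BPR: -log(sigmoid(pos - neg)); summed over the batch by default (average=False)
    print(losses.bpr_loss(positive_scores, negative_scores))

    # hinge: clamp(neg - pos + 1, min=0); averaged over the batch here
    print(losses.hinge_loss(positive_scores, negative_scores, average=True))

    # masking: contributions of masked-out examples are dropped and the loss is
    # renormalised by the number of kept examples (mask.sum())
    mask = torch.tensor([1, 1, 1, 1, 0, 0, 0, 0])
    print(losses.bpr_loss(positive_scores, negative_scores, mask=mask))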