├── .gitignore ├── README.md ├── utils.py ├── evaluation.py ├── caser.py ├── data_process └── 3_item_dpp_emb.py ├── interactions.py ├── LICENSE └── train_caser.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | .idea/ 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # DPPLikelihoods4SeqRec 3 | 4 | A PyTorch implementation of the paper: 5 | 6 | *Determinantal Point Process Likelihoods for Sequential Recommendation, Yuli Liu, Christian Walder and Lexing Xie, SIGIR '22* 7 | 8 | # Requirements 9 | * Python 2 or 3 10 | * [PyTorch](https://github.com/pytorch/pytorch) v1.0+ (the DPP losses use `torch.diag_embed` and batched `torch.det`) 11 | * NumPy 12 | * SciPy (TensorFlow 2.x and pandas are additionally needed for `data_process/3_item_dpp_emb.py`) 13 | 14 | # Usage 15 | 1. Install required packages. 16 | 2. Run `python train_caser.py` 17 | 18 | # Configurations 19 | 20 | 21 | #### Data 22 | The default data arguments in `train_caser.py` expect `datasets/beauty/train_3.txt`, `datasets/beauty/test_3.txt`, the item-category file `datasets/beauty/cate.txt` and the pre-learned DPP item kernel `datasets/beauty/item_kernel_3.pkl`, which is produced by `data_process/3_item_dpp_emb.py`. 23 | 24 | #### Model Args (in train_caser.py) 25 | `--L`/`--T` set the sequence and target lengths, `--neg_samples` the number of negatives (Z), `--dpp_loss` the objective (0: cross-entropy, 1: DSL, 2: CDSL) and `--batch_format` whether the minibatch form of the DPP loss is used; the Caser-specific arguments (`d`, `nv`, `nh`, `drop`, `ac_conv`, `ac_fc`) are defined in the model parser. 26 | 27 | # Citation 28 | 29 | If you use this code in your paper, please cite the paper: 30 | 31 | 32 | # Acknowledgment 33 | 34 | This project (utils.py, interactions.py, etc.) is heavily built on [Spotlight](https://github.com/maciejkula/spotlight) and [Caser](https://github.com/graytowne/caser_pytorch). 35 | Thanks to the authors! 36 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | import random 6 | 7 | activation_getter = {'iden': lambda x: x, 'relu': F.relu, 'tanh': torch.tanh, 'sigm': torch.sigmoid} 8 | 9 | 10 | def gpu(tensor, gpu=False): 11 | 12 | if gpu: 13 | return tensor.cuda() 14 | else: 15 | return tensor 16 | 17 | 18 | def cpu(tensor): 19 | 20 | if tensor.is_cuda: 21 | return tensor.cpu() 22 | else: 23 | return tensor 24 | 25 | 26 | def minibatch(*tensors, **kwargs): 27 | 28 | batch_size = kwargs.get('batch_size', 128) 29 | 30 | if len(tensors) == 1: 31 | tensor = tensors[0] 32 | for i in range(0, len(tensor), batch_size): 33 | yield tensor[i:i + batch_size] 34 | else: 35 | for i in range(0, len(tensors[0]), batch_size): 36 | yield tuple(x[i:i + batch_size] for x in tensors) 37 | 38 | 39 | def shuffle(*arrays, **kwargs): 40 | 41 | require_indices = kwargs.get('indices', False) 42 | 43 | if len(set(len(x) for x in arrays)) != 1: 44 | raise ValueError('All inputs to shuffle must have ' 45 | 'the same length.') 46 | 47 | shuffle_indices = np.arange(len(arrays[0])) 48 | np.random.shuffle(shuffle_indices) 49 | 50 | if len(arrays) == 1: 51 | result = arrays[0][shuffle_indices] 52 | else: 53 | result = tuple(x[shuffle_indices] for x in arrays) 54 | 55 | if require_indices: 56 | return result, shuffle_indices 57 | else: 58 | return result 59 | 60 | 61 | def assert_no_grad(variable): 62 | 63 | if variable.requires_grad: 64 | raise ValueError( 65 | "nn criterions don't compute the gradient w.r.t. 
targets - please " 66 | "mark these variables as volatile or not requiring gradients" 67 | ) 68 | 69 | 70 | def set_seed(seed, cuda=False): 71 | 72 | np.random.seed(seed) 73 | random.seed(seed) 74 | if cuda: 75 | torch.cuda.manual_seed(seed) 76 | else: 77 | torch.manual_seed(seed) 78 | 79 | 80 | def str2bool(v): 81 | return v.lower() in ('true',) -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | def _compute_apk(targets, predictions, k): 5 | 6 | if len(predictions) > k: 7 | predictions = predictions[:k] 8 | 9 | score = 0.0 10 | num_hits = 0.0 11 | 12 | for i, p in enumerate(predictions): 13 | if p in targets and p not in predictions[:i]: 14 | num_hits += 1.0 15 | score += num_hits / (i + 1.0) 16 | 17 | if not list(targets): 18 | return 0.0 19 | 20 | return score / min(len(targets), k) 21 | 22 | def dcg_at_k(r, k, method=1): 23 | r = np.asfarray(r)[:k] 24 | if r.size: 25 | if method == 0: 26 | return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1))) 27 | elif method == 1: 28 | return np.sum(r / np.log2(np.arange(2, r.size + 2))) 29 | else: 30 | raise ValueError('method must be 0 or 1.') 31 | return 0. 32 | 33 | def ndcg_at_k(r, k, method=1): 34 | dcg_max = dcg_at_k(sorted(r, reverse=True), k, method) 35 | if not dcg_max: 36 | return 0. 37 | return dcg_at_k(r, k, method) / dcg_max 38 | 39 | def cc_at_k(cc, k, CATE_NUM): 40 | cates = set() 41 | for i in range(k): 42 | if i > (len(cc)-1): 43 | break 44 | for c in cc[i]: 45 | cates.add(c) 46 | return len(cates) / CATE_NUM 47 | 48 | def _compute_precision_recall(targets, predictions, k, iidcate_map, cate_num): 49 | 50 | pred = predictions[:k] 51 | r = [] 52 | cc = [] 53 | for i in pred: 54 | if i in targets: 55 | r.append(1) 56 | else: 57 | r.append(0) 58 | if i == 0: 59 | continue 60 | else: 61 | cc.append(iidcate_map[i-1]) 62 | 63 | num_hit = len(set(pred).intersection(set(targets))) 64 | precision = float(num_hit) / len(pred) 65 | recall = float(num_hit) / len(targets) 66 | ndcg = ndcg_at_k(r, k) 67 | cc = cc_at_k(cc, k, cate_num) 68 | return precision, recall, ndcg, cc 69 | 70 | def evaluate_ranking(model, test, config, l_kernel, cate, train=None, k=10): 71 | """ 72 | Compute Precision@k, Recall@k, NDCG@k and category coverage (CC@k) scores. 73 | One score is given for every user with interactions in the test 74 | set, representing the Precision@k, Recall@k, NDCG@k and CC@k of all 75 | their test items. 76 | 77 | Parameters 78 | ---------- 79 | 80 | model: fitted instance of a recommender model 81 | The model to evaluate. 82 | test: :class:`spotlight.interactions.Interactions` 83 | Test interactions. 84 | train: :class:`spotlight.interactions.Interactions`, optional 85 | Train interactions. If supplied, rated items in 86 | interactions will be excluded. 
87 | k: int or array of int, 88 | The maximum number of predicted items 89 | """ 90 | 91 | test = test.tocsr() 92 | 93 | if train is not None: 94 | train = train.tocsr() 95 | 96 | if not isinstance(k, list): 97 | ks = [k] 98 | else: 99 | ks = k 100 | 101 | precisions = [list() for _ in range(len(ks))] 102 | recalls = [list() for _ in range(len(ks))] 103 | ndcgs = [list() for _ in range(len(ks))] 104 | ccs = [list() for _ in range(len(ks))] 105 | apks = list() 106 | 107 | for user_id, row in enumerate(test): 108 | 109 | if not len(row.indices): 110 | continue 111 | 112 | predictions = -model.predict(user_id) 113 | if train is not None: 114 | rated = set(train[user_id].indices) 115 | else: 116 | rated = [] 117 | 118 | predictions = predictions.argsort() 119 | predictions = [p for p in predictions if p not in rated] 120 | 121 | targets = row.indices 122 | if 0 in targets: 123 | print('there is 0') 124 | 125 | for i, _k in enumerate(ks): 126 | precision, recall, ndcg, cc = _compute_precision_recall(targets, predictions, _k, cate, config.cate_num) 127 | precisions[i].append(precision) 128 | recalls[i].append(recall) 129 | ndcgs[i].append(ndcg) 130 | ccs[i].append(cc) 131 | 132 | apks.append(_compute_apk(targets, predictions, k=np.inf)) 133 | 134 | precisions = [np.array(i) for i in precisions] 135 | recalls = [np.array(i) for i in recalls] 136 | 137 | if not isinstance(k, list): 138 | precisions = precisions[0] 139 | recalls = recalls[0] 140 | 141 | return precisions, recalls, ndcgs, ccs 142 | -------------------------------------------------------------------------------- /caser.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from utils import activation_getter 6 | 7 | 8 | class Caser(nn.Module): 9 | """ 10 | Convolutional Sequence Embedding Recommendation Model (Caser)[1]. 11 | 12 | [1] Personalized Top-N Sequential Recommendation via Convolutional Sequence Embedding, Jiaxi Tang and Ke Wang , WSDM '18 13 | 14 | Parameters 15 | ---------- 16 | 17 | num_users: int, 18 | Number of users. 19 | num_items: int, 20 | Number of items. 21 | model_args: args, 22 | Model-related arguments, like latent dimensions. 
23 | """ 24 | 25 | def __init__(self, num_users, num_items, model_args): 26 | super(Caser, self).__init__() 27 | self.args = model_args 28 | 29 | # init args 30 | L = self.args.L 31 | dims = self.args.d 32 | self.n_h = self.args.nh 33 | self.n_v = self.args.nv 34 | self.drop_ratio = self.args.drop 35 | self.ac_conv = activation_getter[self.args.ac_conv] 36 | self.ac_fc = activation_getter[self.args.ac_fc] 37 | 38 | # user and item embeddings 39 | self.user_embeddings = nn.Embedding(num_users, dims) 40 | self.item_embeddings = nn.Embedding(num_items, dims) 41 | 42 | # vertical conv layer 43 | self.conv_v = nn.Conv2d(1, self.n_v, (L, 1)) 44 | 45 | # horizontal conv layer 46 | lengths = [i + 1 for i in range(L)] 47 | self.conv_h = nn.ModuleList([nn.Conv2d(1, self.n_h, (i, dims)) for i in lengths]) 48 | 49 | # fully-connected layer 50 | self.fc1_dim_v = self.n_v * dims 51 | self.fc1_dim_h = self.n_h * len(lengths) 52 | fc1_dim_in = self.fc1_dim_v + self.fc1_dim_h 53 | # W1, b1 can be encoded with nn.Linear 54 | self.fc1 = nn.Linear(fc1_dim_in, dims) 55 | # W2, b2 are encoded with nn.Embedding, as we don't need to compute scores for all items 56 | self.W2 = nn.Embedding(num_items, dims+dims) 57 | self.b2 = nn.Embedding(num_items, 1) 58 | 59 | # dropout 60 | self.dropout = nn.Dropout(self.drop_ratio) 61 | 62 | ## weight initialization 63 | self.user_embeddings.weight.data.normal_(0, 1.0 / self.user_embeddings.embedding_dim) 64 | self.item_embeddings.weight.data.normal_(0, 1.0 / self.item_embeddings.embedding_dim) 65 | self.W2.weight.data.normal_(0, 1.0 / self.W2.embedding_dim) 66 | self.b2.weight.data.zero_() 67 | 68 | self.cache_x = None 69 | 70 | def forward(self, seq_var, user_var, item_var, for_pred=False): 71 | """ 72 | The forward propagation used to get recommendation scores, given 73 | triplet (user, sequence, targets). 74 | 75 | Parameters 76 | ---------- 77 | 78 | seq_var: torch.FloatTensor with size [batch_size, max_sequence_length] 79 | a batch of sequence 80 | user_var: torch.LongTensor with size [batch_size] 81 | a batch of user 82 | item_var: torch.LongTensor with size [batch_size] 83 | a batch of items 84 | for_pred: boolean, optional 85 | Train or Prediction. Set to True when evaluation. 
86 | """ 87 | 88 | # Embedding Look-up 89 | item_embs = self.item_embeddings(seq_var).unsqueeze(1) # use unsqueeze() to get 4-D, seq embeddings 90 | user_emb = self.user_embeddings(user_var).squeeze(1) 91 | 92 | # Convolutional Layers 93 | out, out_h, out_v = None, None, None 94 | # vertical conv layer 95 | if self.n_v: 96 | out_v = self.conv_v(item_embs) 97 | out_v = out_v.view(-1, self.fc1_dim_v) # prepare for fully connect 98 | 99 | # horizontal conv layer 100 | out_hs = list() 101 | if self.n_h: 102 | for conv in self.conv_h: 103 | conv_out = self.ac_conv(conv(item_embs).squeeze(3)) 104 | pool_out = F.max_pool1d(conv_out, conv_out.size(2)).squeeze(2) 105 | out_hs.append(pool_out) 106 | out_h = torch.cat(out_hs, 1) # prepare for fully connect 107 | 108 | # Fully-connected Layers, final item embeddings 109 | out = torch.cat([out_v, out_h], 1) 110 | # apply dropout 111 | out = self.dropout(out) 112 | 113 | # fully-connected layer 114 | z = self.ac_fc(self.fc1(out)) 115 | x = torch.cat([z, user_emb], 1) #z is combined by seq item embs and user emb 116 | 117 | w2 = self.W2(item_var) 118 | b2 = self.b2(item_var) 119 | 120 | if for_pred: 121 | w2 = w2.squeeze() 122 | b2 = b2.squeeze() 123 | res = (x * w2).sum(1) + b2 124 | else: 125 | res = torch.baddbmm(b2, w2, x.unsqueeze(2)).squeeze() 126 | 127 | return res 128 | -------------------------------------------------------------------------------- /data_process/3_item_dpp_emb.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from copy import deepcopy 4 | import tensorflow.compat.v1 as tf 5 | tf.disable_v2_behavior() 6 | from random import shuffle 7 | import time 8 | from ast import literal_eval 9 | import pickle as cPickle 10 | 11 | import torch.nn.functional as F 12 | import torch 13 | #################################### 14 | # logistic dpp, used to generate diverse item embedings based on item sets 15 | # this code is mainly based the source code of "Multi-Task Determinantal Point Processes for Recommendation", thanks to the authors 16 | # generate user_num, item_num, input files (item sets with fixed length 5), and output_files, according to your data 17 | #################################### 18 | 19 | t0 = time.time() 20 | 21 | np.random.seed(0) 22 | 23 | #################################### 24 | # parameters 25 | #################################### 26 | user_num = 4641 27 | item_num = 2235 28 | emb_dim = 64 29 | set_length = 5 #k_sized length of a set 30 | lr = 1e-4 31 | decay_step = 100 32 | decay = 0.95 33 | 34 | sigmoid_lbda = 0.01 35 | epochs = 100 36 | runs = 1 37 | batch_size = 1024 38 | emb_init_mean = 0. 39 | emb_init_std = 0.01 40 | diag_init_mean = 1. 41 | diag_init_std = 0.01 42 | regu_weight = 0. 43 | 44 | ################################ 45 | # get sets from prepared sets files. 46 | # format: 47 | # each line: u;id1,id2,...,id5;id2,id3,...,id6;... 
48 | # positive sets are selected from a user's interacted items (in training dataset), each set contains 5 items 49 | # negative sets are randomly selected items that a user is not interested in 50 | ################################ 51 | def get_sets(pos_set_file, neg_set_file): 52 | 53 | upos_sets = [] 54 | with open(pos_set_file) as f: 55 | for l in f.readlines(): 56 | sstr = l.strip().split(';') 57 | u, sets = int(sstr[0]), sstr[1:] 58 | 59 | for s in sets: 60 | a_set = [] 61 | s1 = s.split(',') 62 | for id in s1: 63 | a_set.append(int(id)) 64 | if len(a_set) == set_length: 65 | upos_sets.append(a_set) 66 | 67 | uneg_sets = [] 68 | with open(neg_set_file) as f: 69 | for l in f.readlines(): 70 | sstr = l.strip().split(';') 71 | u, sets = int(sstr[0]), sstr[1:] 72 | 73 | for s in sets: 74 | a_set = [] 75 | s1 = s.split(',') 76 | for id in s1: 77 | a_set.append(int(id)) 78 | if len(a_set) == set_length: 79 | uneg_sets.append(a_set) 80 | return np.array(upos_sets), np.array(uneg_sets) 81 | 82 | ################################ 83 | # create model 84 | ################################ 85 | def set_det(item_sets): 86 | subV = tf.gather(weights['V'],item_sets) 87 | subD = tf.matrix_diag(tf.square(tf.gather(weights['D'],item_sets))) 88 | K1 = tf.matmul(subV, tf.transpose(subV,perm=[0,2,1])) 89 | K = tf.add(K1,subD) 90 | eps = tf.eye(tf.shape(K)[1],tf.shape(K)[1],[tf.shape(K)[0]]) 91 | K = tf.add(K,eps) 92 | res = tf.matrix_determinant(K) 93 | return res 94 | 95 | def logsigma(itemSet): 96 | return tf.reduce_mean(tf.log(1-tf.exp(-sigmoid_lbda*set_det(itemSet)))) 97 | 98 | def regularization(itemSet): 99 | itemsInBatch, _ = tf.unique(tf.reshape(itemSet,[-1])) 100 | subV = tf.gather(weights['V'],itemsInBatch) 101 | subD = tf.gather(weights['D'],itemsInBatch) 102 | subV_norm = tf.reduce_mean(tf.norm(subV,axis=1)) 103 | subD_norm = tf.norm(subD) 104 | return subV_norm+subD_norm 105 | 106 | ################################ 107 | # tf graph 108 | ################################ 109 | 110 | pset_input = tf.placeholder(tf.int32, [None,None]) #item sets 111 | nset_input = tf.placeholder(tf.int32, [None,None]) #item sets 112 | 113 | #get processed sets 114 | pos_sets, neg_sets = get_sets('pos_item_sets_3.txt', 'neg_item_sets_3.txt') 115 | train_size = len(pos_sets) 116 | 117 | print(pos_sets.shape, neg_sets.shape) 118 | for run in range(runs): 119 | # Construct model 120 | pset_input = tf.placeholder(tf.int32, [None,None]) #item sets 121 | nset_input = tf.placeholder(tf.int32, [None,None]) #item sets 122 | 123 | # Store layers weight & bias 124 | initializer = tf.keras.initializers.glorot_normal() 125 | weights = { 126 | 'V': tf.Variable(initializer([item_num, emb_dim]), name='item_embeddings'), 127 | 'D': tf.Variable(initializer([item_num]), name='item_bias') 128 | } 129 | # Construct model 130 | loss = logsigma(pset_input) + tf.log(1 - logsigma(nset_input)) # - regu_weight*regularization(pset_input) + regu_weight*regularization(nset_input) 131 | 132 | optimizer = tf.train.AdamOptimizer(learning_rate=lr,beta1=0.01,beta2=0.01) 133 | train_op = optimizer.minimize(-loss) 134 | 135 | # Initializing the variables 136 | init = tf.global_variables_initializer() 137 | 138 | print("start training...") 139 | with tf.Session() as sess: 140 | sess.run(init) 141 | # Training cycle 142 | for epoch in range(epochs): 143 | ave_cost = 0. 
144 | nbatch = 0 145 | while True: 146 | if nbatch*batch_size <= train_size: 147 | pos_batch = pos_sets[nbatch*batch_size: (nbatch+1)*batch_size] 148 | neg_batch = neg_sets[nbatch*batch_size: (nbatch+1)*batch_size] 149 | else: 150 | if train_size - (nbatch-1)*batch_size > 0: 151 | pos_batch = pos_sets[(nbatch-1)*batch_size: train_size] 152 | neg_batch = neg_sets[(nbatch-1)*batch_size: train_size] 153 | break 154 | nbatch += 1 155 | 156 | _, c = sess.run([train_op, loss], feed_dict={pset_input: pos_batch, nset_input: neg_batch}) 157 | ave_cost += c / nbatch 158 | 159 | param = sess.run(weights) 160 | cPickle.dump(param, open('item_kernel_3.pkl', 'wb')) #T=3 161 | -------------------------------------------------------------------------------- /interactions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes describing datasets of user-item interactions. Instances of these 3 | are returned by dataset-fetching and dataset-processing functions. 4 | """ 5 | 6 | import numpy as np 7 | 8 | import scipy.sparse as sp 9 | 10 | 11 | class Interactions(object): 12 | """ 13 | Interactions object. Contains (at a minimum) pair of user-item 14 | interactions. This is designed only for implicit feedback scenarios. 15 | 16 | Parameters 17 | ---------- 18 | 19 | file_path: file contains (user,item,rating) triplets 20 | user_map: dict of user mapping 21 | item_map: dict of item mapping 22 | """ 23 | 24 | def __init__(self, file_path, 25 | user_map=None, 26 | item_map=None): 27 | 28 | if not user_map and not item_map: 29 | user_map = dict() 30 | item_map = dict() 31 | 32 | num_user = 0 33 | num_item = 0 34 | else: 35 | num_user = len(user_map) 36 | num_item = len(item_map) + 1 37 | 38 | user_ids = list() 39 | item_ids = list() 40 | # read users and items from file 41 | with open(file_path, 'r') as fin: 42 | for line in fin: 43 | ids = line.strip().split() 44 | u = ids[0] 45 | iids = ids[1:] 46 | for i in iids: 47 | user_ids.append(u) 48 | item_ids.append(i) 49 | 50 | # update user and item mapping 51 | for u in user_ids: 52 | if u not in user_map: 53 | user_map[u] = num_user 54 | num_user += 1 55 | for i in item_ids: 56 | if i not in item_map: 57 | item_map[i] = num_item 58 | num_item += 1 59 | 60 | user_ids = np.array([user_map[u] for u in user_ids]) 61 | item_ids = np.array([item_map[i] for i in item_ids]) 62 | 63 | self.num_users = num_user 64 | self.num_items = num_item 65 | 66 | self.user_ids = user_ids 67 | self.item_ids = item_ids 68 | 69 | self.user_map = user_map 70 | self.item_map = item_map 71 | 72 | self.sequences = None 73 | self.test_sequences = None 74 | 75 | def __len__(self): 76 | 77 | return len(self.user_ids) 78 | 79 | def tocoo(self): 80 | """ 81 | Transform to a scipy.sparse COO matrix. 82 | """ 83 | 84 | row = self.user_ids 85 | col = self.item_ids 86 | data = np.ones(len(self)) 87 | 88 | return sp.coo_matrix((data, (row, col)), 89 | shape=(self.num_users, self.num_items)) 90 | 91 | def tocsr(self): 92 | """ 93 | Transform to a scipy.sparse CSR matrix. 94 | """ 95 | 96 | return self.tocoo().tocsr() 97 | 98 | def to_sequence(self, sequence_length=5, target_length=1): 99 | """ 100 | Transform to sequence form. 101 | 102 | Valid subsequences of users' interactions are returned. 
For 103 | example, if a user interacted with items [1, 2, 3, 4, 5, 6, 7, 8, 9], the 104 | returned interactions matrix at sequence length 5 and target length 2 105 | will be given by: 106 | 107 | sequences: 108 | 109 | [[1, 2, 3, 4, 5], 110 | [2, 3, 4, 5, 6], 111 | [3, 4, 5, 6, 7]] 112 | 113 | targets: 114 | 115 | [[6, 7], 116 | [7, 8], 117 | [8, 9]] 118 | 119 | sequence for test (the last 'sequence_length' items of each user's sequence): 120 | 121 | [[5, 6, 7, 8, 9]] 122 | 123 | Parameters 124 | ---------- 125 | 126 | sequence_length: int 127 | Sequence length. Subsequences shorter than this 128 | will be left-padded with zeros. 129 | target_length: int 130 | Sequence target length. 131 | """ 132 | 133 | # shift item indices to start from 1, as 0 is used for padding in sequences 134 | for k, v in self.item_map.items(): 135 | self.item_map[k] = v + 1 136 | self.item_ids = self.item_ids + 1 137 | self.num_items += 1 138 | 139 | max_sequence_length = sequence_length + target_length 140 | 141 | # Sort first by user id 142 | sort_indices = np.lexsort((self.user_ids,)) 143 | 144 | user_ids = self.user_ids[sort_indices] 145 | item_ids = self.item_ids[sort_indices] 146 | 147 | user_ids, indices, counts = np.unique(user_ids, 148 | return_index=True, 149 | return_counts=True) 150 | 151 | num_subsequences = sum([c - max_sequence_length + 1 if c >= max_sequence_length else 1 for c in counts]) 152 | 153 | sequences = np.zeros((num_subsequences, sequence_length), 154 | dtype=np.int64) 155 | sequences_targets = np.zeros((num_subsequences, target_length), 156 | dtype=np.int64) 157 | sequence_users = np.empty(num_subsequences, 158 | dtype=np.int64) 159 | 160 | test_sequences = np.zeros((self.num_users, sequence_length), 161 | dtype=np.int64) 162 | test_users = np.empty(self.num_users, 163 | dtype=np.int64) 164 | 165 | _uid = None 166 | for i, (uid, 167 | item_seq) in enumerate(_generate_sequences(user_ids, 168 | item_ids, 169 | indices, 170 | max_sequence_length)): 171 | if uid != _uid: 172 | test_sequences[uid][:] = item_seq[-sequence_length:] # the last `sequence_length` items form the test sequence 173 | test_users[uid] = uid 174 | _uid = uid 175 | sequences_targets[i][:] = item_seq[-target_length:] 176 | sequences[i][:] = item_seq[:sequence_length] 177 | sequence_users[i] = uid 178 | self.sequences = SequenceInteractions(sequence_users, sequences, sequences_targets) 179 | self.test_sequences = SequenceInteractions(test_users, test_sequences) 180 | 181 | 182 | class SequenceInteractions(object): 183 | """ 184 | Interactions encoded as a sequence matrix. 185 | 186 | Parameters 187 | ---------- 188 | user_ids: np.array 189 | sequence users 190 | sequences: np.array 191 | The interactions sequence matrix, as produced by 192 | :func:`~Interactions.to_sequence` 193 | targets: np.array 194 | sequence targets 195 | """ 196 | 197 | def __init__(self, 198 | user_ids, 199 | sequences, 200 | targets=None): 201 | self.user_ids = user_ids 202 | self.sequences = sequences 203 | self.targets = targets 204 | 205 | self.L = sequences.shape[1] 206 | self.T = None 207 | if np.any(targets): 208 | self.T = targets.shape[1] 209 | 210 | 211 | def _sliding_window(tensor, window_size, step_size=1): 212 | if len(tensor) - window_size >= 0: 213 | for i in range(len(tensor), 0, -step_size): 214 | if i - window_size >= 0: 215 | yield tensor[i - window_size:i] 216 | else: 217 | break 218 | else: 219 | num_paddings = window_size - len(tensor) 220 | # Pad sequence with 0s if it is shorter than window size. 
221 | yield np.pad(tensor, (num_paddings, 0), 'constant') 222 | 223 | 224 | def _generate_sequences(user_ids, item_ids, 225 | indices, 226 | max_sequence_length): 227 | for i in range(len(indices)): 228 | 229 | start_idx = indices[i] 230 | 231 | if i >= len(indices) - 1: 232 | stop_idx = None 233 | else: 234 | stop_idx = indices[i + 1] 235 | 236 | for seq in _sliding_window(item_ids[start_idx:stop_idx], 237 | max_sequence_length): 238 | yield (user_ids[i], seq) 239 | 240 | 241 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. 
Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /train_caser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from time import time 3 | 4 | import torch.optim as optim 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import pickle as cPickle 8 | 9 | from caser import Caser 10 | from evaluation import evaluate_ranking 11 | from interactions import Interactions 12 | from utils import * 13 | 14 | 15 | class Recommender(object): 16 | """ 17 | Contains attributes and methods that needed to train a sequential 18 | recommendation model. Models are trained by many tuples of 19 | (users, sequences, targets, negatives) and negatives are from negative 20 | sampling: for any known tuple of (user, sequence, targets), one or more 21 | items are randomly sampled to act as negatives. 22 | 23 | 24 | Parameters 25 | ---------- 26 | 27 | n_iter: int, 28 | Number of iterations to run. 29 | batch_size: int, 30 | Minibatch size. 31 | l2: float, 32 | L2 loss penalty, also known as the 'lambda' of l2 regularization. 33 | neg_samples: int, 34 | Number of negative samples to generate for each targets. 35 | If targets=3 and neg_samples=3, then it will sample 9 negatives. 36 | learning_rate: float, 37 | Initial learning rate. 38 | use_cuda: boolean, 39 | Run the model on a GPU or CPU. 40 | model_args: args, 41 | Model-related arguments, like latent dimensions. 
42 | """ 43 | 44 | def __init__(self, 45 | n_iter=None, 46 | batch_size=None, 47 | l2=None, 48 | neg_samples=None, 49 | learning_rate=None, 50 | use_cuda=False, 51 | model_args=None): 52 | 53 | # model related 54 | self._num_items = None 55 | self._num_users = None 56 | self._net = None 57 | self.model_args = model_args 58 | 59 | # learning related 60 | self._batch_size = batch_size 61 | self._n_iter = n_iter 62 | self._learning_rate = learning_rate 63 | self._l2 = l2 64 | self._neg_samples = neg_samples 65 | self._device = torch.device("cuda" if use_cuda else "cpu") 66 | 67 | # rank evaluation related 68 | self.test_sequence = None 69 | self._candidate = dict() 70 | 71 | @property 72 | def _initialized(self): 73 | return self._net is not None 74 | 75 | def _initialize(self, interactions): 76 | self._num_items = interactions.num_items 77 | self._num_users = interactions.num_users 78 | 79 | self.test_sequence = interactions.test_sequences 80 | 81 | self._net = Caser(self._num_users, 82 | self._num_items, 83 | self.model_args).to(self._device) 84 | 85 | self._optimizer = optim.Adam(self._net.parameters(), 86 | weight_decay=self._l2, 87 | lr=self._learning_rate) 88 | 89 | def fit(self, train, test, cate, config, verbose=False): 90 | """ 91 | The general training loop to fit the model 92 | 93 | Parameters 94 | ---------- 95 | 96 | train: :class:`spotlight.interactions.Interactions` 97 | training instances, also contains test sequences 98 | test: :class:`spotlight.interactions.Interactions` 99 | only contains targets for test sequences 100 | verbose: bool, optional 101 | print the logs 102 | """ 103 | ################################## 104 | # read pre-learned kernel 105 | ################################### 106 | lk_param = cPickle.load(open(config.l_kernel_emb, 'rb'), encoding="latin1") 107 | lk_tensor = torch.FloatTensor(lk_param['V']).to(self._device) 108 | 109 | lk_emb_i = F.normalize(lk_tensor, p=2, dim=1) 110 | l_kernel = torch.matmul(lk_emb_i, lk_emb_i.t()) 111 | 112 | #l_kernel = torch.sigmoid(l_kernel) #this line is optional; use this line, if encounter non-invertible or nan problem 113 | 114 | #l_kernel_un = torch.matmul(lk_tensor, lk_tensor.t()) ##un-normalized pre-learned kernel 115 | 116 | # convert to sequences, targets and users 117 | sequences_np = train.sequences.sequences 118 | targets_np = train.sequences.targets 119 | users_np = train.sequences.user_ids.reshape(-1, 1) 120 | 121 | L, T = train.sequences.L, train.sequences.T 122 | 123 | n_train = sequences_np.shape[0] 124 | 125 | output_str = 'total training instances: %d' % n_train 126 | print(output_str) 127 | 128 | if not self._initialized: 129 | self._initialize(train) 130 | 131 | start_epoch = 0 132 | pre_list = [] 133 | for epoch_num in range(start_epoch, self._n_iter): 134 | 135 | t1 = time() 136 | 137 | # set model to training mode 138 | self._net.train() 139 | 140 | users_np, sequences_np, targets_np = shuffle(users_np, 141 | sequences_np, 142 | targets_np) 143 | 144 | negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples) 145 | 146 | # convert numpy arrays to PyTorch tensors and move it to the corresponding devices 147 | users, sequences, targets, negatives = (torch.from_numpy(users_np).long(), 148 | torch.from_numpy(sequences_np).long(), 149 | torch.from_numpy(targets_np).long(), 150 | torch.from_numpy(negatives_np).long()) 151 | 152 | users, sequences, targets, negatives = (users.to(self._device), 153 | sequences.to(self._device), 154 | targets.to(self._device), 155 | 
negatives.to(self._device)) 156 | 157 | epoch_loss = 0.0 158 | 159 | for (minibatch_num, 160 | (batch_users, 161 | batch_sequences, 162 | batch_targets, 163 | batch_negatives)) in enumerate(minibatch(users, 164 | sequences, 165 | targets, 166 | negatives, 167 | batch_size=self._batch_size)): 168 | items_to_predict = torch.cat((batch_targets, batch_negatives, batch_sequences), 1) 169 | items_prediction = self._net(batch_sequences, 170 | batch_users, 171 | items_to_predict) 172 | 173 | (targets_prediction, negatives_prediction, 174 | seq_prediction) = torch.split(items_prediction, 175 | [batch_targets.size(1), 176 | batch_negatives.size(1), 177 | batch_sequences.size(1)], dim=1) 178 | 179 | self._optimizer.zero_grad() 180 | 181 | if config.dpp_loss == 0: 182 | # compute the binary cross-entropy loss 183 | positive_loss = -torch.mean( 184 | torch.log(torch.sigmoid(targets_prediction))) 185 | negative_loss = -torch.mean( 186 | torch.log(1 - torch.sigmoid(negatives_prediction))) 187 | loss = positive_loss + negative_loss 188 | 189 | ############################################### 190 | # compute the dpp set loss 191 | ############################################### 192 | # DSL 193 | elif config.dpp_loss == 1: 194 | dpp_lhs = [] 195 | size = targets_prediction.shape[0] 196 | batch_sets = torch.cat((batch_targets, batch_negatives), 1) 197 | batch_predictions = torch.cat((targets_prediction, negatives_prediction), 1) 198 | #minibatch format 199 | if config.batch_format == 1: 200 | batch_pos_kernel = torch.zeros(size, config.T, config.T).cuda() 201 | batch_set_kernel = torch.zeros(size, config.T + config.neg_samples, config.T + config.neg_samples).cuda() 202 | 203 | for n in range(size): 204 | batch_pos_kernel[n] = l_kernel[batch_targets[n]-1][:, batch_targets[n]-1] 205 | batch_set_kernel[n] = l_kernel[batch_sets[n]-1][:, batch_sets[n]-1] 206 | 207 | batch_pos_q = torch.diag_embed(torch.exp(targets_prediction)) #can also try sigmoid in some cases 208 | batch_set_q = torch.diag_embed(torch.exp(batch_predictions)) 209 | 210 | batch_pos_kernel = torch.bmm(torch.bmm(batch_pos_q, batch_pos_kernel), batch_pos_q) 211 | batch_set_kernel = torch.bmm(torch.bmm(batch_set_q, batch_set_kernel), batch_set_q) 212 | 213 | p_diag = torch.eye(config.T)*1e-5 214 | pa_diag = p_diag.reshape((1, config.T, config.T)) 215 | pbatch_diag = pa_diag.repeat(size, 1, 1) 216 | 217 | s_diag = torch.eye(config.T+config.neg_samples) 218 | sa_diag = s_diag.reshape((1, config.T + config.neg_samples, config.T + config.neg_samples)) 219 | sbatch_diag = sa_diag.repeat(size, 1, 1) 220 | 221 | batch_pos_det = torch.det(batch_pos_kernel.cpu() + pbatch_diag).cuda() 222 | batch_set_det = torch.det(batch_set_kernel.cpu() + sbatch_diag).cuda() 223 | 224 | dpp_loss = torch.log(batch_pos_det/batch_set_det) 225 | loss = -torch.mean(dpp_loss) 226 | else: 227 | for n in range(size): 228 | pos_q = torch.diag_embed(torch.exp(targets_prediction[n])) 229 | set_q = torch.diag_embed(torch.exp(batch_predictions[n])) 230 | 231 | pos_l_kernel = l_kernel[batch_targets[n]-1][:, batch_targets[n]-1] 232 | set_l_kernel = l_kernel[batch_sets[n]-1][:, batch_sets[n]-1] 233 | 234 | pos_k = torch.mm(torch.mm(pos_q, pos_l_kernel), pos_q) 235 | set_k = torch.mm(torch.mm(set_q, set_l_kernel), set_q) 236 | 237 | pos_det = torch.det(pos_k.cpu() + torch.eye(len(batch_targets[n]))*1e-5).cuda() 238 | set_det = torch.det(set_k.cpu() + torch.eye(len(batch_sets[n]))).cuda() 239 | 240 | dpp_loss = torch.log(pos_det/set_det) 241 | 242 | dpp_lhs.append(dpp_loss) 243 | loss = 
-torch.mean(torch.stack(dpp_lhs)) 244 | # CDSL 245 | elif config.dpp_loss == 2: 246 | dpp_lhs = [] 247 | size = targets_prediction.shape[0] 248 | set_items = torch.cat((batch_sequences, batch_targets, batch_negatives), 1) 249 | set_predictions = torch.cat((seq_prediction, targets_prediction, negatives_prediction), 1) 250 | 251 | pos_items = torch.cat((batch_sequences, batch_targets), 1) 252 | pos_predictions = torch.cat((seq_prediction, targets_prediction), 1) #L+T 253 | if config.batch_format == 1: 254 | batch_pos_kernel = torch.zeros(size, config.L + config.T, config.L + config.T).cuda() 255 | batch_set_kernel = torch.zeros(size, config.L + config.T + config.neg_samples, config.L + config.T + config.neg_samples).cuda() 256 | 257 | for n in range(size): 258 | batch_pos_kernel[n] = l_kernel[pos_items[n]-1][:, pos_items[n]-1] 259 | batch_set_kernel[n] = l_kernel[set_items[n]-1][:, set_items[n]-1] 260 | 261 | batch_pos_q = torch.diag_embed(torch.exp(pos_predictions)) 262 | batch_set_q = torch.diag_embed(torch.exp(set_predictions)) 263 | 264 | batch_pos_kernel = torch.bmm(torch.bmm(batch_pos_q, batch_pos_kernel), batch_pos_q) 265 | batch_set_kernel = torch.bmm(torch.bmm(batch_set_q, batch_set_kernel), batch_set_q) 266 | 267 | p_diag = torch.eye(config.L + config.T)*1e-3 268 | pa_diag = p_diag.reshape((1, config.L + config.T, config.L + config.T)) 269 | pbatch_diag = pa_diag.repeat(size, 1, 1) 270 | 271 | s_diag = torch.diag_embed(torch.FloatTensor([1e-3]*config.L+[1]*(config.neg_samples+config.T))) 272 | sa_diag = s_diag.reshape((1, config.L + config.T + config.neg_samples, config.L + config.T + config.neg_samples)) 273 | sbatch_diag = sa_diag.repeat(size, 1, 1) 274 | 275 | batch_pos_det = torch.det(batch_pos_kernel.cpu() + pbatch_diag).cuda() 276 | batch_set_det = torch.det(batch_set_kernel.cpu() + sbatch_diag).cuda() 277 | 278 | dpp_loss = torch.log(batch_pos_det/batch_set_det) 279 | loss = -torch.mean(dpp_loss) 280 | else: 281 | diag_I = torch.diag_embed(torch.FloatTensor([1e-3]*config.L+[1]*(config.neg_samples+config.T))) 282 | diag_posI = torch.diag_embed(torch.FloatTensor([1e-3]*(config.L+config.T))) 283 | for n in range(size): 284 | pos_q = torch.diag_embed(torch.exp(pos_predictions[n])) 285 | set_q = torch.diag_embed(torch.exp(set_predictions[n])) 286 | 287 | pos_l_kernel = l_kernel[pos_items[n]-1][:, pos_items[n]-1] 288 | set_l_kernel = l_kernel[set_items[n]-1][:, set_items[n]-1] 289 | 290 | pos_k = torch.mm(torch.mm(pos_q, pos_l_kernel), pos_q) 291 | set_k = torch.mm(torch.mm(set_q, set_l_kernel), set_q) 292 | 293 | pos_det = torch.det(pos_k.cpu() + diag_posI).cuda() 294 | set_det = torch.det(set_k.cpu() + diag_I).cuda() 295 | 296 | dpp_loss = torch.log(pos_det/set_det) 297 | dpp_lhs.append(dpp_loss) 298 | loss = -torch.mean(torch.stack(dpp_lhs)) 299 | 300 | epoch_loss += loss.item() 301 | 302 | loss.backward() 303 | self._optimizer.step() 304 | 305 | epoch_loss /= minibatch_num + 1 306 | 307 | t2 = time() 308 | if verbose: 309 | if (epoch_num+1) % 10 == 0: 310 | precision, recall, ndcg, cc = evaluate_ranking(self, test, config, l_kernel, cate, train, k=[3, 5, 10]) 311 | output_str = "Epoch %d [%.1f s], loss=%.4f, " \ 312 | "prec@3=%.4f, *prec@5=%.4f, prec@10=%.4f, " \ 313 | "recall@3=%.4f, recall@5=%.4f, recall@10=%.4f, " \ 314 | "ndcg@3=%.4f, ndcg@5=%.4f, ndcg@10=%.4f, " \ 315 | "*cc@3=%.4f, cc@5=%.4f, cc@10=%.4f, [%.1f s]" % (epoch_num + 1, 316 | t2 - t1, 317 | epoch_loss, 318 | np.mean(precision[0]), 319 | np.mean(precision[1]), 320 | np.mean(precision[2]), 321 | 
np.mean(recall[0]), 322 | np.mean(recall[1]), 323 | np.mean(recall[2]), 324 | np.mean(ndcg[0]), 325 | np.mean(ndcg[1]), 326 | np.mean(ndcg[2]), 327 | np.mean(cc[0]), 328 | np.mean(cc[1]), 329 | np.mean(cc[2]), 330 | time() - t2) 331 | 332 | print(output_str) 333 | else: 334 | output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1, 335 | t2 - t1, 336 | epoch_loss, 337 | time() - t2) 338 | print(output_str) 339 | 340 | def _generate_negative_samples(self, users, interactions, n): 341 | 342 | """ 343 | Sample negative from a candidate set of each user. The 344 | candidate set of each user is defined by: 345 | {All Items} \ {Items Rated by User} 346 | 347 | Parameters 348 | ---------- 349 | 350 | users: array of np.int64 351 | sequence users 352 | interactions: :class:`spotlight.interactions.Interactions` 353 | training instances, used for generate candidates 354 | n: int 355 | total number of negatives to sample for each sequence 356 | """ 357 | 358 | users_ = users.squeeze() 359 | negative_samples = np.zeros((users_.shape[0], n), np.int64) 360 | if not self._candidate: 361 | all_items = np.arange(interactions.num_items - 1) + 1 # 0 for padding 362 | train = interactions.tocsr() 363 | for user, row in enumerate(train): 364 | self._candidate[user] = list(set(all_items) - set(row.indices)) 365 | 366 | for i, u in enumerate(users_): 367 | for j in range(n): 368 | x = self._candidate[u] 369 | negative_samples[i, j] = x[ 370 | np.random.randint(len(x))] 371 | 372 | return negative_samples 373 | 374 | def predict(self, user_id, item_ids=None): 375 | """ 376 | Make predictions for evaluation: given a user id, it will 377 | first retrieve the test sequence associated with that user 378 | and compute the recommendation scores for items. 379 | 380 | Parameters 381 | ---------- 382 | 383 | user_id: int 384 | users id for which prediction scores needed. 385 | item_ids: array, optional 386 | Array containing the item ids for which prediction scores 387 | are desired. If not supplied, predictions for all items 388 | will be computed. 
389 | """ 390 | 391 | if self.test_sequence is None: 392 | raise ValueError('Missing test sequences, cannot make predictions') 393 | 394 | # set model to evaluation model 395 | self._net.eval() 396 | with torch.no_grad(): 397 | sequences_np = self.test_sequence.sequences[user_id, :] 398 | sequences_np = np.atleast_2d(sequences_np) 399 | 400 | if item_ids is None: 401 | item_ids = np.arange(self._num_items).reshape(-1, 1) 402 | 403 | sequences = torch.from_numpy(sequences_np).long() 404 | item_ids = torch.from_numpy(item_ids).long() 405 | user_id = torch.from_numpy(np.array([[user_id]])).long() 406 | 407 | user, sequences, items = (user_id.to(self._device), 408 | sequences.to(self._device), 409 | item_ids.to(self._device)) 410 | 411 | out = self._net(sequences, 412 | user, 413 | items, 414 | for_pred=True) 415 | 416 | return out.cpu().numpy().flatten() 417 | 418 | def sigma(self, x): 419 | res = 1 - torch.exp(-model_config.sigma_alpha*x) 420 | return res 421 | 422 | def get_cates_map(cate_file): 423 | iidcate_map = {} #iid:cates 424 | ## movie_id:cate_ids, cate_ids is not only one 425 | with open(cate_file) as f_cate: 426 | for l in f_cate.readlines(): 427 | if len(l) == 0: break 428 | l = l.strip('\n') 429 | items = [int(i) for i in l.split(' ')] 430 | iid, cate_ids = items[0], items[1:] 431 | iidcate_map[iid] = cate_ids 432 | return iidcate_map 433 | 434 | if __name__ == '__main__': 435 | parser = argparse.ArgumentParser() 436 | # data arguments 437 | parser.add_argument('--train_root', type=str, default='datasets/beauty/train_3.txt') 438 | parser.add_argument('--test_root', type=str, default='datasets/beauty/test_3.txt') 439 | parser.add_argument('--cateid_root', type=str, default='datasets/beauty/cate.txt') 440 | parser.add_argument('--l_kernel_emb', type=str, default='datasets/beauty/item_kernel_3.pkl') 441 | parser.add_argument('--cate_num', type=int, default=213) 442 | parser.add_argument('--L', type=int, default=5) 443 | parser.add_argument('--T', type=int, default=3, help="consistent with the postfix of dataset") 444 | # dpp arguments 445 | parser.add_argument('--neg_samples', type=int, default=3, help="Z") 446 | parser.add_argument('--dpp_loss', type=int, default=2, help="0:cross-entropy, 1:DSL, 2:CDSL") 447 | parser.add_argument('--batch_format', type=int, default=1, help="use minibatch format for dpp loss or not") 448 | # train arguments 449 | parser.add_argument('--n_iter', type=int, default=100) 450 | parser.add_argument('--seed', type=int, default=1234) 451 | parser.add_argument('--batch_size', type=int, default=512) 452 | parser.add_argument('--learning_rate', type=float, default=0.001, help="[0.0005 0.001 0.0015], default 0.001") 453 | parser.add_argument('--l2', type=float, default=1e-4) 454 | parser.add_argument('--use_cuda', type=str2bool, default=True) 455 | 456 | config = parser.parse_args() 457 | 458 | # model dependent arguments 459 | model_parser = argparse.ArgumentParser() 460 | model_parser.add_argument('--d', type=int, default=50) 461 | model_parser.add_argument('--nv', type=int, default=4) 462 | model_parser.add_argument('--nh', type=int, default=16) 463 | model_parser.add_argument('--drop', type=float, default=0.5) 464 | model_parser.add_argument('--ac_conv', type=str, default='relu') 465 | model_parser.add_argument('--ac_fc', type=str, default='relu') 466 | model_parser.add_argument('--sigma_alpha', type=float, default=0.01) 467 | 468 | model_config = model_parser.parse_args() 469 | model_config.L = config.L 470 | 471 | # set seed 472 | set_seed(config.seed, 
473 | cuda=config.use_cuda) 474 | 475 | # load dataset 476 | train = Interactions(config.train_root) 477 | # transform triplets to sequence representation 478 | train.to_sequence(config.L, config.T) 479 | 480 | test = Interactions(config.test_root, 481 | user_map=train.user_map, 482 | item_map=train.item_map) 483 | 484 | cate = get_cates_map(config.cateid_root) 485 | 486 | print(config) 487 | print(model_config) 488 | # fit model 489 | model = Recommender(n_iter=config.n_iter, 490 | batch_size=config.batch_size, 491 | learning_rate=config.learning_rate, 492 | l2=config.l2, 493 | neg_samples=config.neg_samples, 494 | model_args=model_config, 495 | use_cuda=config.use_cuda) 496 | 497 | model.fit(train, test, cate, config, verbose=True) 498 | --------------------------------------------------------------------------------
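Editor's note on the DPP losses in `train_caser.py`: the batched DSL branch (`--dpp_loss 1 --batch_format 1`) gathers a per-example sub-kernel of the pre-learned item L-kernel, scales it by the predicted item qualities, and maximizes the log-ratio of two determinants. Below is a minimal, self-contained sketch of that computation for reference only; the function and argument names (`dpp_set_loss`, `target_scores`, ...) are illustrative and not part of the repository.

```python
import torch

def dpp_set_loss(l_kernel, targets, negatives, target_scores, negative_scores, eps=1e-5):
    """Batched DPP set likelihood (DSL) sketch.

    l_kernel: [num_items, num_items] pre-learned item L-kernel.
    targets / negatives: LongTensors [B, T] / [B, Z] of 1-indexed item ids.
    target_scores / negative_scores: predicted qualities with matching shapes.
    """
    sets = torch.cat((targets, negatives), dim=1)                 # [B, T+Z]
    scores = torch.cat((target_scores, negative_scores), dim=1)

    # Per-example sub-kernels L_S = L[S][:, S]; ids are shifted by 1 because 0 is the padding id.
    pos_idx, set_idx = targets - 1, sets - 1
    pos_L = l_kernel[pos_idx.unsqueeze(2), pos_idx.unsqueeze(1)]  # [B, T, T]
    set_L = l_kernel[set_idx.unsqueeze(2), set_idx.unsqueeze(1)]  # [B, T+Z, T+Z]

    # Quality-weighted kernels K = diag(q) L diag(q), with q = exp(score).
    pos_q = torch.diag_embed(torch.exp(target_scores))
    set_q = torch.diag_embed(torch.exp(scores))
    pos_K = pos_q @ pos_L @ pos_q
    set_K = set_q @ set_L @ set_q

    # -mean(log det(K_pos + eps*I) - log det(K_set + I)), as in the batch_format=1 branch:
    # a small jitter stabilizes the numerator, the full identity normalizes the denominator.
    eye_pos = eps * torch.eye(targets.size(1), device=pos_K.device)
    eye_set = torch.eye(sets.size(1), device=set_K.device)
    return -torch.mean(torch.log(torch.det(pos_K + eye_pos) / torch.det(set_K + eye_set)))
```

The CDSL branch (`--dpp_loss 2`) follows the same pattern, with the L input-sequence items prepended to both the numerator and denominator sets and a 1e-3 jitter on those positions.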