├── README.md ├── Data_loader.py ├── utils.py ├── generator_recsys.py └── nextitrec_baseline.py /README.md: -------------------------------------------------------------------------------- 1 | # NextItNet-Pytorch 2 | 3 | PyTorch implementation of NextItNet from "A Simple Convolutional Generative Network for Next Item Recommendation". 4 | 5 | Use the leave-one-out strategy to train, validate, and test network parameters. 6 | 7 | Dataset: MovieLens-1M https://grouplens.org/datasets/movielens/1m/ 8 | 9 | Please cite this paper if you find our code is useful 10 | 11 | @inproceedings{yuan2018simple, 12 | title={A Simple Convolutional Generative Network for Next Item Recommendation }, 13 | author={Yuan, Fajie and Karatzoglou, Alexandros and Arapakis, Ioannis and Jose, Joemon M and He, Xiangnan}, 14 | booktitle={Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining}, 15 | year={2019}, 16 | organization={ACM} 17 | } 18 | -------------------------------------------------------------------------------- /Data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import listdir 3 | from os.path import isfile, join 4 | import numpy as np 5 | import time 6 | 7 | # This Data_Loader file is copied online 8 | 9 | def INFO_LOG(info): 10 | print("[%s]%s"%(time.strftime("%Y-%m-%d %X", time.localtime()), info)) 11 | 12 | 13 | class Data_Loader: 14 | def __init__(self, options): 15 | self.pad = "" 16 | positive_data_file = options['dir_name'] 17 | positive_examples = list(open(positive_data_file, "r").readlines()) 18 | positive_examples = [s for s in positive_examples] 19 | 20 | self.max_document_length = max([len(x.split(",")) for x in positive_examples]) 21 | 22 | self.item_fre = {self.pad: 0} 23 | 24 | for sample in positive_examples: 25 | for item in sample.strip().split(","): 26 | if item in self.item_fre.keys(): 27 | self.item_fre[item] += 1 28 | else: 29 | self.item_fre[item] = 1 30 | self.item_fre[self.pad] += self.max_document_length - len(sample.strip().split(",")) 31 | 32 | count_pairs = sorted(self.item_fre.items(), key=lambda x: (-x[1], x[0])) 33 | self.items_voc, _ = list(zip(*count_pairs)) 34 | self.item2id = dict(zip(self.items_voc, range(len(self.items_voc)))) 35 | self.padid = self.item2id[self.pad] 36 | self.id2item = {value:key for key, value in self.item2id.items()} 37 | 38 | INFO_LOG("Vocab size:{}".format(self.size())) 39 | 40 | self.items = np.array(self.getSamplesid(positive_examples)) 41 | 42 | 43 | 44 | def sample2id(self, sample): 45 | sample2id = [] 46 | for s in sample.strip().split(','): 47 | sample2id.append(self.item2id[s]) 48 | # while len(sample2id) < self.max_document_length: 49 | # sample2id.append(self.unkid) 50 | sample2id = ([self.padid] * (self.max_document_length - len(sample2id))) + sample2id 51 | return sample2id 52 | 53 | def getSamplesid(self, samples): 54 | samples2id = [] 55 | for sample in samples: 56 | samples2id.append(self.sample2id(sample)) 57 | 58 | return samples2id 59 | 60 | 61 | def size(self): 62 | return len(self.item2id) 63 | 64 | 65 | def load_generator_data(self, sample_size): 66 | text = self.text 67 | mod_size = len(text) - len(text)%sample_size 68 | text = text[0:mod_size] 69 | text = text.reshape(-1, sample_size) 70 | return text, self.vocab_indexed 71 | 72 | 73 | def string_to_indices(self, sentence, vocab): 74 | indices = [ self.item2id[s] for s in sentence.split(',') ] 75 | return indices 76 | 77 | def inidices_to_string(self, sentence, vocab): 78 | id_ch = { vocab[ch] : ch for ch in vocab } 79 | sent = [] 80 | for c in sentence: 81 | if id_ch[c] == 'eol': 82 | break 83 | sent += id_ch[c] 84 | 85 | return "".join(sent) 86 | 87 | 88 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Variable 4 | import torch.nn as nn 5 | 6 | 7 | def sample_top(a=[], top_k=10): 8 | idx = np.argsort(a)[::-1] 9 | idx = idx[:top_k] 10 | probs = a[idx] 11 | probs = probs / np.sum(probs) 12 | choice = np.random.choice(idx, p=probs) 13 | return choice 14 | 15 | # fajie 16 | def sample_top_k(a=[], top_k=10): 17 | idx = np.argsort(a)[::-1] 18 | idx = idx[:top_k] 19 | # probs = a[idx] 20 | # probs = probs / np.sum(probs) 21 | # choice = np.random.choice(idx, p=probs) 22 | return idx 23 | 24 | # print(sample_top_k(np.array([0.02,0.01,0.01,0.16,0.8]),3)) 25 | 26 | def to_var(x, requires_grad=False, volatile=False): 27 | """ 28 | Varialbe type that automatically choose cpu or cuda 29 | """ 30 | if torch.cuda.is_available(): 31 | x = x.cuda() 32 | return Variable(x, requires_grad=requires_grad, volatile=volatile) 33 | 34 | 35 | def prune_rate(model, verbose=True): 36 | """ 37 | Print out prune rate for each layer and the whole network 38 | """ 39 | total_nb_param = 0 40 | nb_zero_param = 0 41 | 42 | layer_id = 0 43 | 44 | for parameter in model.parameters(): 45 | 46 | param_this_layer = 1 47 | for dim in parameter.data.size(): 48 | param_this_layer *= dim 49 | total_nb_param += param_this_layer 50 | 51 | # only pruning linear and conv layers 52 | if len(parameter.data.size()) != 1: 53 | layer_id += 1 54 | zero_param_this_layer = \ 55 | np.count_nonzero(parameter.cpu().data.numpy() == 0) 56 | nb_zero_param += zero_param_this_layer 57 | 58 | if verbose: 59 | print("Layer {} | {} layer | {:.2f}% parameters pruned" \ 60 | .format( 61 | layer_id, 62 | 'Conv' if len(parameter.data.size()) == 4 \ 63 | else 'Linear', 64 | 100. * zero_param_this_layer / param_this_layer, 65 | )) 66 | pruning_perc = 100. * nb_zero_param / total_nb_param 67 | if verbose: 68 | print("Final pruning rate: {:.2f}%".format(pruning_perc)) 69 | return pruning_perc 70 | 71 | 72 | def arg_nonzero_min(a): 73 | """ 74 | nonzero argmin of a non-negative array 75 | """ 76 | 77 | if not a: 78 | return 79 | 80 | min_ix, min_v = None, None 81 | # find the starting value (should be nonzero) 82 | for i, e in enumerate(a): 83 | if e != 0: 84 | min_ix = i 85 | min_v = e 86 | if not min_ix: 87 | print('Warning: all zero') 88 | return np.inf, np.inf 89 | 90 | # search for the smallest nonzero 91 | for i, e in enumerate(a): 92 | if e < min_v and e != 0: 93 | min_v = e 94 | min_ix = i 95 | 96 | return min_v, min_ix 97 | 98 | 99 | def getOneHot(y): #[batch_size, class_num] 100 | shape = y.size() 101 | _, ind = y.max(dim=-1) 102 | y_hard = torch.zeros_like(y).view(-1, shape[-1]) 103 | y_hard.scatter_(1, ind.view(-1, 1), 1) 104 | y_hard = y_hard.view(*shape) 105 | return y_hard -------------------------------------------------------------------------------- /generator_recsys.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.functional as F 4 | import torch.nn.functional as F2 5 | import time 6 | import math 7 | from torch.autograd import Variable 8 | import numpy as np 9 | 10 | 11 | class ResidualBlock(nn.Module): 12 | 13 | def __init__(self, in_channel, out_channel, kernel_size=3, dilation=None): 14 | super(ResidualBlock, self).__init__() 15 | self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=(1, kernel_size), padding=0, dilation=dilation) 16 | # self.conv1.weight = self.truncated_normal_(self.conv1.weight, 0, 0.02) 17 | # self.conv1.bias.data.zero_() 18 | 19 | self.ln1 = nn.LayerNorm(out_channel, eps=1e-8) 20 | self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=(1, kernel_size), padding=0, dilation=dilation*2) 21 | # self.conv1.weight = self.truncated_normal_(self.conv1.weight, 0, 0.02) 22 | # self.conv1.bias.data.zero_() 23 | 24 | self.ln2 = nn.LayerNorm(out_channel, eps=1e-8) 25 | self.dilation = dilation 26 | self.kernel_size = kernel_size 27 | 28 | def forward(self, x): # x: [batch_size, seq_len, embed_size] 29 | x_pad = self.conv_pad(x, self.dilation) 30 | out = self.conv1(x_pad).squeeze(2).permute(0, 2, 1) 31 | out = F2.relu(self.ln1(out)) 32 | out_pad = self.conv_pad(out, self.dilation*2) 33 | out = self.conv2(out_pad).squeeze(2).permute(0, 2, 1) 34 | out = F2.relu(self.ln2(out)) 35 | out = out + x 36 | return out 37 | 38 | def conv_pad(self, x, dilation): 39 | inputs_pad = x.permute(0, 2, 1) # [batch_size, embed_size, seq_len] 40 | inputs_pad = inputs_pad.unsqueeze(2) # [batch_size, embed_size, 1, seq_len] 41 | pad = nn.ZeroPad2d(((self.kernel_size - 1) * dilation, 0, 0, 0)) 42 | inputs_pad = pad(inputs_pad) # [batch_size, embed_size, 1, seq_len+(self.kernel_size-1)*dilations] 43 | return inputs_pad 44 | 45 | def truncated_normal_(self, tensor, mean=0, std=0.09): 46 | size = tensor.shape 47 | tmp = tensor.new_empty(size + (4,)).normal_() 48 | valid = (tmp < 2) & (tmp > -2) 49 | ind = valid.max(-1, keepdim=True)[1] 50 | tensor.data.copy_(tmp.gather(-1, ind).squeeze(-1)) 51 | tensor.data.mul_(std).add_(mean) 52 | return tensor 53 | 54 | class NextItNet_Decoder(nn.Module): 55 | 56 | def __init__(self, model_para): 57 | super(NextItNet_Decoder, self).__init__() 58 | self.model_para = model_para 59 | self.item_size = model_para['item_size'] 60 | self.embed_size = model_para['dilated_channels'] 61 | self.embeding = nn.Embedding(self.item_size, self.embed_size) 62 | stdv = np.sqrt(1. / self.item_size) 63 | self.embeding.weight.data.uniform_(-stdv, stdv) # important initializer 64 | # nn.init.uniform_(self.in_embed.weight, -1.0, 1.0) 65 | 66 | self.dilations = model_para['dilations'] 67 | self.residual_channels = model_para['dilated_channels'] 68 | self.kernel_size = model_para['kernel_size'] 69 | rb = [ResidualBlock(self.residual_channels, self.residual_channels, kernel_size=self.kernel_size, 70 | dilation=dilation) for dilation in self.dilations] 71 | self.residual_blocks = nn.Sequential(*rb) 72 | 73 | self.final_layer = nn.Linear(self.residual_channels, self.item_size) 74 | self.final_layer.weight.data.normal_(0.0, 0.01) # initializer 75 | self.final_layer.bias.data.fill_(0.1) 76 | 77 | def forward(self, x, onecall=False): # inputs: [batch_size, seq_len] 78 | inputs = self.embeding(x) # [batch_size, seq_len, embed_size] 79 | 80 | dilate_outputs = self.residual_blocks(inputs) 81 | 82 | if onecall: 83 | hidden = dilate_outputs[:, -1, :].view(-1, self.residual_channels) # [batch_size, embed_size] 84 | else: 85 | hidden = dilate_outputs.view(-1, self.residual_channels) # [batch_size*seq_len, embed_size] 86 | out = self.final_layer(hidden) 87 | 88 | return out 89 | 90 | 91 | 92 | # class NextItNet_Decoder(nn.Module): 93 | # 94 | # def __init__(self, model_para): 95 | # super(NextItNet_Decoder, self).__init__() 96 | # self.model_para = model_para 97 | # self.item_size = model_para['item_size'] 98 | # self.embed_size = model_para['dilated_channels'] 99 | # self.embeding = nn.Embedding(self.item_size, self.embed_size) 100 | # 101 | # self.dilations = model_para['dilations'] 102 | # self.residual_channels = model_para['dilated_channels'] 103 | # self.kernel_size = model_para['kernel_size'] 104 | # residual_block = [nn.ModuleList([nn.Conv2d(self.residual_channels, self.residual_channels, 105 | # kernel_size=(1, model_para['kernel_size']), padding=0, dilation=dilation), 106 | # nn.LayerNorm(self.residual_channels), 107 | # # Layer_norm(self.residual_channels), 108 | # # nn.ReLU(), 109 | # nn.Conv2d(self.residual_channels, self.residual_channels, 110 | # kernel_size=(1, model_para['kernel_size']), padding=0, dilation=2*dilation), 111 | # nn.LayerNorm(self.residual_channels), 112 | # # Layer_norm(self.residual_channels), 113 | # # nn.ReLU() 114 | # ]) for dilation in self.dilations] 115 | # self.residual_blocks = nn.ModuleList(residual_block) 116 | # 117 | # self.softmax_layer = nn.Linear(self.residual_channels, self.item_size) 118 | # 119 | # def forward(self, x, onecall=False): # inputs: [batch_size, seq_len] 120 | # inputs = self.embeding(x) # [batch_size, seq_len, embed_size] 121 | # 122 | # for i, block in enumerate(self.residual_blocks): 123 | # ori = inputs 124 | # 125 | # inputs_pad = self.conv_pad(inputs, self.dilations[i]) 126 | # # print(inputs_pad.size()) 127 | # dilated_conv = block[0](inputs_pad).squeeze(2) # [batch_size, embed_size, seq_len] 128 | # dilated_conv = dilated_conv.permute(0, 2, 1) 129 | # relu1 = F2.relu(block[1](dilated_conv)) # [batch_size, seq_len, embed_size] 130 | # 131 | # inputs_pad = self.conv_pad(relu1, self.dilations[i]*2) 132 | # # print(inputs_pad.size()) 133 | # dilated_conv = block[2](inputs_pad).squeeze(2) # [batch_size, embed_size, seq_len] 134 | # dilated_conv = dilated_conv.permute(0, 2, 1) 135 | # relu1 = F2.relu(block[3](dilated_conv)) # [batch_size, seq_len, embed_size] 136 | # inputs = ori + relu1 137 | # 138 | # if onecall: 139 | # hidden = inputs[:, -1, :].view(-1, self.residual_channels) # [batch_size, embed_size] 140 | # else: 141 | # hidden = inputs.view(-1, self.residual_channels) # [batch_size*seq_len, embed_size] 142 | # out = self.softmax_layer(hidden) 143 | # 144 | # return out 145 | # 146 | # def conv_pad(self, inputs, dila_): 147 | # inputs_pad = inputs.permute(0, 2, 1) # [batch_size, embed_size, seq_len] 148 | # inputs_pad = inputs_pad.unsqueeze(2) # [batch_size, embed_size, 1, seq_len] 149 | # pad = nn.ZeroPad2d(((self.kernel_size - 1) * dila_, 0, 0, 0)) 150 | # inputs_pad = pad(inputs_pad) # [batch_size, embed_size, 1, seq_len+(self.kernel_size-1)*self.dilations[i]] 151 | # return inputs_pad 152 | 153 | 154 | class Layer_norm(nn.Module): 155 | def __init__(self, size): 156 | super(Layer_norm, self).__init__() 157 | # self.beta = torch.zeros(size, requires_grad=True) 158 | # self.gamma = torch.ones(size, requires_grad=True) 159 | self.beta = nn.Parameter(torch.zeros(size)) 160 | # nn.init.zeros_(self.beta) 161 | self.gamma = nn.Parameter(torch.ones(size)) 162 | # nn.init.ones_(self.gamma) 163 | self.size = size 164 | self.epsilon = 1e-8 165 | 166 | def forward(self, x): 167 | shape = x.size() 168 | # print(shape) 169 | # print(x.mean(dim=2).size()) 170 | # print(x.std(dim=2, unbiased=False).size()) 171 | x = (x - x.mean(dim=2).view(shape[0], shape[1], 1)) / (x.std(dim=2, unbiased=False).view(shape[0], shape[1], 1) + self.epsilon) 172 | return self.gamma * x + self.beta 173 | 174 | 175 | class SamePad2d(nn.Module): 176 | """Mimics tensorflow's 'SAME' padding. 177 | """ 178 | 179 | def __init__(self, kernel_size, stride): 180 | super(SamePad2d, self).__init__() 181 | self.kernel_size = torch.nn.modules.utils._pair(kernel_size) 182 | self.stride = torch.nn.modules.utils._pair(stride) 183 | 184 | def forward(self, input): 185 | in_width = input.size()[2] 186 | in_height = input.size()[3] 187 | out_width = math.ceil(float(in_width) / float(self.stride[0])) 188 | out_height = math.ceil(float(in_height) / float(self.stride[1])) 189 | pad_along_width = ((out_width - 1) * self.stride[0] + 190 | self.kernel_size[0] - in_width) 191 | pad_along_height = ((out_height - 1) * self.stride[1] + 192 | self.kernel_size[1] - in_height) 193 | pad_left = math.floor(pad_along_width / 2) 194 | pad_top = math.floor(pad_along_height / 2) 195 | pad_right = pad_along_width - pad_left 196 | pad_bottom = pad_along_height - pad_top 197 | return F2.pad(input, (pad_left, pad_right, pad_top, pad_bottom), 'constant', 0) 198 | 199 | def __repr__(self): 200 | return self.__class__.__name__ 201 | 202 | -------------------------------------------------------------------------------- /nextitrec_baseline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from generator_recsys import NextItNet_Decoder 4 | import utils 5 | import shutil 6 | import time 7 | import math 8 | import numpy as np 9 | import argparse 10 | import Data_loader 11 | import os 12 | import random 13 | 14 | # You can run it directly, first training and then evaluating 15 | # nextitrec_generate.py can only be run when the model parameters are saved, i.e., 16 | # save_path = saver.save(sess, 17 | # "Data/Models/generation_model/model_nextitnet.ckpt".format(iter, numIters)) 18 | # if you are dealing very huge industry dataset, e.g.,several hundred million items, you may have memory problem during training, but it 19 | # be easily solved by simply changing the last layer, you do not need to calculate the cross entropy loss 20 | # based on the whole item vector. Similarly, you can also change the last layer (use tf.nn.embedding_lookup or gather) in the prediction phrase 21 | # if you want to just rank the recalled items instead of all items. The current code should be okay if the item size < 5 million. 22 | 23 | 24 | 25 | #Strongly suggest running codes on GPU with more than 10G memory!!! 26 | #if your session data is very long e.g, >50, and you find it may not have very strong internal sequence properties, you can consider generate subsequences 27 | def generatesubsequence(train_set): 28 | # create subsession only for training 29 | subseqtrain = [] 30 | for i in range(len(train_set)): 31 | # print x_train[i] 32 | seq = train_set[i] 33 | lenseq = len(seq) 34 | # session lens=100 shortest subsession=5 realvalue+95 0 35 | for j in range(lenseq - 2): 36 | subseqend = seq[:len(seq) - j] 37 | subseqbeg = [0] * j 38 | subseq = np.append(subseqbeg, subseqend) 39 | # beginseq=padzero+subseq 40 | # newsubseq=pad+subseq 41 | subseqtrain.append(subseq) 42 | x_train = np.array(subseqtrain) # list to ndarray 43 | del subseqtrain 44 | # Randomly shuffle data 45 | np.random.seed(10) 46 | shuffle_train = np.random.permutation(np.arange(len(x_train))) 47 | x_train = x_train[shuffle_train] 48 | print("generating subsessions is done!") 49 | return x_train 50 | 51 | 52 | def INFO_LOG(info): 53 | print("[%s]%s"%(time.strftime("%Y-%m-%d %X", time.localtime()), info)) 54 | 55 | # os.environ['CUDA_VISIBLE_DEVICES'] = '3' 56 | 57 | def getBatch(data, batch_size): 58 | start_inx = 0 59 | end_inx = batch_size 60 | 61 | while end_inx < len(data): 62 | batch = data[start_inx:end_inx] 63 | start_inx = end_inx 64 | end_inx += batch_size 65 | yield batch 66 | 67 | # if end_inx >= len(data): 68 | # batch = data[start_inx:] 69 | # yield batch 70 | 71 | 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument('--top_k', type=int, default=5, 74 | help='Sample from top k predictions') 75 | parser.add_argument('--beta1', type=float, default=0.9, 76 | help='hyperpara-Adam') 77 | parser.add_argument('--batch_size', default=128, type=int) 78 | # history_sequences_20181014_fajie 79 | # ml20m_update_ls30gr5 80 | # mllatest_update_ls100gr3.csv 81 | parser.add_argument('--datapath', type=str, default='Data/Session/ml20m_update_ls30gr5.csv', 82 | help='data path') 83 | parser.add_argument('--epochs', default=200, type=int) 84 | parser.add_argument('--device', default='cuda', type=str) 85 | parser.add_argument('--savedir', default='Data/checkpoint', type=str) 86 | parser.add_argument('--tt_percentage', type=float, default=0.2, 87 | help='0.2 means 80% training 20% testing') 88 | parser.add_argument('--is_generatesubsession', type=bool, default=False, 89 | help='whether generating a subsessions, e.g., 12345-->01234,00123,00012 It may be useful for very some very long sequences') 90 | parser.add_argument('--lr', default=0.001, type=float) 91 | parser.add_argument('--shrink_lr', action="store_true", default=False) 92 | parser.add_argument('--L2', default=0, type=float) 93 | args = parser.parse_args() 94 | print(args) 95 | dl = Data_loader.Data_Loader({'model_type': 'generator', 'dir_name': args.datapath}) 96 | all_samples = dl.items 97 | items_voc = dl.item2id 98 | 99 | print("shape: ", np.shape(all_samples)) 100 | 101 | # Split train/test set 102 | dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples))) 103 | train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:] 104 | 105 | # Randomly shuffle data 106 | np.random.seed(10) 107 | shuffle_indices = np.random.permutation(np.arange(len(train_set))) 108 | train_set = train_set[shuffle_indices] 109 | 110 | 111 | if args.is_generatesubsession: 112 | x_train = generatesubsequence(train_set) 113 | 114 | model_para = { 115 | #if you changed the parameters here, also do not forget to change paramters in nextitrec_generate.py 116 | 'item_size': len(items_voc), 117 | 'dilated_channels': 256, 118 | # if you use nextitnet_residual_block, you can use [1, 4, ], 119 | # if you use nextitnet_residual_block_one, you can tune and i suggest [1, 2, 4, ], for a trial 120 | # when you change it do not forget to change it in nextitrec_generate.py 121 | 'dilations': [1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4], 122 | 'kernel_size': 3, 123 | 'batch_size':args.batch_size, 124 | 'iterations':200, 125 | 'is_negsample':False, #False denotes no negative sampling 126 | 'seq_len': len(all_samples[0]), 127 | 'pad': dl.padid, 128 | } 129 | print("dilations", model_para["dilations"]) 130 | print("dilated_channels", model_para["dilated_channels"]) 131 | print("batch_size", model_para["batch_size"]) 132 | 133 | args.device = 'cuda' if torch.cuda.is_available() else 'cpu' 134 | 135 | model = NextItNet_Decoder(model_para).to(args.device) 136 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=0) 137 | 138 | if args.shrink_lr == True: 139 | lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.02) 140 | 141 | criterion = nn.CrossEntropyLoss() 142 | best_acc = 0 143 | 144 | 145 | def test(epoch): 146 | global best_acc 147 | model.eval() 148 | # test_loss = 0 149 | correct = 0 150 | total = 0 151 | batch_size = model_para['batch_size'] 152 | batch_num = valid_set.shape[0] / batch_size 153 | INFO_LOG("-------------------------------------------------------test") 154 | with torch.no_grad(): 155 | start = time.time() 156 | for batch_idx, batch_sam in enumerate(getBatch(valid_set, batch_size)): 157 | inputs, targets = torch.LongTensor(batch_sam[:,:-1]).to(args.device), torch.LongTensor(batch_sam[:,-1]).to(args.device).view([-1]) 158 | outputs = model(inputs, onecall=True) # [batch_size, item_size] only predict the last position 159 | 160 | _, sort_idx_20 = torch.topk(outputs, k=args.top_k + 15, sorted=True) # [batch_size, 20] 161 | _, sort_idx_5 = torch.topk(outputs, k=args.top_k, sorted=True) # [batch_size, 5] 162 | accuracy(sort_idx_5.data.cpu().numpy(), sort_idx_20.data.cpu().numpy(), targets.data.cpu().numpy(), 163 | batch_idx, batch_num, epoch) 164 | 165 | _, predicted = outputs.max(1) 166 | total += targets.size(0) 167 | correct += predicted.eq(targets).sum().item() 168 | 169 | end = time.time() 170 | print('Acc(hit@1): %.3f%% (%d/%d)' % (100. * correct / total, correct, total)) 171 | INFO_LOG("TIME FOR EPOCH During Testing: {}".format(end - start)) 172 | INFO_LOG("TIME FOR BATCH (mins): {}".format((end - start) / batch_num)) 173 | acc = 100. * correct / total 174 | if acc > best_acc: 175 | best_acc = acc 176 | state = { 177 | 'net': model.state_dict(), 178 | 'acc(hit@1)': acc 179 | } 180 | torch.save(state, '%s/best_weishi_%s.t7' % (args.savedir, model_para['dilations'])) 181 | print('epoch:%d accuracy(hit@1):%.3f best:%.3f' % (epoch, acc, best_acc)) 182 | 183 | INFO_LOG("epoch: {}\t total_epoch:{}\t total_batches:{}".format( 184 | epoch, args.epochs, batch_num)) 185 | INFO_LOG("Accuracy mrr_5: {}".format(sum(curr_preds_5) / float(len(curr_preds_5)))) 186 | INFO_LOG("Accuracy mrr_20: {}".format(sum(curr_preds_20) / float(len(curr_preds_20)))) 187 | INFO_LOG("Accuracy hit_5: {}".format(sum(rec_preds_5) / float(len(rec_preds_5)))) 188 | INFO_LOG("Accuracy hit_20: {}".format(sum(rec_preds_20) / float(len(rec_preds_20)))) 189 | INFO_LOG("Accuracy ndcg_5: {}".format(sum(ndcg_preds_5) / float(len(ndcg_preds_5)))) 190 | INFO_LOG("Accuracy ndcg_20: {}".format(sum(ndcg_preds_20) / float(len(ndcg_preds_20)))) 191 | 192 | 193 | def train(epoch): 194 | model.train() 195 | train_loss = 0 196 | correct = 0 197 | total = 0 198 | batch_size = model_para['batch_size'] 199 | batch_num = train_set.shape[0] / batch_size 200 | start = time.time() 201 | INFO_LOG("-------------------------------------------------------train") 202 | for batch_idx, batch_sam in enumerate(getBatch(train_set, batch_size)): 203 | inputs, targets = torch.LongTensor(batch_sam[:, :-1]).to(args.device), torch.LongTensor(batch_sam[:, 1:]).to( 204 | args.device).view([-1]) 205 | optimizer.zero_grad() 206 | outputs = model(inputs) # [batch_size*seq_len, item_size] 207 | loss = criterion(outputs, targets) 208 | 209 | L2_loss = 0 210 | for name, param in model.named_parameters(): 211 | if 'weight' in name: 212 | L2_loss += torch.norm(param, 2) 213 | loss += args.L2 * L2_loss 214 | 215 | loss.backward() 216 | optimizer.step() 217 | 218 | train_loss += loss.item() 219 | 220 | _, predicted = outputs.max(1) 221 | total += targets.size(0) 222 | correct += predicted.eq(targets).sum().item() 223 | if batch_idx % max(10, batch_num//10) == 0: 224 | INFO_LOG("epoch: {}\t {}/{}".format(epoch, batch_idx, batch_num)) 225 | print('Loss: %.3f | Acc(hit@1): %.3f%% (%d/%d)' % ( 226 | train_loss / (batch_idx + 1), 100. * correct / total, correct, total)) 227 | end = time.time() 228 | INFO_LOG("TIME FOR EPOCH During Training: {}".format(end - start)) 229 | INFO_LOG("TIME FOR BATCH (mins): {}".format((end - start) / batch_num)) 230 | if args.shrink_lr: 231 | lr_scheduler.step() 232 | 233 | 234 | def accuracy(pred_items_5, pred_items_20, target, batch_idx, batch_num, epoch): # output: [batch_size, 20] target: [batch_size] 235 | """Computes the accuracy over the k top predictions for the specified values of k""" 236 | # print(type(pred_items_20[0])) 237 | # print(type(pred_items_5[0])) 238 | for bi in range(pred_items_5.shape[0]): 239 | 240 | true_item=target[bi] 241 | predictmap_5={ch : i for i, ch in enumerate(pred_items_5[bi])} 242 | predictmap_20 = {ch: i for i, ch in enumerate(pred_items_20[bi])} 243 | 244 | rank_5 = predictmap_5.get(true_item) 245 | rank_20 = predictmap_20.get(true_item) 246 | if rank_5 == None: 247 | curr_preds_5.append(0.0) 248 | rec_preds_5.append(0.0) 249 | ndcg_preds_5.append(0.0) 250 | else: 251 | MRR_5 = 1.0/(rank_5+1) 252 | Rec_5 = 1.0#3 253 | ndcg_5 = 1.0 / math.log(rank_5 + 2, 2) # 3 254 | curr_preds_5.append(MRR_5) 255 | rec_preds_5.append(Rec_5)#4 256 | ndcg_preds_5.append(ndcg_5) # 4 257 | if rank_20 == None: 258 | curr_preds_20.append(0.0) 259 | rec_preds_20.append(0.0)#2 260 | ndcg_preds_20.append(0.0)#2 261 | else: 262 | MRR_20 = 1.0/(rank_20+1) 263 | Rec_20 = 1.0#3 264 | ndcg_20 = 1.0 / math.log(rank_20 + 2, 2) # 3 265 | curr_preds_20.append(MRR_20) 266 | rec_preds_20.append(Rec_20) # 4 267 | ndcg_preds_20.append(ndcg_20) # 4 268 | 269 | if batch_idx % max(10, batch_num//10) == 0: 270 | # INFO_LOG("epoch/total_epoch: {}/{}\t batch/total_batches: {}/{} \t loss: {:.3f}".format( 271 | # epoch, args.epochs, batch_idx, batch_num, loss/(batch_idx+1))) 272 | INFO_LOG("epoch/total_epoch: {}/{}\t batch/total_batches: {}/{}".format( 273 | epoch, args.epochs, batch_idx, batch_num)) 274 | INFO_LOG("Accuracy hit_5: {}".format(sum(rec_preds_5) / float(len(rec_preds_5)))) # 5 275 | INFO_LOG("Accuracy hit_20: {}".format(sum(rec_preds_20) / float(len(rec_preds_20)))) # 5 276 | 277 | 278 | 279 | if __name__ == '__main__': 280 | for i, (key, u) in enumerate(model.state_dict().items()): 281 | print(key, u.size()) 282 | for epoch in range(args.epochs): 283 | train(epoch) 284 | curr_preds_5 = [] 285 | rec_preds_5 = [] 286 | ndcg_preds_5 = [] 287 | curr_preds_20 = [] 288 | rec_preds_20 = [] 289 | ndcg_preds_20 = [] 290 | test(epoch) 291 | state = { 292 | 'net': model.state_dict(), 293 | } 294 | torch.save(state, '%s/ckpt_%d.t7' % (args.savedir, epoch)) 295 | 296 | --------------------------------------------------------------------------------