├── Data_loader_SASRec.py ├── Data_loader_SSEPT.py ├── Modules.py ├── README.md ├── SASRec_Alpha.py ├── SSEPT_Alpha.py ├── baseline_SASRec.py ├── baseline_SSEPT.py ├── data_loader.py ├── data_loader_finetune.py ├── deep_GRec.py ├── deep_nextitnet.py ├── deep_nextitnet_coldrec.py ├── fineall.py ├── generator_deep.py ├── generator_deep_GRec.py ├── ops.py ├── ops_copytop.py ├── ops_original.py ├── requirements.txt ├── train_grec_sc1.sh ├── train_nextitnet_sc1.sh ├── train_nextitnet_sc2.sh ├── train_nextitnet_sc3.sh ├── train_sasrec_sc1.sh ├── train_ssept_sc1.sh ├── utils.py └── utils_GRec.py /Data_loader_SASRec.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import listdir 3 | from os.path import isfile, join 4 | import numpy as np 5 | import time 6 | 7 | 8 | def INFO_LOG(info): 9 | print("[%s]%s" % (time.strftime("%Y-%m-%d %X", time.localtime()), info)) 10 | 11 | 12 | class Data_Loader: 13 | def __init__(self, options): 14 | self.pad = "" 15 | positive_data_file = options['dir_name'] 16 | positive_examples = list(open(positive_data_file, "r").readlines()) 17 | positive_examples = [s for s in positive_examples] 18 | 19 | self.max_document_length = max([len(x.split(",")) for x in positive_examples]) 20 | 21 | self.item_fre = {self.pad: 0} 22 | 23 | for sample in positive_examples: 24 | for item in sample.strip().split(","): 25 | if item in self.item_fre.keys(): 26 | self.item_fre[item] += 1 27 | else: 28 | self.item_fre[item] = 1 29 | self.item_fre[self.pad] += self.max_document_length - len(sample.strip().split(",")) 30 | 31 | # count_pairs = sorted(self.item_fre.items(), key=lambda x: (-x[1], x[0])) 32 | count_pairs = self.item_fre.items() 33 | self.items_voc, _ = list(zip(*count_pairs)) 34 | self.item2id = dict(zip(self.items_voc, range(len(self.items_voc)))) 35 | self.padid = self.item2id[self.pad] 36 | self.id2item = {value: key for key, value in self.item2id.items()} 37 | 38 | INFO_LOG("Vocab size:{}".format(self.size())) 39 | 40 | self.items = np.array(self.getSamplesid(positive_examples)) 41 | 42 | def sample2id(self, sample): 43 | sample2id = [] 44 | for s in sample.strip().split(','): 45 | sample2id.append(self.item2id[s]) 46 | 47 | sample2id = ([self.padid] * (self.max_document_length - len(sample2id))) + sample2id 48 | return sample2id 49 | 50 | def getSamplesid(self, samples): 51 | samples2id = [] 52 | for sample in samples: 53 | samples2id.append(self.sample2id(sample)) 54 | 55 | return samples2id 56 | 57 | def size(self): 58 | return len(self.item2id) 59 | 60 | def load_generator_data(self, sample_size): 61 | text = self.text 62 | mod_size = len(text) - len(text) % sample_size 63 | text = text[0:mod_size] 64 | text = text.reshape(-1, sample_size) 65 | return text, self.vocab_indexed 66 | 67 | def string_to_indices(self, sentence, vocab): 68 | indices = [self.item2id[s] for s in sentence.split(',')] 69 | return indices 70 | 71 | def inidices_to_string(self, sentence, vocab): 72 | id_ch = {vocab[ch]: ch for ch in vocab} 73 | sent = [] 74 | for c in sentence: 75 | if id_ch[c] == 'eol': 76 | break 77 | sent += id_ch[c] 78 | 79 | return "".join(sent) 80 | 81 | 82 | -------------------------------------------------------------------------------- /Data_loader_SSEPT.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import listdir 3 | from os.path import isfile, join 4 | import numpy as np 5 | import time 6 | 7 | 8 | def INFO_LOG(info): 9 | print("[%s]%s" % 
(time.strftime("%Y-%m-%d %X", time.localtime()), info)) 10 | 11 | 12 | class Data_Loader: 13 | def __init__(self, options): 14 | self.pad = "" 15 | positive_data_file = options['dir_name'] 16 | positive_examples = list(open(positive_data_file, "r").readlines()) 17 | positive_examples = [s for s in positive_examples] 18 | 19 | self.max_document_length = max([len(x.split(",")[1:]) for x in positive_examples]) 20 | 21 | users = [int(x.split(",")[0]) for x in positive_examples] 22 | users = np.reshape(users, (-1, 1)) 23 | self.user_size = len(np.unique(users)) 24 | self.item_fre = {self.pad: 0} 25 | 26 | for sample in positive_examples: 27 | for item in sample.strip().split(",")[1:]: 28 | if item in self.item_fre.keys(): 29 | self.item_fre[item] += 1 30 | else: 31 | self.item_fre[item] = 1 32 | self.item_fre[self.pad] += self.max_document_length - len(sample.strip().split(",")[1:]) 33 | 34 | # count_pairs = sorted(self.item_fre.items(), key=lambda x: (-x[1], x[0])) 35 | count_pairs = self.item_fre.items() 36 | self.items_voc, _ = list(zip(*count_pairs)) 37 | self.item2id = dict(zip(self.items_voc, range(len(self.items_voc)))) 38 | self.padid = self.item2id[self.pad] 39 | self.id2item = {value: key for key, value in self.item2id.items()} 40 | 41 | INFO_LOG("Vocab size:{}".format(self.size())) 42 | 43 | self.items = np.array(self.getSamplesid(positive_examples)) 44 | self.items = np.concatenate((users, self.items), axis=1) 45 | 46 | def sample2id(self, sample): 47 | sample2id = [] 48 | for s in sample.strip().split(',')[1:]: 49 | sample2id.append(self.item2id[s]) 50 | 51 | sample2id = ([self.padid] * (self.max_document_length - len(sample2id))) + sample2id 52 | return sample2id 53 | 54 | def getSamplesid(self, samples): 55 | samples2id = [] 56 | for sample in samples: 57 | samples2id.append(self.sample2id(sample)) 58 | 59 | return samples2id 60 | 61 | def size(self): 62 | return len(self.item2id) 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /Modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from utils import normalize 6 | from torch.nn.parameter import Parameter 7 | class multihead_attention_alpha(nn.Module): 8 | def __init__(self, model_para, reader, layer_id, layer_num, hidden_size, num_units=None, num_heads=8, dropout_rate=0, causality=True, 9 | with_qk=False): 10 | super(multihead_attention_alpha, self).__init__() 11 | self.num_units = num_units 12 | self.num_heads = num_heads 13 | self.dropout_rate = dropout_rate 14 | self.causality = causality 15 | self.with_qk = with_qk 16 | self.hidden_size = hidden_size 17 | self.fc1 = nn.Linear(self.hidden_size, num_units) 18 | self.fc2 = nn.Linear(self.hidden_size, num_units) 19 | self.fc3 = nn.Linear(self.hidden_size, num_units) 20 | self.softmax = nn.Softmax(dim=-1) 21 | self.dropout = nn.Dropout(self.dropout_rate) 22 | self.layer_norm = nn.LayerNorm(num_units) 23 | self.rez = nn.Parameter(torch.zeros(1)) 24 | 25 | self.method = model_para['method'] 26 | 27 | if model_para["load_model"]: 28 | if model_para['method'] == 'stackC': 29 | if layer_id >= layer_num - 6: 30 | relative_layer_id = layer_id - 6 31 | else: 32 | relative_layer_id = layer_id 33 | elif model_para['method'] == 'stackA': 34 | if layer_id >= layer_num - 6: 35 | relative_layer_id = int((layer_id - 6) // 2 + 6) 36 | else: 37 | relative_layer_id = layer_id 38 | else: 39 | print("method 
is wrong!!!!!!!!!!!!!!") 40 | relative_layer_id = str(relative_layer_id) 41 | 42 | 43 | 44 | initial_name = "transformers." + relative_layer_id + ".SelfAttention.fc1.weight" 45 | self.fc1.weight = Parameter(reader[initial_name]) 46 | initial_name = "transformers." + relative_layer_id + ".SelfAttention.fc1.bias" 47 | self.fc1.bias = Parameter(reader[initial_name]) 48 | print("load selfattention fc1 weight", layer_id, " from ", relative_layer_id) 49 | print("load selfattention fc1 bias", layer_id, " from ", relative_layer_id) 50 | 51 | initial_name = "transformers." + relative_layer_id + ".SelfAttention.fc2.weight" 52 | self.fc2.weight = Parameter(reader[initial_name]) 53 | initial_name = "transformers." + relative_layer_id + ".SelfAttention.fc2.bias" 54 | self.fc2.bias = Parameter(reader[initial_name]) 55 | print("load selfattention fc2 weight", layer_id, " from ", relative_layer_id) 56 | print("load selfattention fc2 bias", layer_id, " from ", relative_layer_id) 57 | 58 | initial_name = "transformers." + relative_layer_id + ".SelfAttention.fc3.weight" 59 | self.fc3.weight = Parameter(reader[initial_name]) 60 | initial_name = "transformers." + relative_layer_id + ".SelfAttention.fc3.bias" 61 | self.fc3.bias = Parameter(reader[initial_name]) 62 | print("load selfattention fc3 weight", layer_id, " from ", relative_layer_id) 63 | print("load selfattention fc3 bias", layer_id, " from ", relative_layer_id) 64 | 65 | 66 | def forward(self, queries, keys): 67 | if self.num_units is None: 68 | self.num_units = queries.size(-1) 69 | # Linear projections 70 | 71 | Q = self.fc1(queries) # (N, T_q, C) 72 | K = self.fc2(keys) # (N, T_k, C) 73 | V = self.fc3(keys) # (N, T_k, C) 74 | 75 | # Split and concat 76 | q_split = int(Q.size(2) / self.num_heads) 77 | k_split = int(K.size(2) / self.num_heads) 78 | v_split = int(V.size(2) / self.num_heads) 79 | Q_ = torch.cat(torch.split(Q, q_split, dim=2), dim=0) # (h*N, T_q, C/h) 80 | K_ = torch.cat(torch.split(K, k_split, dim=2), dim=0) # (h*N, T_k, C/h) 81 | V_ = torch.cat(torch.split(V, v_split, dim=2), dim=0) # (h*N, T_k, C/h) 82 | 83 | # Multiplication 84 | outputs = torch.matmul(Q_, K_.permute(0, 2, 1)) # (h*N, T_q, T_k) 85 | 86 | # Scale 87 | outputs = outputs / (K_.size(-1) ** 0.5) 88 | 89 | # Key Masking 90 | #key_masks = torch.sign(torch.abs(torch.sum(keys, -1))) # (N, T_k) 91 | #key_masks = torch.cat(self.num_heads * [key_masks]) # (h*N, T_k) 92 | #key_masks = torch.cat(queries.size(1) * [key_masks.unsqueeze(1)], dim=1) # (h*N, T_q, T_k) 93 | 94 | #paddings = torch.ones_like(outputs) * (-2 ** 32 + 1) 95 | #outputs = torch.where(torch.eq(key_masks, 0), paddings, outputs) # (h*N, T_q, T_k) 96 | 97 | # Causality = Future blinding 98 | if self.causality: 99 | diag_vals = torch.ones_like(outputs[0, :, :]) # (T_q, T_k) 100 | tril = torch.tril(diag_vals) # (T_q, T_k) 101 | masks = torch.cat(outputs.size(0) * [tril.unsqueeze(0)]) # (h*N, T_q, T_k) 102 | 103 | paddings = torch.ones_like(masks) * (-2 ** 32 + 1) 104 | outputs = torch.where(torch.eq(masks, 0), paddings, outputs) # (h*N, T_q, T_k) 105 | 106 | # Activation 107 | outputs = self.softmax(outputs) # (h*N, T_q, T_k) 108 | 109 | # Query Masking 110 | # query_masks = torch.sign(torch.abs(torch.sum(queries,-1))) # (N, T_q) 111 | # query_masks = torch.cat(self.num_heads*[query_masks]) # (h*N, T_q) 112 | # query_masks = torch.cat(keys.size(1)*[query_masks.unsqueeze(-1)], dim=2) # (h*N, T_q, T_k) 113 | # outputs *= query_masks # broadcasting. 
(N, T_q, C) 114 | 115 | # Dropouts 116 | 117 | outputs = self.dropout(outputs) 118 | 119 | # Weighted sum 120 | outputs = torch.matmul(outputs, V_) # ( h*N, T_q, C/h) 121 | 122 | # Restore shape 123 | o_split = int(outputs.size(0) / self.num_heads) 124 | outputs = torch.cat(torch.split(outputs, o_split, dim=0), dim=2) # (N, T_q, C) 125 | 126 | # Residual connection 127 | outputs = queries + outputs * self.rez 128 | 129 | # Normalize 130 | outputs = self.layer_norm(outputs) # (N, T_q, C) 131 | 132 | if self.with_qk: 133 | return Q, K 134 | else: 135 | return outputs 136 | 137 | 138 | class feedforward_alpha(nn.Module): 139 | 140 | def __init__(self, model_para, reader, layer_id, layer_num, num_units, dropout_rate=0.5): 141 | super(feedforward_alpha, self).__init__() 142 | self.inner_cnn = nn.Conv1d(num_units[0], num_units[0], 1) 143 | self.readout_cnn = nn.Conv1d(num_units[0], num_units[1], 1) 144 | self.dropout = nn.Dropout(dropout_rate) 145 | self.layer_norm = nn.LayerNorm(num_units[1]) 146 | self.rez = nn.Parameter(torch.zeros(1)) 147 | 148 | def forward(self, inputs): 149 | residual = inputs 150 | x = inputs.transpose(1, 2) # [N, C, T_q] 151 | x = F.relu(self.inner_cnn(x)) 152 | x = self.dropout(x) 153 | x = self.readout_cnn(x) 154 | x = x.transpose(1, 2) # [N, C, T_q] 155 | x = self.dropout(x) 156 | x = residual + x * self.rez 157 | outputs = self.layer_norm(x) 158 | return outputs 159 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StackRec: Efficient Training of Very Deep Sequential Recommender Models by Iterative Stacking 2 | 3 | ## Datasets 4 | You can download datasets that have been pre-processed: 5 | - ML20: https://pan.baidu.com/s/14pk0N-yraoxGgsnbJRPG5Q code(提取码): 7yha 6 | - ColdRec: 7 | https://pan.baidu.com/s/1AkTImhvnD8WyXCTOuynZ8g code(提取码): 9cs2 8 | https://pan.baidu.com/s/1byW5uCZbdEjGzoXJAlPalQ code(提取码): 856z 9 | - Video-6M: 10 | https://drive.google.com/file/d/1wd3xzF9VnZ6r35nMb3-H4E31vWK87vjW/view?usp=sharing 11 | 12 | We construct a large-scale session-based recommendation dataset (denoted as Video-6M) by collecting the interactiton behaviors of nearly 6 million users in a week from a commercial recommender system. The dataset can be used to evaluate very deep recommendation models (up to 100 layers), such as NextItNet (as shown in our paper StackRec(SIGIR2021)). 13 | If you use this dataset in your paper, you should cite our NextItNet and StackRec for publish permission. 
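The loaders in this repository expect one interaction session per line with comma-separated item IDs (the SSEPT loader additionally reads the user ID from the first field). As a quick sanity check, a downloaded dataset can be inspected with the SASRec loader; this is a minimal sketch, and the path below is a placeholder for wherever the file is unpacked:

```
# Minimal sketch: peek at a pre-processed session file with the repo's loader.
# The path is a placeholder; point it at the downloaded dataset file.
import numpy as np
import Data_loader_SASRec as Data_loader

dl = Data_loader.Data_Loader({'model_type': 'generator', 'dir_name': 'Data/video_6m/video_6m.csv'})
print("sessions (padded):", np.shape(dl.items))  # (num_sessions, max_session_len), left-padded with the pad id
print("vocab size:", len(dl.item2id))            # number of distinct items, including the "" pad token
```

The BibTeX entries for NextItNet and StackRec are given below.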
14 | 15 | ``` 16 | @article{yuan2019simple, 17 | title={A simple convolutional generative network for next item recommendation}, 18 | author={Yuan, Fajie and Karatzoglou, Alexandros and Arapakis, Ioannis and Jose, Joemon M and He, Xiangnan}, 19 | journal={Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining}, 20 | year={2019} 21 | } 22 | 23 | @article{wang2020stackrec, 24 | title={StackRec: Efficient Training of Very Deep Sequential Recommender Models by Iterative Stacking}, 25 | author={Wang, Jiachun and Yuan, Fajie and Chen, Jian and Wu, Qingyao and Li, Chengmin and Yang, Min and Sun, Yang and Zhang, Guoxiao}, 26 | journal={Proceedings of the 44th International ACM SIGIR conference on Research and Development in Information Retrieval}, 27 | year={2021} 28 | } 29 | ``` 30 | 31 | ## File Description 32 | ``` 33 | requirements.txt: the experiment environment 34 | 35 | train_nextitnet_sc1.sh: the shell script to train StackRec with NextItNet in CL scenario 36 | train_nextitnet_sc2.sh: the shell script to train StackRec with NextItNet in TF scenario 37 | train_nextitnet_sc3.sh: the shell script to train StackRec with NextItNet in TS scenario 38 | deep_nextitnet.py: the training file of NextItNet 39 | deep_nextitnet_coldrec.py: the training file of NextItNet customized for coldrec source dataset 40 | data_loader.py: the dataset loading file of NextItNet and GRec 41 | data_loader_finetune.py: the dataset loading file of NextItNet and GRec customized for coldrec dataset 42 | generator_deep.py: the model file of NextItNet 43 | ops.py: the module file of NextItNet and GRec with stacking methods doubling blocks 44 | ops_copytop.py: the module file of NextItNet with stacking methods allowed to stack top blocks 45 | ops_original.py: the module file of NextItNet with stacking methods without alpha 46 | fineall.py: the training file of NextItNet customized for coldrec target dataset 47 | 48 | train_grec_sc1.sh: the shell script to train StackRec with GRec in CL scenario 49 | deep_GRec: the training file of GRec 50 | generator_deep_GRec.py: the model file of GRec 51 | utils_GRec.py: some tools for GRec 52 | 53 | train_sasrec_sc1.sh: the shell script to train StackRec with SASRec in CL scenario 54 | baseline_SASRec.py: the training file of SASRec 55 | Data_loader_SASRec.py: the dataset loading file of SASRec 56 | SASRec_Alpha.py: the model file of SASRec 57 | 58 | train_ssept_sc1.sh: the shell script to train StackRec with SSEPT in CL scenario 59 | baseline_SSEPT.py: the training file of SSEPT 60 | Data_loader_SSEPT.py: the dataset loading file of SSEPT 61 | SSEPT_Alpha.py: the model file of SSEPT 62 | utils.py: some tools for SASRec and SSEPT 63 | Modules.py: the module file of SASRec and SSEPT with stacking methods 64 | ``` 65 | 66 | ## Stacking with NextItNet 67 | ### Train in the CL scenario 68 | 69 | Execute example: 70 | 71 | ``` 72 | sh train_nextitnet_sc1.sh 73 | ``` 74 | 75 | ### Train in the TS scenario 76 | 77 | Execute example: 78 | 79 | ``` 80 | sh train_nextitnet_sc2.sh 81 | ``` 82 | 83 | ### Train in the TF scenario 84 | 85 | Execute example: 86 | 87 | ``` 88 | sh train_nextitnet_sc3.sh 89 | ``` 90 | 91 | ## Stacking with GRec 92 | 93 | Execute example: 94 | 95 | ``` 96 | sh train_grec_sc1.sh 97 | ``` 98 | 99 | ## Stacking with SASRec 100 | 101 | Execute example: 102 | 103 | ``` 104 | sh train_sasrec_sc1.sh 105 | ``` 106 | 107 | ## Stacking with SSEPT 108 | 109 | Execute example: 110 | 111 | ``` 112 | sh train_ssept_sc1.sh 113 | ``` 114 | 115 | ## Key 
Configuration 116 | - method: five stacking methods including from_scratch, stackC, stackA, stackR and stackE 117 | - data_ratio: the percentage of training data 118 | - dilation_count: the number of dilation factors {1,2,4,8} 119 | - num_blocks: the number of residual blocks 120 | - load_model: whether load pre-trained model or not 121 | -------------------------------------------------------------------------------- /SASRec_Alpha.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import argparse 4 | from utils import normalize 5 | from torch.nn.parameter import Parameter 6 | 7 | from Modules import * 8 | class TransformerLayer_Alpha(nn.Module): 9 | def __init__(self, hidden_size, num_heads, model_para, reader, layerid, layer_num, dropout_rate=0.5): 10 | super(TransformerLayer_Alpha, self).__init__() 11 | self.hidden_size = hidden_size 12 | self.num_heads = num_heads 13 | self.dropout_rate = dropout_rate 14 | self.SelfAttention = multihead_attention_alpha(model_para, reader, layerid, layer_num, hidden_size, num_units=self.hidden_size, 15 | num_heads=self.num_heads, dropout_rate=self.dropout_rate, 16 | causality=True, with_qk=False) 17 | self.ff = feedforward_alpha(model_para, reader, layerid, layer_num, num_units=[self.hidden_size, self.hidden_size], dropout_rate=self.dropout_rate) 18 | 19 | def forward(self, input): 20 | x = self.SelfAttention(queries=input, keys=input) 21 | out = self.ff(x) 22 | return out 23 | 24 | 25 | class SASRec_Alpha(nn.Module): 26 | def __init__(self, model_para, device='gpu'): 27 | super(SASRec_Alpha, self).__init__() 28 | self.model_para = model_para 29 | self.load_model = model_para['load_model'] 30 | self.method = model_para['method'] 31 | 32 | self.hidden_size = model_para['hidden_factor'] 33 | self.item_num = int(model_para['item_size']) 34 | self.max_len = model_para['seq_len'] 35 | self.device = torch.device(device) 36 | self.num_blocks = model_para['num_blocks'] 37 | self.num_heads = model_para['num_heads'] 38 | self.dropout_rate = model_para['dropout'] 39 | 40 | self.item_embeddings = nn.Embedding( 41 | num_embeddings=self.item_num, 42 | embedding_dim=self.hidden_size, 43 | ) 44 | self.pos_embeddings = nn.Embedding( 45 | num_embeddings=self.max_len, 46 | embedding_dim=self.hidden_size, 47 | ) 48 | 49 | self.reader = None 50 | if self.load_model: 51 | self.model_path = model_para['model_path'] 52 | self.reader = torch.load(self.model_path) 53 | self.item_embeddings.weight = Parameter(self.reader['item_embeddings.weight']) 54 | self.pos_embeddings.weight = Parameter(self.reader['pos_embeddings.weight']) 55 | print("load item_embeddings.weight") 56 | print("load pos_embeddings.weight") 57 | else: 58 | # init embedding 59 | nn.init.normal_(self.item_embeddings.weight, 0, 0.01) 60 | nn.init.normal_(self.pos_embeddings.weight, 0, 0.01) 61 | 62 | rb = [TransformerLayer_Alpha(self.hidden_size, self.num_heads, self.model_para, self.reader, layerid, self.num_blocks, dropout_rate=self.dropout_rate) for layerid in range(self.num_blocks)] 63 | 64 | self.transformers = nn.Sequential(*rb) 65 | 66 | #dropout 67 | self.dropout = nn.Dropout(self.dropout_rate) 68 | 69 | #layer norm 70 | self.layer_norm_pre = nn.LayerNorm(self.hidden_size) 71 | 72 | 73 | #softmax Layer 74 | self.final = nn.Linear(self.hidden_size, self.item_num) 75 | # 76 | # if self.load_model: 77 | # self.final.weight = Parameter(self.reader['final.weight']) 78 | # self.final.bias = Parameter(self.reader['final.bias']) 79 | 
# print("load final.weight") 80 | # print("load final.bias") 81 | 82 | def forward(self, inputs, onecall=True): 83 | input_emb = self.item_embeddings(inputs) 84 | pos_emb_input = torch.cat(inputs.size(0)*[torch.arange(start=0,end=inputs.size(1)).to(self.device).unsqueeze(0)]) 85 | pos_emb_input = pos_emb_input.long() 86 | pos_emb = self.pos_embeddings(pos_emb_input) 87 | x = input_emb + pos_emb 88 | 89 | x = self.dropout(x) 90 | 91 | x = self.layer_norm_pre(x) 92 | 93 | x = self.transformers(x) 94 | 95 | if onecall: 96 | x = x[:, -1, :].view(-1, self.hidden_size) # [batch_size, hidden_size] 97 | else: 98 | x = x.view(-1, self.hidden_size) # [batch_size*seq_len, hidden_size] 99 | 100 | out = self.final(x) 101 | return out 102 | -------------------------------------------------------------------------------- /SSEPT_Alpha.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import argparse 4 | from utils import normalize 5 | from torch.nn.parameter import Parameter 6 | 7 | from Modules import * 8 | class TransformerLayer_Alpha(nn.Module): 9 | def __init__(self, hidden_size, num_heads, model_para, reader, layerid, layer_num, dropout_rate=0.5): 10 | super(TransformerLayer_Alpha, self).__init__() 11 | self.hidden_size = hidden_size 12 | self.num_heads = num_heads 13 | self.dropout_rate = dropout_rate 14 | self.SelfAttention = multihead_attention_alpha(model_para, reader, layerid, layer_num, hidden_size, num_units=self.hidden_size, 15 | num_heads=self.num_heads, dropout_rate=self.dropout_rate, 16 | causality=True, with_qk=False) 17 | self.ff = feedforward_alpha(model_para, reader, layerid, layer_num, num_units=[self.hidden_size, self.hidden_size], dropout_rate=self.dropout_rate) 18 | 19 | def forward(self, input): 20 | x = self.SelfAttention(queries=input, keys=input) 21 | out = self.ff(x) 22 | return out 23 | 24 | 25 | class SSEPT_Alpha(nn.Module): 26 | def __init__(self, model_para, device='gpu'): 27 | super(SSEPT_Alpha, self).__init__() 28 | self.model_para = model_para 29 | self.load_model = model_para['load_model'] 30 | self.method = model_para['method'] 31 | 32 | self.hidden_size = model_para['hidden_factor'] 33 | self.emb_size = model_para['emb_size'] 34 | self.user_num = int(model_para['user_size']) 35 | self.item_num = int(model_para['item_size']) 36 | self.max_len = model_para['seq_len'] 37 | self.device = torch.device(device) 38 | self.num_blocks = model_para['num_blocks'] 39 | self.num_heads = model_para['num_heads'] 40 | self.dropout_rate = model_para['dropout'] 41 | 42 | self.user_embeddings = nn.Embedding( 43 | num_embeddings=self.user_num, 44 | embedding_dim=self.emb_size, 45 | ) 46 | self.item_embeddings = nn.Embedding( 47 | num_embeddings=self.item_num, 48 | embedding_dim=self.emb_size, 49 | ) 50 | self.pos_embeddings = nn.Embedding( 51 | num_embeddings=self.max_len, 52 | embedding_dim=self.hidden_size, 53 | ) 54 | 55 | self.reader = None 56 | if self.load_model: 57 | self.model_path = model_para['model_path'] 58 | self.reader = torch.load(self.model_path) 59 | self.user_embeddings.weight = Parameter(self.reader['user_embeddings.weight']) 60 | self.item_embeddings.weight = Parameter(self.reader['item_embeddings.weight']) 61 | self.pos_embeddings.weight = Parameter(self.reader['pos_embeddings.weight']) 62 | print("load user_embeddings.weight") 63 | print("load item_embeddings.weight") 64 | print("load pos_embeddings.weight") 65 | else: 66 | # init embedding 67 | 
nn.init.normal_(self.user_embeddings.weight, 0, 0.01) 68 | nn.init.normal_(self.item_embeddings.weight, 0, 0.01) 69 | nn.init.normal_(self.pos_embeddings.weight, 0, 0.01) 70 | 71 | rb = [TransformerLayer_Alpha(self.hidden_size, self.num_heads, self.model_para, self.reader, layerid, self.num_blocks, dropout_rate=self.dropout_rate) for layerid in range(self.num_blocks)] 72 | 73 | self.transformers = nn.Sequential(*rb) 74 | 75 | #dropout 76 | self.dropout = nn.Dropout(self.dropout_rate) 77 | 78 | #layer norm 79 | self.layer_norm_pre = nn.LayerNorm(self.hidden_size) 80 | 81 | #softmax Layer 82 | self.final = nn.Linear(self.hidden_size, self.item_num) 83 | # 84 | # if self.load_model: 85 | # self.final.weight = Parameter(self.reader['final.weight']) 86 | # self.final.bias = Parameter(self.reader['final.bias']) 87 | # print("load final.weight") 88 | # print("load final.bias") 89 | 90 | def forward(self, users, inputs, onecall=True): 91 | user_emb = self.user_embeddings(users) 92 | input_emb = self.item_embeddings(inputs) 93 | pos_emb_input = torch.cat(inputs.size(0)*[torch.arange(start=0,end=inputs.size(1)).to(self.device).unsqueeze(0)]) 94 | pos_emb_input = pos_emb_input.long() 95 | pos_emb = self.pos_embeddings(pos_emb_input) 96 | #input_emb = user_emb + input_emb 97 | user_emb = user_emb.expand((user_emb.size(0), input_emb.size(1), user_emb.size(2))) 98 | input_emb = torch.cat([user_emb,input_emb], dim=2) 99 | x = input_emb + pos_emb 100 | 101 | x = self.dropout(x) 102 | 103 | x = self.layer_norm_pre(x) 104 | 105 | x = self.transformers(x) 106 | 107 | if onecall: 108 | x = x[:, -1, :].view(-1, self.hidden_size) # [batch_size, hidden_size] 109 | else: 110 | x = x.view(-1, self.hidden_size) # [batch_size*seq_len, hidden_size] 111 | 112 | out = self.final(x) 113 | return out 114 | -------------------------------------------------------------------------------- /baseline_SASRec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import utils 4 | import shutil 5 | import time 6 | import math 7 | import numpy as np 8 | import argparse 9 | import Data_loader_SASRec as Data_loader 10 | import os 11 | import random 12 | import SASRec_Alpha 13 | import ast 14 | 15 | 16 | torch.manual_seed(10) 17 | 18 | def getBatch(data, batch_size): 19 | start_inx = 0 20 | end_inx = batch_size 21 | 22 | while end_inx < len(data): 23 | batch = data[start_inx:end_inx] 24 | start_inx = end_inx 25 | end_inx += batch_size 26 | yield batch 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--top_k', type=int, default=5, 31 | help='Sample from top k predictions') 32 | parser.add_argument('--beta1', type=float, default=0.9, 33 | help='hyperpara-Adam') 34 | parser.add_argument('--batch_size', default=256, type=int) 35 | parser.add_argument('--datapath', type=str, default='Data/movielen_20/movielen_20.csv', 36 | help='data path') 37 | parser.add_argument('--device', default='cuda', type=str) 38 | parser.add_argument('--save_dir', default='Models/ML20_baseline_12_emb64_bs256', type=str) 39 | parser.add_argument('--eval_iter', type=int, default=5000, 40 | help='sample generator output evry x steps') 41 | parser.add_argument('--early_stop', type=int, default=10, 42 | help='after x step early stop') 43 | parser.add_argument('--step', type=int, default=350000, 44 | help='trainging step') 45 | parser.add_argument('--tt_percentage', type=float, default=0.2, 46 | help='0.2 means 80% training 20% testing') 47 | 
parser.add_argument('--data_ratio', type=float, default=1, 48 | help='real trainging data') 49 | parser.add_argument('--learning_rate', type=float, default=0.001, 50 | help='learning rate') 51 | parser.add_argument('--L2', type=float, default=0, 52 | help='L2 regularization') 53 | parser.add_argument('--hidden_factor', type=int, default=64, 54 | help='Number of hidden factors, i.e., embedding size.') 55 | parser.add_argument('--num_heads', default=1, type=int) 56 | parser.add_argument('--num_blocks', default=12, type=int) 57 | parser.add_argument('--dropout', default=0, type=float) 58 | parser.add_argument('--method', type=str, default="from_scratch", 59 | help='from_scratch, stack') 60 | parser.add_argument('--load_model', type=ast.literal_eval, default=False, 61 | help='whether loading pretrain model') 62 | parser.add_argument('--model_path', type=str, default="Models/", 63 | help='load model path') 64 | args = parser.parse_args() 65 | print(args) 66 | 67 | 68 | dl = Data_loader.Data_Loader({'model_type': 'generator', 'dir_name': args.datapath}) 69 | all_samples = dl.items 70 | items_voc = dl.item2id 71 | print("shape: ", np.shape(all_samples)) 72 | 73 | 74 | # Randomly shuffle data 75 | np.random.seed(10) 76 | shuffle_indices = np.random.permutation(np.arange(len(all_samples))) 77 | all_samples = all_samples[shuffle_indices] 78 | 79 | # Split train/test set 80 | dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples))) 81 | train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:] 82 | 83 | random.seed(10) 84 | ratio = args.data_ratio 85 | train_set_len = len(train_set) 86 | train_index_set = set(list(range(train_set_len))) 87 | 88 | if ratio == 0.2: 89 | train_ratio = int(ratio * float(train_set_len)) 90 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 91 | real_train_set = train_set[real_train_index_set] 92 | train_set = np.array(real_train_set) 93 | print("real train len", len(train_set)) 94 | elif ratio == 0.4: 95 | last_ratio = ratio - 0.2 96 | last_train_ratio = int(last_ratio * float(train_set_len)) 97 | last_train_index_set = random.sample(list(train_index_set), last_train_ratio) 98 | last_train_set = train_set[last_train_index_set] 99 | 100 | remain_train_index_set = train_index_set - set(last_train_index_set) 101 | remain_len = len(remain_train_index_set) 102 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0/4.0 * float(remain_len))) 103 | new_train_set = train_set[new_train_index_set] 104 | real_train_set = np.concatenate((last_train_set, new_train_set), axis=0) 105 | train_set = np.array(real_train_set) 106 | print("real train len", len(train_set)) 107 | elif ratio == 0.6: 108 | last_last_ratio = ratio - 0.2 - 0.2 109 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 110 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 111 | last_last_train_set = train_set[last_last_train_index_set] 112 | 113 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 114 | remain_len = len(remain_train_index_set) 115 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0/4.0 * float(remain_len))) 116 | last_train_set = train_set[last_train_index_set] 117 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 118 | 119 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 120 | remain_len = len(remain_train_index_set) 121 | new_train_index_set = 
random.sample(list(remain_train_index_set), int(1.0/3.0 * float(remain_len))) 122 | new_train_set = train_set[new_train_index_set] 123 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 124 | train_set = np.array(real_train_set) 125 | print("real train len", len(train_set)) 126 | elif ratio == 0.8: 127 | last_last_ratio = ratio - 0.2 - 0.2 - 0.2 128 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 129 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 130 | last_last_train_set = train_set[last_last_train_index_set] 131 | 132 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 133 | remain_len = len(remain_train_index_set) 134 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0/4.0 * float(remain_len))) 135 | last_train_set = train_set[last_train_index_set] 136 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 137 | 138 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 139 | remain_len = len(remain_train_index_set) 140 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0/3.0 * float(remain_len))) 141 | new_train_set = train_set[new_train_index_set] 142 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 143 | 144 | remain_train_index_set = remain_train_index_set - set(new_train_index_set) 145 | remain_len = len(remain_train_index_set) 146 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0/2.0 * float(remain_len))) 147 | new_train_set = train_set[new_train_index_set] 148 | 149 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 150 | train_set = np.array(real_train_set) 151 | print("real train len", len(train_set)) 152 | 153 | elif ratio == 1: 154 | train_set = np.array(train_set) 155 | print("real train len", len(train_set)) 156 | else: 157 | train_ratio = int(ratio * float(train_set_len)) 158 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 159 | real_train_set = train_set[real_train_index_set] 160 | train_set = np.array(real_train_set) 161 | print("real train len", len(train_set)) 162 | 163 | model_para = { 164 | 'item_size': len(items_voc), 165 | 'embed_dim': args.hidden_factor, 166 | 'hidden_factor': args.hidden_factor, 167 | 'num_blocks': args.num_blocks, 168 | 'num_heads': args.num_heads, 169 | 'dropout': args.dropout, 170 | 'batch_size': args.batch_size, 171 | 'step': args.step, 172 | 'seq_len': len(all_samples[0]), 173 | 'learning_rate': args.learning_rate, 174 | 'load_model': args.load_model, 175 | 'model_path': args.model_path, 176 | 'method': args.method 177 | } 178 | print(model_para) 179 | 180 | if not os.path.exists(args.save_dir): 181 | os.makedirs(args.save_dir) 182 | 183 | args.device = 'cuda' if torch.cuda.is_available() else 'cpu' 184 | 185 | model = SASRec_Alpha(model_para, device=args.device, ).to(args.device) 186 | 187 | optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=args.L2) 188 | # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) 189 | 190 | criterion = nn.CrossEntropyLoss() 191 | 192 | numIters = 1 193 | max_mrr = 0 194 | break_stick = 0 195 | early_stop = 0 196 | while(1): 197 | if break_stick == 1: 198 | break 199 | model.train() 200 | batch_no = 0 201 | batch_size = model_para['batch_size'] 202 | 203 | for batch_idx, batch_sam in enumerate(getBatch(train_set, batch_size)): 204 | start = time.time() 205 | 206 
| inputs, targets = torch.LongTensor(batch_sam[:, :-1]).to(args.device), torch.LongTensor(batch_sam[:, 1:]).to( 207 | args.device).view([-1]) 208 | optimizer.zero_grad() 209 | outputs = model(inputs, onecall=False) # [batch_size*seq_len, item_size] 210 | loss = criterion(outputs, targets) 211 | 212 | loss.backward() 213 | optimizer.step() 214 | 215 | end = time.time() 216 | if numIters % args.eval_iter == 0: 217 | print("-------------------------------------------------------train") 218 | print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format( 219 | loss.item(), batch_no, numIters, train_set.shape[0] / batch_size)) 220 | print("TIME FOR BATCH", end - start) 221 | print("TIME FOR EPOCH (mins)", (end - start) * (train_set.shape[0] / batch_size) / 60.0) 222 | 223 | batch_no += 1 224 | 225 | if numIters % args.eval_iter == 0: 226 | print("-------------------------------------------------------test") 227 | 228 | model.eval() 229 | batch_size_test = batch_size*1 230 | curr_preds_5 = [] 231 | rec_preds_5 = [] 232 | ndcg_preds_5 = [] 233 | curr_preds_10 = [] 234 | rec_preds_10 = [] 235 | ndcg_preds_10 = [] 236 | with torch.no_grad(): 237 | for batch_idx, batch_sam in enumerate(getBatch(valid_set, batch_size)): 238 | inputs, targets = torch.LongTensor(batch_sam[:,:-1]).to(args.device), torch.LongTensor(batch_sam[:,-1]).to(args.device).view([-1]) 239 | outputs = model(inputs) # [batch_size, item_size] only predict the last position 240 | 241 | _, sort_idx_10 = torch.topk(outputs, k=args.top_k+5, sorted=True) # [batch_size, 10] 242 | _, sort_idx_5 = torch.topk(outputs, k=args.top_k, sorted=True) # [batch_size, 5] 243 | 244 | pred_items_5, pred_items_10, target = sort_idx_5.data.cpu().numpy(), sort_idx_10.data.cpu().numpy(), targets.data.cpu().numpy() 245 | for bi in range(pred_items_5.shape[0]): 246 | 247 | true_item = target[bi] 248 | predictmap_5 = {ch : i for i, ch in enumerate(pred_items_5[bi])} 249 | predictmap_10 = {ch: i for i, ch in enumerate(pred_items_10[bi])} 250 | 251 | rank_5 = predictmap_5.get(true_item) 252 | rank_10 = predictmap_10.get(true_item) 253 | if rank_5 == None: 254 | curr_preds_5.append(0.0) 255 | rec_preds_5.append(0.0) 256 | ndcg_preds_5.append(0.0) 257 | else: 258 | MRR_5 = 1.0/(rank_5+1) 259 | Rec_5 = 1.0#3 260 | ndcg_5 = 1.0 / math.log(rank_5 + 2, 2) # 3 261 | curr_preds_5.append(MRR_5) 262 | rec_preds_5.append(Rec_5)#4 263 | ndcg_preds_5.append(ndcg_5) # 4 264 | if rank_10 == None: 265 | curr_preds_10.append(0.0) 266 | rec_preds_10.append(0.0)#2 267 | ndcg_preds_10.append(0.0)#2 268 | else: 269 | MRR_10 = 1.0/(rank_10+1) 270 | Rec_10 = 1.0#3 271 | ndcg_10 = 1.0 / math.log(rank_10 + 2, 2) # 3 272 | curr_preds_10.append(MRR_10) 273 | rec_preds_10.append(Rec_10)#4 274 | ndcg_preds_10.append(ndcg_10) # 4 275 | 276 | mrr = sum(curr_preds_5) / float(len(curr_preds_5)) 277 | mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10)) 278 | hit = sum(rec_preds_5) / float(len(rec_preds_5)) 279 | hit_10 = sum(rec_preds_10) / float(len(rec_preds_10)) 280 | ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5)) 281 | ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10)) 282 | 283 | if mrr > max_mrr: 284 | max_mrr = mrr 285 | print("Save model! mrr_5:", mrr) 286 | print("Save model! mrr_10:", mrr_10) 287 | print("Save model! hit_5:", hit) 288 | print("Save model! hit_10:", hit_10) 289 | print("Save model! ndcg_5:", ndcg) 290 | print("Save model! 
ndcg_10:", ndcg_10) 291 | torch.save(model.state_dict(), args.save_dir + "/{}_{}_{}_{}.pkl".format(args.num_blocks, args.learning_rate, args.data_ratio, args.step)) 292 | early_stop = 0 293 | else: 294 | print("mrr_5:", mrr) 295 | print("mrr_10:", mrr_10) 296 | print("hit_5:", hit) 297 | print("hit_10:", hit_10) 298 | print("ndcg_5:", ndcg) 299 | print("ndcg_10:", ndcg_10) 300 | early_stop += 1 301 | if numIters >= model_para['step']: 302 | break_stick = 1 303 | break 304 | if early_stop >= args.early_stop: 305 | break_stick = 1 306 | print("early stop!") 307 | break 308 | 309 | numIters += 1 310 | 311 | 312 | if __name__ == '__main__': 313 | main() 314 | 315 | 316 | -------------------------------------------------------------------------------- /baseline_SSEPT.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import utils 4 | import shutil 5 | import time 6 | import math 7 | import numpy as np 8 | import argparse 9 | import Data_loader_SSEPT as Data_loader 10 | import os 11 | import random 12 | import SSEPT_Alpha 13 | import ast 14 | 15 | torch.manual_seed(10) 16 | 17 | 18 | def sampler(batch, usernum, itemnum, maxlen): 19 | threshold_user = 0.2 20 | threshold_item = 0.99 21 | new_batch = [] 22 | for seq in batch: 23 | 24 | if random.random() > threshold_user: 25 | user = np.random.randint(0, usernum) 26 | seq[0] = user 27 | 28 | idx = maxlen - 1 29 | for i in reversed(seq[1:-1]): 30 | if i != 0 and random.random() > threshold_item: 31 | i = np.random.randint(0, itemnum) 32 | seq[idx] = i 33 | idx -= 1 34 | if idx == 0: break 35 | new_batch.append(seq) 36 | new_batch = np.array(new_batch) 37 | return new_batch 38 | 39 | 40 | def getBatch(data, batch_size): 41 | start_inx = 0 42 | end_inx = batch_size 43 | 44 | while end_inx < len(data): 45 | batch = data[start_inx:end_inx] 46 | start_inx = end_inx 47 | end_inx += batch_size 48 | yield batch 49 | 50 | 51 | def main(): 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument('--top_k', type=int, default=5, 54 | help='Sample from top k predictions') 55 | parser.add_argument('--beta1', type=float, default=0.9, 56 | help='hyperpara-Adam') 57 | parser.add_argument('--batch_size', default=128, type=int) 58 | parser.add_argument('--datapath', type=str, default='Data/movielen_20/movielen_20_context.csv', 59 | help='data path') 60 | parser.add_argument('--device', default='cuda', type=str) 61 | parser.add_argument('--save_dir', default='Models/ML20_baseline_24_emb64_bs128', type=str) 62 | parser.add_argument('--eval_iter', type=int, default=2000, 63 | help='sample generator output evry x steps') 64 | parser.add_argument('--early_stop', type=int, default=20, 65 | help='after x step early stop') 66 | parser.add_argument('--step', type=int, default=250000, 67 | help='trainging step') 68 | parser.add_argument('--tt_percentage', type=float, default=0.2, 69 | help='0.2 means 80% training 20% testing') 70 | parser.add_argument('--data_ratio', type=float, default=1, 71 | help='real trainging data') 72 | parser.add_argument('--learning_rate', type=float, default=0.001, 73 | help='learning rate') 74 | parser.add_argument('--L2', type=float, default=0, 75 | help='L2 regularization') 76 | parser.add_argument('--hidden_factor', type=int, default=128, 77 | help='Number of hidden factors, i.e., embedding size.') 78 | parser.add_argument('--num_heads', default=1, type=int) 79 | parser.add_argument('--num_blocks', default=24, type=int) 80 | parser.add_argument('--dropout', 
default=0.2, type=float) 81 | parser.add_argument('--method', type=str, default="from_scratch", 82 | help='from_scratch, stack') 83 | parser.add_argument('--load_model', type=ast.literal_eval, default=False, 84 | help='whether loading pretrain model') 85 | parser.add_argument('--model_path', type=str, default="Models/", 86 | help='load model path') 87 | args = parser.parse_args() 88 | print(args) 89 | 90 | dl = Data_loader.Data_Loader({'model_type': 'generator', 'dir_name': args.datapath}) 91 | all_samples = dl.items 92 | items_voc = dl.item2id 93 | print("shape: ", np.shape(all_samples)) 94 | user_size = dl.user_size 95 | 96 | # Randomly shuffle data 97 | np.random.seed(10) 98 | shuffle_indices = np.random.permutation(np.arange(len(all_samples))) 99 | all_samples = all_samples[shuffle_indices] 100 | 101 | # Split train/test set 102 | dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples))) 103 | train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:] 104 | 105 | random.seed(10) 106 | ratio = args.data_ratio 107 | train_set_len = len(train_set) 108 | train_index_set = set(list(range(train_set_len))) 109 | 110 | if ratio == 0.2: 111 | train_ratio = int(ratio * float(train_set_len)) 112 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 113 | real_train_set = train_set[real_train_index_set] 114 | train_set = np.array(real_train_set) 115 | print("real train len", len(train_set)) 116 | elif ratio == 0.4: 117 | last_ratio = ratio - 0.2 118 | last_train_ratio = int(last_ratio * float(train_set_len)) 119 | last_train_index_set = random.sample(list(train_index_set), last_train_ratio) 120 | last_train_set = train_set[last_train_index_set] 121 | 122 | remain_train_index_set = train_index_set - set(last_train_index_set) 123 | remain_len = len(remain_train_index_set) 124 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 125 | new_train_set = train_set[new_train_index_set] 126 | real_train_set = np.concatenate((last_train_set, new_train_set), axis=0) 127 | train_set = np.array(real_train_set) 128 | print("real train len", len(train_set)) 129 | elif ratio == 0.6: 130 | last_last_ratio = ratio - 0.2 - 0.2 131 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 132 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 133 | last_last_train_set = train_set[last_last_train_index_set] 134 | 135 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 136 | remain_len = len(remain_train_index_set) 137 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 138 | last_train_set = train_set[last_train_index_set] 139 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 140 | 141 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 142 | remain_len = len(remain_train_index_set) 143 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 3.0 * float(remain_len))) 144 | new_train_set = train_set[new_train_index_set] 145 | 146 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 147 | train_set = np.array(real_train_set) 148 | print("real train len", len(train_set)) 149 | elif ratio == 0.8: 150 | last_last_ratio = ratio - 0.2 - 0.2 - 0.2 151 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 152 | last_last_train_index_set = 
random.sample(list(train_index_set), last_last_train_ratio) 153 | last_last_train_set = train_set[last_last_train_index_set] 154 | 155 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 156 | remain_len = len(remain_train_index_set) 157 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 158 | last_train_set = train_set[last_train_index_set] 159 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 160 | 161 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 162 | remain_len = len(remain_train_index_set) 163 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 3.0 * float(remain_len))) 164 | new_train_set = train_set[new_train_index_set] 165 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 166 | 167 | remain_train_index_set = remain_train_index_set - set(new_train_index_set) 168 | remain_len = len(remain_train_index_set) 169 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 2.0 * float(remain_len))) 170 | new_train_set = train_set[new_train_index_set] 171 | 172 | 173 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 174 | train_set = np.array(real_train_set) 175 | print("real train len", len(train_set)) 176 | 177 | elif ratio == 1: 178 | 179 | train_set = np.array(train_set) 180 | print("real train len", len(train_set)) 181 | else: 182 | train_ratio = int(ratio * float(train_set_len)) 183 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 184 | real_train_set = train_set[real_train_index_set] 185 | train_set = np.array(real_train_set) 186 | print("real train len", len(train_set)) 187 | 188 | model_para = { 189 | 'user_size': user_size, 190 | 'item_size': len(items_voc), 191 | 'emb_size': int(args.hidden_factor / 2), 192 | 'hidden_factor': args.hidden_factor, 193 | 'num_blocks': args.num_blocks, 194 | 'num_heads': args.num_heads, 195 | 'dropout': args.dropout, 196 | 'batch_size': args.batch_size, 197 | 'step': args.step, 198 | 'seq_len': len(all_samples[0]) - 1, 199 | 'learning_rate': args.learning_rate, 200 | 'load_model': args.load_model, 201 | 'model_path': args.model_path, 202 | 'method': args.method 203 | } 204 | print(model_para) 205 | 206 | if not os.path.exists(args.save_dir): 207 | os.makedirs(args.save_dir) 208 | 209 | args.device = 'cuda' if torch.cuda.is_available() else 'cpu' 210 | 211 | model = SSEPT_Alpha(model_para, device=args.device, ).to(args.device) 212 | 213 | optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=args.L2) 214 | # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) 215 | 216 | criterion = nn.CrossEntropyLoss() 217 | 218 | numIters = 1 219 | max_mrr = 0 220 | break_stick = 0 221 | early_stop = 0 222 | while (1): 223 | if break_stick == 1: 224 | break 225 | model.train() 226 | batch_no = 0 227 | batch_size = model_para['batch_size'] 228 | 229 | for batch_idx, batch_sam in enumerate(getBatch(train_set, batch_size)): 230 | start = time.time() 231 | 232 | batch_sam = sampler(batch_sam, model_para['user_size'], model_para['item_size'], model_para['seq_len']) 233 | users, inputs, targets = torch.LongTensor(batch_sam[:, 0:1]).to(args.device), torch.LongTensor( 234 | batch_sam[:, 1:-1]).to(args.device), torch.LongTensor(batch_sam[:, 2:]).to( 235 | args.device).view([-1]) 236 | optimizer.zero_grad() 237 | outputs = model(users, inputs, onecall=False) # 
[batch_size*seq_len, item_size] 238 | loss = criterion(outputs, targets) 239 | 240 | loss.backward() 241 | optimizer.step() 242 | 243 | end = time.time() 244 | if numIters % args.eval_iter == 0: 245 | print("-------------------------------------------------------train") 246 | print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format( 247 | loss.item(), batch_no, numIters, train_set.shape[0] / batch_size)) 248 | print("TIME FOR BATCH", end - start) 249 | print("TIME FOR EPOCH (mins)", (end - start) * (train_set.shape[0] / batch_size) / 60.0) 250 | 251 | batch_no += 1 252 | 253 | if numIters % args.eval_iter == 0: 254 | print("-------------------------------------------------------test") 255 | 256 | model.eval() 257 | batch_size_test = batch_size * 1 258 | curr_preds_5 = [] 259 | rec_preds_5 = [] 260 | ndcg_preds_5 = [] 261 | curr_preds_10 = [] 262 | rec_preds_10 = [] 263 | ndcg_preds_10 = [] 264 | with torch.no_grad(): 265 | for batch_idx, batch_sam in enumerate(getBatch(valid_set, batch_size)): 266 | users, inputs, targets = torch.LongTensor(batch_sam[:, 0:1]).to(args.device), torch.LongTensor( 267 | batch_sam[:, 1:-1]).to(args.device), torch.LongTensor(batch_sam[:, -1]).to( 268 | args.device).view([-1]) 269 | outputs = model(users, inputs) # [batch_size, item_size] only predict the last position 270 | 271 | _, sort_idx_10 = torch.topk(outputs, k=args.top_k + 5, sorted=True) # [batch_size, 10] 272 | _, sort_idx_5 = torch.topk(outputs, k=args.top_k, sorted=True) # [batch_size, 5] 273 | 274 | pred_items_5, pred_items_10, target = sort_idx_5.data.cpu().numpy(), sort_idx_10.data.cpu().numpy(), targets.data.cpu().numpy() 275 | for bi in range(pred_items_5.shape[0]): 276 | 277 | true_item = target[bi] 278 | predictmap_5 = {ch: i for i, ch in enumerate(pred_items_5[bi])} 279 | predictmap_10 = {ch: i for i, ch in enumerate(pred_items_10[bi])} 280 | 281 | rank_5 = predictmap_5.get(true_item) 282 | rank_10 = predictmap_10.get(true_item) 283 | if rank_5 == None: 284 | curr_preds_5.append(0.0) 285 | rec_preds_5.append(0.0) 286 | ndcg_preds_5.append(0.0) 287 | else: 288 | MRR_5 = 1.0 / (rank_5 + 1) 289 | Rec_5 = 1.0 # 3 290 | ndcg_5 = 1.0 / math.log(rank_5 + 2, 2) # 3 291 | curr_preds_5.append(MRR_5) 292 | rec_preds_5.append(Rec_5) # 4 293 | ndcg_preds_5.append(ndcg_5) # 4 294 | if rank_10 == None: 295 | curr_preds_10.append(0.0) 296 | rec_preds_10.append(0.0) # 2 297 | ndcg_preds_10.append(0.0) # 2 298 | else: 299 | MRR_10 = 1.0 / (rank_10 + 1) 300 | Rec_10 = 1.0 # 3 301 | ndcg_10 = 1.0 / math.log(rank_10 + 2, 2) # 3 302 | curr_preds_10.append(MRR_10) 303 | rec_preds_10.append(Rec_10) # 4 304 | ndcg_preds_10.append(ndcg_10) # 4 305 | 306 | mrr = sum(curr_preds_5) / float(len(curr_preds_5)) 307 | mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10)) 308 | hit = sum(rec_preds_5) / float(len(rec_preds_5)) 309 | hit_10 = sum(rec_preds_10) / float(len(rec_preds_10)) 310 | ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5)) 311 | ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10)) 312 | 313 | if mrr > max_mrr: 314 | max_mrr = mrr 315 | print("Save model! mrr_5:", mrr) 316 | print("Save model! mrr_10:", mrr_10) 317 | print("Save model! hit_5:", hit) 318 | print("Save model! hit_10:", hit_10) 319 | print("Save model! ndcg_5:", ndcg) 320 | print("Save model! 
ndcg_10:", ndcg_10) 321 | torch.save(model.state_dict(), 322 | args.save_dir + "/{}_{}_{}_{}.pkl".format(args.num_blocks, args.learning_rate, 323 | args.data_ratio, args.step)) 324 | early_stop = 0 325 | else: 326 | print("mrr_5:", mrr) 327 | print("mrr_10:", mrr_10) 328 | print("hit_5:", hit) 329 | print("hit_10:", hit_10) 330 | print("ndcg_5:", ndcg) 331 | print("ndcg_10:", ndcg_10) 332 | early_stop += 1 333 | if numIters >= model_para['step']: 334 | break_stick = 1 335 | break 336 | if early_stop >= args.early_stop: 337 | break_stick = 1 338 | print("early stop!") 339 | break 340 | 341 | numIters += 1 342 | 343 | 344 | if __name__ == '__main__': 345 | main() 346 | 347 | 348 | -------------------------------------------------------------------------------- /data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import listdir 3 | from os.path import isfile, join 4 | import numpy as np 5 | from tensorflow.contrib import learn 6 | from collections import Counter 7 | 8 | 9 | class Data_Loader: 10 | def __init__(self, options): 11 | positive_data_file = options['dir_name'] 12 | positive_examples = list(open(positive_data_file, "r").readlines()) 13 | positive_examples = [s for s in positive_examples] 14 | 15 | padid = str(options['padid']) 16 | 17 | split_tip = ',' 18 | 19 | max_document_length = max([len(x.split(split_tip)) for x in positive_examples]) 20 | 21 | new_positive_examples = [] 22 | for x in positive_examples: 23 | x = x.strip() 24 | x_list = x.split(split_tip) 25 | x_len = len(x_list) 26 | if x_len != max_document_length: 27 | padlen = max_document_length - x_len 28 | x_list = padlen * [padid] + x_list 29 | x = split_tip.join(x_list) 30 | new_positive_examples.append(x) 31 | 32 | vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) 33 | self.item = np.array(list(vocab_processor.fit_transform(new_positive_examples))) 34 | self.item_dict = vocab_processor.vocabulary_._mapping -------------------------------------------------------------------------------- /data_loader_finetune.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import listdir 3 | from os.path import isfile, join 4 | import numpy as np 5 | from tensorflow.contrib import learn 6 | from collections import Counter 7 | 8 | # finetune, 9 | # input 1,2,3,4,5,6,,targetIDs 10 | # output 1,2,3,4,5,6,'CLS',targetIDs 'CLS' denotes classifier 11 | 12 | class Data_Loader: 13 | def __init__(self, options): 14 | 15 | positive_data_file = options['dir_name'] 16 | positive_examples = list(open(positive_data_file, "r").readlines()) 17 | # positive_examples = [[s[0],s[2:]]for s in positive_examples] 18 | 19 | # [user,itemseq] = [[s[0], s[2:]] for s in positive_examples] 20 | # print user 21 | colon=",," 22 | source = [s.split(colon)[0] for s in positive_examples] 23 | target= [s.split(colon)[1] for s in positive_examples] 24 | 25 | 26 | max_document_length = max([len(x.split(",")) for x in source]) 27 | # max_document_length = max([len(x.split()) for x in positive_examples]) #split by space, one or many, not sensitive 28 | vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) 29 | self.item = np.array(list(vocab_processor.fit_transform(source))) 30 | self.item_dict = vocab_processor.vocabulary_._mapping 31 | 32 | max_document_length_target = max([len(x.split(",")) for x in target]) 33 | vocab_processor_target = 
learn.preprocessing.VocabularyProcessor(max_document_length_target) 34 | self.target = np.array(list(vocab_processor_target.fit_transform(target))) # pad 0 in the end 35 | self.target_dict = vocab_processor_target.vocabulary_._mapping 36 | 37 | # self.separator = len(self.item) + len(self.target) # it is just used for separating such as : 38 | # self.separator = len(self.item_dict) # denote '[CLS]' 39 | self.separator = 0 # denote '[CLS]' 40 | lens = self.item.shape[0] 41 | # sep=np.full((lens, 1), self.separator) 42 | 43 | # self.example = np.hstack((self.item,sep,self.target)) 44 | # concat source and one target 45 | 46 | self.example = [] 47 | for line in range(lens): 48 | source_line = self.item[line] 49 | target_line = self.target[line] 50 | target_num = len(target_line) 51 | 52 | 53 | for j in range(target_num): 54 | if target_line[j] != 0: 55 | # np.array(target_line[j]) 56 | # unit = np.append(np.array(self.separator),source_line) 57 | unit = np.append(source_line, np.array(self.separator)) 58 | unit = np.append(unit, np.array(target_line[j])) 59 | self.example.append(unit) 60 | 61 | self.example = np.array(self.example) 62 | print(self.example.shape) 63 | -------------------------------------------------------------------------------- /deep_GRec.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import data_loader 3 | import generator_deep_GRec as generator_deep 4 | import shutil 5 | import time 6 | import math 7 | import numpy as np 8 | import argparse 9 | import sys 10 | import os 11 | import random 12 | import ast 13 | import collections 14 | 15 | 16 | tf.set_random_seed(10) 17 | 18 | 19 | MaskedLmInstance = collections.namedtuple("MaskedLmInstance", 20 | ["index", "label"]) 21 | 22 | 23 | def create_masked_lm_predictions_frombatch(item_batch, masked_lm_prob, 24 | max_predictions_per_seq, items, rng, item_size): 25 | rng = random.Random() 26 | output_tokens_batch = [] 27 | maskedpositions_batch = [] 28 | maskedlabels_batch = [] 29 | masked_lm_weights_batch = [] 30 | item_batch_ = item_batch[:, 1:] # remove start and end 31 | for line_list in range(item_batch_.shape[0]): 32 | 33 | output_tokens, masked_lm_positions, masked_lm_labels = create_masked_lm_predictions(item_batch_[line_list], 34 | masked_lm_prob, 35 | max_predictions_per_seq, 36 | items, rng, item_size) 37 | # print output_tokens 38 | output_tokens.insert(0, item_batch[line_list][0]) 39 | output_tokens_batch.append(output_tokens) 40 | maskedpositions_batch.append(masked_lm_positions) 41 | maskedlabels_batch.append(masked_lm_labels) 42 | masked_lm_weights = [1.0] * len(masked_lm_labels) 43 | # note you can not change here since it should be consistent with 'num_to_predict' in create_masked_lm_predictions 44 | num_to_predict = min(max_predictions_per_seq, 45 | max(1, int(round(len(item_batch_[line_list]) * masked_lm_prob)))) 46 | 47 | while len(masked_lm_weights) < num_to_predict: 48 | masked_lm_weights.append(0.0) 49 | masked_lm_weights_batch.append(masked_lm_weights) 50 | 51 | return output_tokens_batch, maskedpositions_batch, maskedlabels_batch, masked_lm_weights_batch 52 | 53 | 54 | def create_masked_predictions_frombatch(item_batch): 55 | output_tokens_batch = [] 56 | maskedpositions_batch = [] 57 | maskedlabels_batch = [] 58 | for line_list in range(item_batch.shape[0]): 59 | output_tokens, masked_lm_positions, masked_lm_labels = create_endmask(item_batch[line_list]) 60 | output_tokens_batch.append(output_tokens) 61 | 
maskedpositions_batch.append(masked_lm_positions) 62 | maskedlabels_batch.append(masked_lm_labels) 63 | return output_tokens_batch, maskedpositions_batch, maskedlabels_batch 64 | 65 | 66 | def create_endmask(tokens): 67 | masked_lm_positions = [] 68 | masked_lm_labels = [] 69 | lens = len(tokens) 70 | masked_token = 0 71 | dutokens = list(tokens) 72 | dutokens[-1] = masked_token 73 | 74 | masked_lm_positions.append(lens - 1) 75 | masked_lm_labels.append(tokens[-1]) 76 | return dutokens, masked_lm_positions, masked_lm_labels 77 | 78 | 79 | # from BERT 80 | def create_masked_lm_predictions(tokens, masked_lm_prob, 81 | max_predictions_per_seq, vocab_words, rng, item_size): 82 | """Creates the predictions for the masked LM objective.""" 83 | 84 | cand_indexes = [] 85 | for (i, token) in enumerate(tokens): 86 | if token == "[CLS]" or token == "[SEP]": 87 | continue 88 | cand_indexes.append(i) 89 | 90 | rng.shuffle(cand_indexes) 91 | 92 | output_tokens = list(tokens) 93 | 94 | num_to_predict = min(max_predictions_per_seq, 95 | max(1, int(round(len(tokens) * masked_lm_prob)))) 96 | 97 | masked_lms = [] 98 | covered_indexes = set() 99 | for index in cand_indexes: 100 | if len(masked_lms) >= num_to_predict: 101 | break 102 | if index in covered_indexes: 103 | continue 104 | covered_indexes.add(index) 105 | 106 | masked_token = None 107 | # 80% of the time, replace with [MASK] 108 | if rng.random() < 1.0: 109 | # masked_token = "[MASK]" 110 | masked_token = 0 # item_size is "[MASK]" 0 represents '' 111 | else: 112 | # 10% of the time, keep original 113 | if rng.random() < 0.5: 114 | masked_token = tokens[index] 115 | # 10% of the time, replace with random word 116 | else: 117 | masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)] 118 | 119 | output_tokens[index] = masked_token 120 | 121 | masked_lms.append(MaskedLmInstance(index=index, label=tokens[index])) 122 | 123 | masked_lms = sorted(masked_lms, key=lambda x: x.index) 124 | 125 | masked_lm_positions = [] 126 | masked_lm_labels = [] 127 | for p in masked_lms: 128 | masked_lm_positions.append(p.index) 129 | masked_lm_labels.append(p.label) 130 | 131 | return (output_tokens, masked_lm_positions, masked_lm_labels) 132 | 133 | 134 | def main(): 135 | parser = argparse.ArgumentParser() 136 | parser.add_argument('--top_k', type=int, default=5, 137 | help='sample from top k predictions') 138 | parser.add_argument('--beta1', type=float, default=0.9, 139 | help='hyperpara-Adam') 140 | parser.add_argument('--datapath', type=str, default="Data/movielen_20/movielen_20.csv", 141 | help='data path') 142 | parser.add_argument('--save_dir', type=str, default="Models/ml20", 143 | help='save dir path') 144 | parser.add_argument('--eval_iter', type=int, default=1000, 145 | help='sample generator output evry x steps') 146 | parser.add_argument('--early_stop', type=int, default=10, 147 | help='after x step early stop') 148 | parser.add_argument('--step', type=int, default=400000, 149 | help='trainging step') 150 | parser.add_argument('--tt_percentage', type=float, default=0.2, 151 | help='0.2 means 80% training 20% testing') 152 | parser.add_argument('--data_ratio', type=float, default=1, 153 | help='real trainging data') 154 | parser.add_argument('--learning_rate', type=float, default=0.001, 155 | help='learning rate') 156 | parser.add_argument('--L2', type=float, default=0, 157 | help='L2 regularization') 158 | parser.add_argument('--dilation_count', type=int, default=16, 159 | help='dilation count number') 160 | parser.add_argument('--method', 
type=str, default="from_scratch", 161 | help='from_scratch, StackR, stackC, stackA') 162 | parser.add_argument('--load_model', type=ast.literal_eval, default=False, 163 | help='whether loading pretrain model') 164 | parser.add_argument('--model_path', type=str, default="Models/", 165 | help='load model path') 166 | parser.add_argument('--padid', type=int, default=0, 167 | help='pad id') 168 | 169 | parser.add_argument('--masked_lm_prob', type=float, default=0.2, 170 | help='0.2 means 20% items are masked') 171 | parser.add_argument('--max_predictions_per_seq', type=int, default=50, 172 | help='maximum number of masked tokens') 173 | parser.add_argument('--max_position', type=int, default=100, 174 | help='maximum number of for positional embedding, it has to be larger than the sequence lens') 175 | parser.add_argument('--has_positionalembedding', type=bool, default=False, 176 | help='whether contains positional embedding before performing cnnn') 177 | 178 | args = parser.parse_args() 179 | 180 | print(args) 181 | 182 | dl = data_loader.Data_Loader({'dir_name': args.datapath, 'padid': args.padid}) 183 | all_samples = dl.item 184 | print(all_samples.shape) 185 | items = dl.item_dict 186 | itemlist = items.values() 187 | item_size = len(items) 188 | print("len(items)", item_size) 189 | 190 | max_predictions_per_seq = args.max_predictions_per_seq 191 | masked_lm_prob = args.masked_lm_prob 192 | 193 | # Randomly shuffle data 194 | np.random.seed(10) 195 | shuffle_indices = np.random.permutation(np.arange(len(all_samples))) 196 | all_samples = all_samples[shuffle_indices] 197 | 198 | # Split train/test set 199 | dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples))) 200 | train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:] 201 | 202 | random.seed(10) 203 | ratio = args.data_ratio 204 | train_set_len = len(train_set) 205 | train_index_set = set(list(range(train_set_len))) 206 | 207 | if ratio == 0.2: 208 | train_ratio = int(ratio * float(train_set_len)) 209 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 210 | real_train_set = train_set[real_train_index_set] 211 | train_set = np.array(real_train_set) 212 | print("real train len", len(train_set)) 213 | elif ratio == 0.4: 214 | last_ratio = ratio - 0.2 215 | last_train_ratio = int(last_ratio * float(train_set_len)) 216 | last_train_index_set = random.sample(list(train_index_set), last_train_ratio) 217 | last_train_set = train_set[last_train_index_set] 218 | 219 | remain_train_index_set = train_index_set - set(last_train_index_set) 220 | remain_len = len(remain_train_index_set) 221 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 222 | new_train_set = train_set[new_train_index_set] 223 | 224 | real_train_set = np.concatenate((last_train_set, new_train_set), axis=0) 225 | train_set = np.array(real_train_set) 226 | print("real train len", len(train_set)) 227 | elif ratio == 0.6: 228 | last_last_ratio = ratio - 0.2 - 0.2 229 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 230 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 231 | last_last_train_set = train_set[last_last_train_index_set] 232 | 233 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 234 | remain_len = len(remain_train_index_set) 235 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 236 | last_train_set = 
train_set[last_train_index_set] 237 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 238 | 239 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 240 | remain_len = len(remain_train_index_set) 241 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 3.0 * float(remain_len))) 242 | new_train_set = train_set[new_train_index_set] 243 | 244 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 245 | train_set = np.array(real_train_set) 246 | print("real train len", len(train_set)) 247 | elif ratio == 0.8: 248 | last_last_ratio = ratio - 0.2 - 0.2 - 0.2 249 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 250 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 251 | last_last_train_set = train_set[last_last_train_index_set] 252 | 253 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 254 | remain_len = len(remain_train_index_set) 255 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 256 | last_train_set = train_set[last_train_index_set] 257 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 258 | 259 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 260 | remain_len = len(remain_train_index_set) 261 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 3.0 * float(remain_len))) 262 | new_train_set = train_set[new_train_index_set] 263 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 264 | 265 | remain_train_index_set = remain_train_index_set - set(new_train_index_set) 266 | remain_len = len(remain_train_index_set) 267 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 2.0 * float(remain_len))) 268 | new_train_set = train_set[new_train_index_set] 269 | 270 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 271 | train_set = np.array(real_train_set) 272 | print("real train len", len(train_set)) 273 | elif ratio == 1: 274 | train_set = np.array(train_set) 275 | print("real train len", len(train_set)) 276 | else: 277 | train_ratio = int(ratio * float(train_set_len)) 278 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 279 | real_train_set = train_set[real_train_index_set] 280 | train_set = np.array(real_train_set) 281 | print("real train len", len(train_set)) 282 | 283 | model_para = { 284 | 'item_size': len(items), 285 | 'dilated_channels': 64, 286 | 'dilations': [1, 4] * args.dilation_count, 287 | 'step': args.step, 288 | 'kernel_size': 3, 289 | 'learning_rate': args.learning_rate, 290 | 'L2': args.L2, 291 | 'batch_size': 1024, 292 | 'load_model': args.load_model, 293 | 'model_path': args.model_path, 294 | 'method': args.method, 295 | 'max_position': args.max_position, 296 | # maximum number of for positional embedding, it has to be larger than the sequence lens 297 | 'has_positionalembedding': args.has_positionalembedding 298 | } 299 | 300 | print(model_para) 301 | 302 | if not os.path.exists(args.save_dir): 303 | os.makedirs(args.save_dir) 304 | 305 | itemrec = generator_deep.NextItNet_Decoder(model_para) 306 | itemrec.train_graph() 307 | optimizer = tf.train.AdamOptimizer(model_para['learning_rate'], beta1=args.beta1).minimize(itemrec.loss) 308 | itemrec.predict_graph(reuse=True) 309 | 310 | gpu_options = tf.GPUOptions(allow_growth=True) 311 | sess = 
tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 312 | 313 | saver = tf.train.Saver(max_to_keep=1) 314 | 315 | init = tf.global_variables_initializer() 316 | sess.run(init) 317 | 318 | # writer=tf.summary.FileWriter('./stack_graph',sess.graph) 319 | 320 | numIters = 1 321 | max_mrr = 0 322 | break_stick = 0 323 | early_stop = 0 324 | while (1): 325 | if break_stick == 1: 326 | break 327 | 328 | batch_no = 0 329 | batch_size = model_para['batch_size'] 330 | 331 | while (batch_no + 1) * batch_size < train_set.shape[0]: 332 | 333 | start = time.time() 334 | 335 | item_batch = train_set[batch_no * batch_size: (batch_no + 1) * batch_size, :] 336 | 337 | output_tokens_batch, maskedpositions_batch, maskedlabels_batch, masked_lm_weights_batch = create_masked_lm_predictions_frombatch( 338 | item_batch, masked_lm_prob, max_predictions_per_seq, items=itemlist, rng=None, item_size=item_size 339 | ) 340 | 341 | _, loss = sess.run( 342 | [optimizer, itemrec.loss], 343 | feed_dict={ 344 | itemrec.itemseq_output: item_batch[:, 1:], # 2 3 4 5 6 7 8 9 345 | itemrec.itemseq_input: output_tokens_batch, # 1 2 0 4 5 0 7 8 9 346 | itemrec.masked_position: maskedpositions_batch, # [1 4] 347 | itemrec.masked_items: maskedlabels_batch, # [3,6] 348 | itemrec.label_weights: masked_lm_weights_batch # [1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0] #useless 349 | }) 350 | end = time.time() 351 | if numIters % args.eval_iter == 0: 352 | print("-------------------------------------------------------train") 353 | print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format( 354 | loss, batch_no, numIters, train_set.shape[0] / batch_size)) 355 | print("TIME FOR BATCH", end - start) 356 | print("TIME FOR EPOCH (mins)", (end - start) * (train_set.shape[0] / batch_size) / 60.0) 357 | 358 | batch_no += 1 359 | 360 | if numIters % args.eval_iter == 0: 361 | print("-------------------------------------------------------test") 362 | batch_no_test = 0 363 | batch_size_test = batch_size * 1 364 | curr_preds_5 = [] 365 | rec_preds_5 = [] 366 | ndcg_preds_5 = [] 367 | curr_preds_10 = [] 368 | rec_preds_10 = [] 369 | ndcg_preds_10 = [] 370 | while (batch_no_test + 1) * batch_size_test < valid_set.shape[0]: 371 | item_batch = valid_set[batch_no_test * batch_size_test: (batch_no_test + 1) * batch_size_test, :] 372 | [probs_10, probs_5] = sess.run( 373 | [itemrec.top_10, itemrec.top_5], 374 | feed_dict={ 375 | itemrec.itemseq_input: item_batch[:, 0:-1] 376 | }) 377 | # print(probs_10[1].shape) #(256,1,10) 378 | for bi in range(batch_size_test): 379 | pred_items_10 = probs_10[1][bi][-1] 380 | pred_items_5 = probs_5[1][bi][-1] 381 | 382 | true_item = item_batch[bi][-1] 383 | predictmap_5 = {ch: i for i, ch in enumerate(pred_items_5)} 384 | pred_items_10 = {ch: i for i, ch in enumerate(pred_items_10)} 385 | 386 | rank_5 = predictmap_5.get(true_item) 387 | rank_10 = pred_items_10.get(true_item) 388 | if rank_5 == None: 389 | curr_preds_5.append(0.0) 390 | rec_preds_5.append(0.0) 391 | ndcg_preds_5.append(0.0) 392 | else: 393 | MRR_5 = 1.0 / (rank_5 + 1) 394 | Rec_5 = 1.0 395 | ndcg_5 = 1.0 / math.log(rank_5 + 2, 2) 396 | curr_preds_5.append(MRR_5) 397 | rec_preds_5.append(Rec_5) 398 | ndcg_preds_5.append(ndcg_5) 399 | if rank_10 == None: 400 | curr_preds_10.append(0.0) 401 | rec_preds_10.append(0.0) 402 | ndcg_preds_10.append(0.0) 403 | else: 404 | MRR_10 = 1.0 / (rank_10 + 1) 405 | Rec_10 = 1.0 406 | ndcg_10 = 1.0 / math.log(rank_10 + 2, 2) 407 | curr_preds_10.append(MRR_10) 408 | rec_preds_10.append(Rec_10) 409 | 
ndcg_preds_10.append(ndcg_10) 410 | 411 | batch_no_test += 1 412 | 413 | mrr = sum(curr_preds_5) / float(len(curr_preds_5)) 414 | mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10)) 415 | hit = sum(rec_preds_5) / float(len(rec_preds_5)) 416 | hit_10 = sum(rec_preds_10) / float(len(rec_preds_10)) 417 | ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5)) 418 | ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10)) 419 | 420 | if mrr > max_mrr: 421 | max_mrr = mrr 422 | 423 | print("Save model! mrr_5:", mrr) 424 | print("Save model! mrr_10:", mrr_10) 425 | print("Save model! hit_5:", hit) 426 | print("Save model! hit_10:", hit_10) 427 | print("Save model! ndcg_5:", ndcg) 428 | print("Save model! ndcg_10:", ndcg_10) 429 | early_stop = 0 430 | saver.save(sess, args.save_dir + "/{}_{}_{}_{}.ckpt".format(args.dilation_count, args.learning_rate, 431 | args.data_ratio, args.step)) 432 | else: 433 | print("mrr_5:", mrr) 434 | print("mrr_10:", mrr_10) 435 | print("hit_5:", hit) 436 | print("hit_10:", hit_10) 437 | print("ndcg_5:", ndcg) 438 | print("ndcg_10:", ndcg_10) 439 | early_stop += 1 440 | 441 | if numIters >= model_para['step']: 442 | break_stick = 1 443 | break 444 | if early_stop >= args.early_stop: 445 | break_stick = 1 446 | print("early stop!") 447 | break 448 | 449 | numIters += 1 450 | 451 | 452 | if __name__ == '__main__': 453 | main() 454 | -------------------------------------------------------------------------------- /deep_nextitnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import data_loader 3 | import generator_deep 4 | import shutil 5 | import time 6 | import math 7 | import numpy as np 8 | import argparse 9 | import sys 10 | import os 11 | import random 12 | import ast 13 | 14 | tf.set_random_seed(10) 15 | 16 | def main(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--top_k', type=int, default=5, 19 | help='sample from top k predictions') 20 | parser.add_argument('--beta1', type=float, default=0.9, 21 | help='hyperpara-Adam') 22 | parser.add_argument('--datapath', type=str, default="Data/movielen_20/movielen_20.csv", 23 | help='data path') 24 | parser.add_argument('--save_dir', type=str, default="Models/ml20_baseline_16_emb64_bs256", 25 | help='save dir path') 26 | parser.add_argument('--eval_iter', type=int, default=1000, 27 | help='output evry x steps') 28 | parser.add_argument('--early_stop', type=int, default=10, 29 | help='after x eval_iter early stop') 30 | parser.add_argument('--step', type=int, default=400000, 31 | help='trainging step') 32 | parser.add_argument('--tt_percentage', type=float, default=0.2, 33 | help='0.2 means 80% training 20% testing') 34 | parser.add_argument('--data_ratio', type=float, default=0.4, 35 | help='real training data') 36 | parser.add_argument('--learning_rate', type=float, default=0.001, 37 | help='learning rate') 38 | parser.add_argument('--L2', type=float, default=0, 39 | help='L2 regularization') 40 | parser.add_argument('--dilation_count', type=int, default=16, 41 | help='dilation count number') 42 | parser.add_argument('--method', type=str, default="from_scratch", 43 | help='from_scratch, StackR, stackC, stackA') 44 | parser.add_argument('--load_model', type=ast.literal_eval, default=False, 45 | help='whether loading pretrain model') 46 | parser.add_argument('--model_path', type=str, default="Models/", 47 | help='load model path') 48 | parser.add_argument('--padid', type=int, default=0, 49 | help='pad id') 50 | args = 
parser.parse_args() 51 | 52 | print(args) 53 | 54 | dl = data_loader.Data_Loader({'dir_name': args.datapath, 'padid': args.padid}) 55 | all_samples = dl.item 56 | print(all_samples.shape) 57 | items = dl.item_dict 58 | print("len(items)",len(items)) 59 | 60 | # Randomly shuffle data 61 | np.random.seed(10) 62 | shuffle_indices = np.random.permutation(np.arange(len(all_samples))) 63 | all_samples = all_samples[shuffle_indices] 64 | 65 | # Split train/test set 66 | dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples))) 67 | train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:] 68 | 69 | random.seed(10) 70 | ratio = args.data_ratio 71 | train_set_len = len(train_set) 72 | train_index_set = set(list(range(train_set_len))) 73 | 74 | 75 | if ratio == 0.2: 76 | train_ratio = int(ratio * float(train_set_len)) 77 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 78 | real_train_set = train_set[real_train_index_set] 79 | train_set = np.array(real_train_set) 80 | print("real train len", len(train_set)) 81 | elif ratio == 0.4: 82 | last_ratio = ratio - 0.2 83 | last_train_ratio = int(last_ratio * float(train_set_len)) 84 | last_train_index_set = random.sample(list(train_index_set), last_train_ratio) 85 | last_train_set = train_set[last_train_index_set] 86 | 87 | remain_train_index_set = train_index_set - set(last_train_index_set) 88 | remain_len = len(remain_train_index_set) 89 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0/4.0 * float(remain_len))) 90 | new_train_set = train_set[new_train_index_set] 91 | 92 | real_train_set = np.concatenate((last_train_set, new_train_set), axis=0) 93 | train_set = np.array(real_train_set) 94 | print("real train len", len(train_set)) 95 | elif ratio == 0.6: 96 | last_last_ratio = ratio - 0.2 - 0.2 97 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 98 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 99 | last_last_train_set = train_set[last_last_train_index_set] 100 | 101 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 102 | remain_len = len(remain_train_index_set) 103 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0/4.0 * float(remain_len))) 104 | last_train_set = train_set[last_train_index_set] 105 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 106 | 107 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 108 | remain_len = len(remain_train_index_set) 109 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0/3.0 * float(remain_len))) 110 | new_train_set = train_set[new_train_index_set] 111 | 112 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 113 | train_set = np.array(real_train_set) 114 | print("real train len", len(train_set)) 115 | elif ratio == 0.8: 116 | last_last_ratio = ratio - 0.2 - 0.2 - 0.2 117 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 118 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 119 | last_last_train_set = train_set[last_last_train_index_set] 120 | 121 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 122 | remain_len = len(remain_train_index_set) 123 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0/4.0 * float(remain_len))) 124 | last_train_set = train_set[last_train_index_set] 125 | real_train_set 
= np.concatenate((last_last_train_set, last_train_set), axis=0) 126 | 127 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 128 | remain_len = len(remain_train_index_set) 129 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0/3.0 * float(remain_len))) 130 | new_train_set = train_set[new_train_index_set] 131 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 132 | 133 | remain_train_index_set = remain_train_index_set - set(new_train_index_set) 134 | remain_len = len(remain_train_index_set) 135 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0/2.0 * float(remain_len))) 136 | new_train_set = train_set[new_train_index_set] 137 | 138 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 139 | train_set = np.array(real_train_set) 140 | print("real train len", len(train_set)) 141 | elif ratio == 1: 142 | train_set = np.array(train_set) 143 | print("real train len", len(train_set)) 144 | else: 145 | train_ratio = int(ratio * float(train_set_len)) 146 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 147 | real_train_set = train_set[real_train_index_set] 148 | train_set = np.array(real_train_set) 149 | print("real train len", len(train_set)) 150 | 151 | model_para = { 152 | 'item_size': len(items), 153 | 'dilated_channels': 64, 154 | 'dilations': [1,4]*args.dilation_count, 155 | 'step': args.step, 156 | 'kernel_size': 3, 157 | 'learning_rate': args.learning_rate, 158 | 'L2': args.L2, 159 | 'batch_size': 256, 160 | 'load_model': args.load_model, 161 | 'model_path': args.model_path, 162 | 'method': args.method 163 | } 164 | 165 | print(model_para) 166 | 167 | 168 | if not os.path.exists(args.save_dir): 169 | os.makedirs(args.save_dir) 170 | 171 | itemrec = generator_deep.NextItNet_Decoder(model_para) 172 | itemrec.train_graph() 173 | optimizer = tf.train.AdamOptimizer(model_para['learning_rate'], beta1=args.beta1).minimize(itemrec.loss) 174 | itemrec.predict_graph(reuse=True) 175 | 176 | gpu_options = tf.GPUOptions(allow_growth=True) 177 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 178 | saver = tf.train.Saver(max_to_keep=1) 179 | 180 | init = tf.global_variables_initializer() 181 | sess.run(init) 182 | 183 | numIters = 1 184 | max_mrr = 0 185 | break_stick = 0 186 | early_stop = 0 187 | 188 | while(1): 189 | if break_stick == 1: 190 | break 191 | 192 | batch_no = 0 193 | batch_size = model_para['batch_size'] 194 | 195 | while (batch_no + 1) * batch_size < train_set.shape[0]: 196 | 197 | start = time.time() 198 | 199 | item_batch = train_set[batch_no * batch_size: (batch_no + 1) * batch_size, :] 200 | _, loss = sess.run( 201 | [optimizer, itemrec.loss], 202 | feed_dict={ 203 | itemrec.itemseq_input: item_batch 204 | }) 205 | 206 | end = time.time() 207 | 208 | if numIters % args.eval_iter == 0: 209 | print("-------------------------------------------------------train") 210 | print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format( 211 | loss, batch_no, numIters, train_set.shape[0] / batch_size)) 212 | print("TIME FOR BATCH", end - start) 213 | print("TIME FOR EPOCH (mins)", (end - start) * (train_set.shape[0] / batch_size) / 60.0) 214 | 215 | batch_no += 1 216 | 217 | if numIters % args.eval_iter == 0: 218 | print("-------------------------------------------------------test") 219 | batch_no_test = 0 220 | batch_size_test = batch_size*1 221 | curr_preds_5 = [] 222 | rec_preds_5 = [] 223 | ndcg_preds_5 = [] 224 | 
curr_preds_10 = [] 225 | rec_preds_10 = [] 226 | ndcg_preds_10 = [] 227 | while (batch_no_test + 1) * batch_size_test < valid_set.shape[0]: 228 | item_batch = valid_set[batch_no_test * batch_size_test: (batch_no_test + 1) * batch_size_test, :] 229 | [probs_10, probs_5] = sess.run( 230 | [itemrec.top_10, itemrec.top_5], 231 | feed_dict={ 232 | itemrec.input_predict: item_batch 233 | }) 234 | for bi in range(batch_size_test): 235 | pred_items_10 = probs_10[1][bi][-1] 236 | pred_items_5 = probs_5[1][bi][-1] 237 | 238 | true_item = item_batch[bi][-1] 239 | predictmap_5 = {ch : i for i, ch in enumerate(pred_items_5)} 240 | pred_items_10 = {ch: i for i, ch in enumerate(pred_items_10)} 241 | 242 | rank_5 = predictmap_5.get(true_item) 243 | rank_10 = pred_items_10.get(true_item) 244 | if rank_5 == None: 245 | curr_preds_5.append(0.0) 246 | rec_preds_5.append(0.0) 247 | ndcg_preds_5.append(0.0) 248 | else: 249 | MRR_5 = 1.0/(rank_5+1) 250 | Rec_5 = 1.0 251 | ndcg_5 = 1.0 / math.log(rank_5 + 2, 2) 252 | curr_preds_5.append(MRR_5) 253 | rec_preds_5.append(Rec_5) 254 | ndcg_preds_5.append(ndcg_5) 255 | if rank_10 == None: 256 | curr_preds_10.append(0.0) 257 | rec_preds_10.append(0.0) 258 | ndcg_preds_10.append(0.0) 259 | else: 260 | MRR_10 = 1.0/(rank_10+1) 261 | Rec_10 = 1.0 262 | ndcg_10 = 1.0 / math.log(rank_10 + 2, 2) 263 | curr_preds_10.append(MRR_10) 264 | rec_preds_10.append(Rec_10) 265 | ndcg_preds_10.append(ndcg_10) 266 | 267 | batch_no_test += 1 268 | 269 | mrr = sum(curr_preds_5) / float(len(curr_preds_5)) 270 | mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10)) 271 | hit = sum(rec_preds_5) / float(len(rec_preds_5)) 272 | hit_10 = sum(rec_preds_10) / float(len(rec_preds_10)) 273 | ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5)) 274 | ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10)) 275 | 276 | if mrr > max_mrr: 277 | max_mrr = mrr 278 | 279 | print("Save model! mrr_5:", mrr) 280 | print("Save model! mrr_10:", mrr_10) 281 | print("Save model! hit_5:", hit) 282 | print("Save model! hit_10:", hit_10) 283 | print("Save model! ndcg_5:", ndcg) 284 | print("Save model! ndcg_10:", ndcg_10) 285 | early_stop = 0 286 | saver.save(sess, args.save_dir + "/{}_{}_{}_{}.ckpt".format(args.dilation_count, args.learning_rate, args.data_ratio, args.step)) 287 | else: 288 | print("mrr_5:", mrr) 289 | print("mrr_10:", mrr_10) 290 | print("hit_5:", hit) 291 | print("hit_10:", hit_10) 292 | print("ndcg_5:", ndcg) 293 | print("ndcg_10:", ndcg_10) 294 | early_stop += 1 295 | 296 | if numIters >= model_para['step']: 297 | break_stick = 1 298 | break 299 | if early_stop >= args.early_stop: 300 | break_stick = 1 301 | print("early stop!") 302 | break 303 | 304 | numIters += 1 305 | 306 | 307 | if __name__ == '__main__': 308 | main() 309 | -------------------------------------------------------------------------------- /deep_nextitnet_coldrec.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import data_loader 3 | import generator_deep 4 | import shutil 5 | import time 6 | import math 7 | import numpy as np 8 | import argparse 9 | import sys 10 | import os 11 | import random 12 | import ast 13 | 14 | tf.set_random_seed(10) 15 | 16 | #Strongly suggest running codes on GPU with more than 10G memory!!! 
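# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original repository: the evaluation
# loops in deep_nextitnet.py, deep_GRec.py and this script all compute MRR@k,
# HR@k and NDCG@k inline from the index lists returned by tf.nn.top_k. The
# standalone helper below (hypothetical name, added only for illustration)
# condenses that per-example logic; `pred_items` is the ranked top-k item
# list and `true_item` is the held-out ground-truth item.
def rank_metrics_sketch(pred_items, true_item):
    """Return (mrr, hit, ndcg) for one example, mirroring the inline evaluation code."""
    import math
    rank = {item: i for i, item in enumerate(pred_items)}.get(true_item)
    if rank is None:  # ground-truth item was not retrieved in the top-k list
        return 0.0, 0.0, 0.0
    return 1.0 / (rank + 1), 1.0, 1.0 / math.log(rank + 2, 2)
# Example: rank_metrics_sketch([7, 3, 9, 1, 4], 9) returns (1/3, 1.0, 0.5).
# ---------------------------------------------------------------------------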
17 | 18 | def main(): 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('--top_k', type=int, default=5, 21 | help='sample from top k predictions') 22 | parser.add_argument('--beta1', type=float, default=0.9, 23 | help='hyperpara-Adam') 24 | parser.add_argument('--datapath', type=str, default="Data/coldrec/rec50_pretrain.csv", 25 | help='data path') 26 | parser.add_argument('--save_dir', type=str, default="Models/coldrec_baseline_4_emb64_bs256", 27 | help='save dir path') 28 | parser.add_argument('--eval_iter', type=int, default=1000, 29 | help='sample generator output evry x steps') 30 | parser.add_argument('--early_stop', type=int, default=10, 31 | help='after x step early stop') 32 | parser.add_argument('--step', type=int, default=400000, 33 | help='trainging step') 34 | parser.add_argument('--tt_percentage', type=float, default=0.2, 35 | help='0.2 means 80% training 20% testing') 36 | parser.add_argument('--data_ratio', type=float, default=1, 37 | help='real trainging data') 38 | parser.add_argument('--learning_rate', type=float, default=0.001, 39 | help='learning rate') 40 | parser.add_argument('--L2', type=float, default=0.001, 41 | help='L2 regularization') 42 | parser.add_argument('--dilation_count', type=int, default=4, 43 | help='dilation count number') 44 | parser.add_argument('--method', type=str, default="from_scratch", 45 | help='from_scratch, random_init, stack') 46 | parser.add_argument('--load_model', type=ast.literal_eval, default=False, 47 | help='whether loading pretrain model') 48 | parser.add_argument('--copy_softmax', type=ast.literal_eval, default=True, 49 | help='whether copying softmax param') 50 | parser.add_argument('--copy_layernorm', type=ast.literal_eval, default=True, 51 | help='whether copying layernorm param') 52 | parser.add_argument('--model_path', type=str, default="Models/", 53 | help='load model path') 54 | parser.add_argument('--padid', type=int, default=0, 55 | help='pad id') 56 | args = parser.parse_args() 57 | 58 | print(args) 59 | 60 | dl = data_loader.Data_Loader({'dir_name': args.datapath, 'padid': args.padid}) 61 | all_samples = dl.item 62 | print(all_samples.shape) 63 | items = dl.item_dict 64 | print("len(items)",len(items)) 65 | 66 | # Randomly shuffle data 67 | np.random.seed(10) 68 | shuffle_indices = np.random.permutation(np.arange(len(all_samples))) 69 | all_samples = all_samples[shuffle_indices] 70 | 71 | # Split train/test set 72 | dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples))) 73 | train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:] 74 | 75 | random.seed(10) 76 | ratio = args.data_ratio 77 | train_set_len = len(train_set) 78 | train_index_set = set(list(range(train_set_len))) 79 | 80 | if ratio == 0.2: 81 | train_ratio = int(ratio * float(train_set_len)) 82 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 83 | real_train_set = train_set[real_train_index_set] 84 | train_set = np.array(real_train_set) 85 | print("real train len", len(train_set)) 86 | elif ratio == 0.4: 87 | last_ratio = ratio - 0.2 88 | last_train_ratio = int(last_ratio * float(train_set_len)) 89 | last_train_index_set = random.sample(list(train_index_set), last_train_ratio) 90 | last_train_set = train_set[last_train_index_set] 91 | 92 | remain_train_index_set = train_index_set - set(last_train_index_set) 93 | remain_len = len(remain_train_index_set) 94 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 95 | 
new_train_set = train_set[new_train_index_set] 96 | 97 | real_train_set = np.concatenate((last_train_set, new_train_set), axis=0) 98 | train_set = np.array(real_train_set) 99 | print("real train len", len(train_set)) 100 | elif ratio == 0.6: 101 | last_last_ratio = ratio - 0.2 - 0.2 102 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 103 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 104 | last_last_train_set = train_set[last_last_train_index_set] 105 | 106 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 107 | remain_len = len(remain_train_index_set) 108 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 109 | last_train_set = train_set[last_train_index_set] 110 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 111 | 112 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 113 | remain_len = len(remain_train_index_set) 114 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 3.0 * float(remain_len))) 115 | new_train_set = train_set[new_train_index_set] 116 | 117 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 118 | train_set = np.array(real_train_set) 119 | print("real train len", len(train_set)) 120 | elif ratio == 0.8: 121 | last_last_ratio = ratio - 0.2 - 0.2 - 0.2 122 | last_last_train_ratio = int(last_last_ratio * float(train_set_len)) 123 | last_last_train_index_set = random.sample(list(train_index_set), last_last_train_ratio) 124 | last_last_train_set = train_set[last_last_train_index_set] 125 | 126 | remain_train_index_set = train_index_set - set(last_last_train_index_set) 127 | remain_len = len(remain_train_index_set) 128 | last_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len))) 129 | last_train_set = train_set[last_train_index_set] 130 | real_train_set = np.concatenate((last_last_train_set, last_train_set), axis=0) 131 | 132 | remain_train_index_set = remain_train_index_set - set(last_train_index_set) 133 | remain_len = len(remain_train_index_set) 134 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 3.0 * float(remain_len))) 135 | new_train_set = train_set[new_train_index_set] 136 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 137 | 138 | remain_train_index_set = remain_train_index_set - set(new_train_index_set) 139 | remain_len = len(remain_train_index_set) 140 | new_train_index_set = random.sample(list(remain_train_index_set), int(1.0 / 2.0 * float(remain_len))) 141 | new_train_set = train_set[new_train_index_set] 142 | 143 | real_train_set = np.concatenate((real_train_set, new_train_set), axis=0) 144 | train_set = np.array(real_train_set) 145 | print("real train len", len(train_set)) 146 | elif ratio == 1: 147 | train_set = np.array(train_set) 148 | print("real train len", len(train_set)) 149 | else: 150 | train_ratio = int(ratio * float(train_set_len)) 151 | real_train_index_set = random.sample(list(train_index_set), train_ratio) 152 | real_train_set = train_set[real_train_index_set] 153 | train_set = np.array(real_train_set) 154 | print("real train len", len(train_set)) 155 | 156 | model_para = { 157 | 'item_size': len(items), 158 | 'dilated_channels': 64, 159 | 'dilations': [1,4]*args.dilation_count, 160 | 'step': args.step, 161 | 'kernel_size': 3, 162 | 'learning_rate': args.learning_rate, 163 | 'L2': args.L2, 164 | 
'batch_size': 256, 165 | 'load_model': args.load_model, 166 | 'model_path': args.model_path, 167 | 'copy_softmax': args.copy_softmax, 168 | 'copy_layernorm': args.copy_layernorm, 169 | 'method': args.method 170 | } 171 | 172 | print(model_para) 173 | 174 | 175 | if not os.path.exists(args.save_dir): 176 | os.makedirs(args.save_dir) 177 | 178 | itemrec = generator_deep.NextItNet_Decoder(model_para) 179 | itemrec.train_graph() 180 | optimizer = tf.train.AdamOptimizer(model_para['learning_rate'], beta1=args.beta1).minimize(itemrec.loss) 181 | itemrec.predict_graph(reuse=True) 182 | 183 | tf.add_to_collection("dilate_input", itemrec.dilate_input) 184 | tf.add_to_collection("context_embedding", itemrec.context_embedding) 185 | 186 | sess = tf.Session() 187 | init = tf.global_variables_initializer() 188 | sess.run(init) 189 | saver = tf.train.Saver(max_to_keep=1) 190 | 191 | #writer=tf.summary.FileWriter('./stack_graph',sess.graph) 192 | 193 | numIters = 1 194 | max_mrr = 0 195 | break_stick = 0 196 | early_stop = 0 197 | while(1): 198 | if break_stick == 1: 199 | break 200 | 201 | batch_no = 0 202 | batch_size = model_para['batch_size'] 203 | 204 | while (batch_no + 1) * batch_size < train_set.shape[0]: 205 | 206 | start = time.time() 207 | 208 | item_batch = train_set[batch_no * batch_size: (batch_no + 1) * batch_size, :] 209 | _, loss = sess.run( 210 | [optimizer, itemrec.loss], 211 | feed_dict={ 212 | itemrec.itemseq_input: item_batch 213 | }) 214 | end = time.time() 215 | if numIters % args.eval_iter == 0: 216 | print("-------------------------------------------------------train") 217 | print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format( 218 | loss, batch_no, numIters, train_set.shape[0] / batch_size)) 219 | print("TIME FOR BATCH", end - start) 220 | print("TIME FOR EPOCH (mins)", (end - start) * (train_set.shape[0] / batch_size) / 60.0) 221 | 222 | batch_no += 1 223 | 224 | 225 | if numIters % args.eval_iter == 0: 226 | print("-------------------------------------------------------test") 227 | batch_no_test = 0 228 | batch_size_test = batch_size*1 229 | curr_preds_5 = [] 230 | rec_preds_5 = [] 231 | ndcg_preds_5 = [] 232 | curr_preds_10 = [] 233 | rec_preds_10 = [] 234 | ndcg_preds_10 = [] 235 | while (batch_no_test + 1) * batch_size_test < valid_set.shape[0]: 236 | item_batch = valid_set[batch_no_test * batch_size_test: (batch_no_test + 1) * batch_size_test, :] 237 | [probs_10, probs_5] = sess.run( 238 | [itemrec.top_10, itemrec.top_5], 239 | feed_dict={ 240 | itemrec.input_predict: item_batch 241 | }) 242 | #print(probs_10[1].shape) #(256,1,10) 243 | for bi in range(batch_size_test): 244 | pred_items_10 = probs_10[1][bi][-1] 245 | pred_items_5 = probs_5[1][bi][-1] 246 | 247 | 248 | true_item = item_batch[bi][-1] 249 | predictmap_5 = {ch : i for i, ch in enumerate(pred_items_5)} 250 | pred_items_10 = {ch: i for i, ch in enumerate(pred_items_10)} 251 | 252 | rank_5 = predictmap_5.get(true_item) 253 | rank_10 = pred_items_10.get(true_item) 254 | if rank_5 == None: 255 | curr_preds_5.append(0.0) 256 | rec_preds_5.append(0.0) 257 | ndcg_preds_5.append(0.0) 258 | else: 259 | MRR_5 = 1.0/(rank_5+1) 260 | Rec_5 = 1.0 261 | ndcg_5 = 1.0 / math.log(rank_5 + 2, 2) 262 | curr_preds_5.append(MRR_5) 263 | rec_preds_5.append(Rec_5) 264 | ndcg_preds_5.append(ndcg_5) 265 | if rank_10 == None: 266 | curr_preds_10.append(0.0) 267 | rec_preds_10.append(0.0) 268 | ndcg_preds_10.append(0.0) 269 | else: 270 | MRR_10 = 1.0/(rank_10+1) 271 | Rec_10 = 1.0 272 | ndcg_10 = 1.0 / math.log(rank_10 + 
2, 2) 273 | curr_preds_10.append(MRR_10) 274 | rec_preds_10.append(Rec_10) 275 | ndcg_preds_10.append(ndcg_10) 276 | 277 | batch_no_test += 1 278 | 279 | mrr = sum(curr_preds_5) / float(len(curr_preds_5)) 280 | mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10)) 281 | hit = sum(rec_preds_5) / float(len(rec_preds_5)) 282 | hit_10 = sum(rec_preds_10) / float(len(rec_preds_10)) 283 | ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5)) 284 | ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10)) 285 | 286 | if mrr > max_mrr: 287 | max_mrr = mrr 288 | 289 | print("Save model! mrr_5:", mrr) 290 | print("Save model! mrr_10:", mrr_10) 291 | print("Save model! hit_5:", hit) 292 | print("Save model! hit_10:", hit_10) 293 | print("Save model! ndcg_5:", ndcg) 294 | print("Save model! ndcg_10:", ndcg_10) 295 | early_stop = 0 296 | saver.save(sess, args.save_dir + "/{}_{}_{}_{}.ckpt".format(args.dilation_count, args.learning_rate, args.data_ratio, args.step)) 297 | else: 298 | print("mrr_5:", mrr) 299 | print("mrr_10:", mrr_10) 300 | print("hit_5:", hit) 301 | print("hit_10:", hit_10) 302 | print("ndcg_5:", ndcg) 303 | print("ndcg_10:", ndcg_10) 304 | early_stop += 1 305 | 306 | if numIters >= model_para['step']: 307 | break_stick = 1 308 | break 309 | if early_stop >= args.early_stop: 310 | break_stick = 1 311 | print("early stop!") 312 | break 313 | 314 | numIters += 1 315 | 316 | 317 | if __name__ == '__main__': 318 | main() 319 | -------------------------------------------------------------------------------- /fineall.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import data_loader_finetune 3 | import shutil 4 | import time 5 | import math 6 | import numpy as np 7 | import argparse 8 | import sys 9 | import os 10 | 11 | 12 | def shuffleseq(train_set,padtoken): 13 | shuffle_seqtrain = [] 14 | for i in range(len(train_set)): 15 | # print x_train[i] 16 | seq = train_set[i][1:] 17 | lenseq = len(seq) 18 | # split=np.split(padtoken) 19 | copyseq=list(seq) 20 | padcount = copyseq.count(padtoken) #the number of padding elements 21 | copyseq = copyseq[padcount:] # the remaining elements 22 | # copyseq=seq 23 | shuffle_indices = np.random.permutation(np.arange(len(copyseq))) 24 | # list to array 25 | copyseq= np.array(copyseq) 26 | copyseq_shuffle=copyseq[shuffle_indices] 27 | 28 | padtoken_list=[padtoken]*padcount 29 | # array to list, + means concat in list and real plus in array 30 | seq=list(train_set[i][0:1])+padtoken_list+list(copyseq_shuffle) 31 | shuffle_seqtrain.append(seq) 32 | 33 | 34 | x_train = np.array(shuffle_seqtrain) # list to ndarray 35 | print("shuffling is done!") 36 | return x_train 37 | 38 | def random_neq(l, r, s): 39 | t = np.random.randint(l, r) 40 | while t == s: 41 | t = np.random.randint(l, r) 42 | return t 43 | 44 | def random_negs(l,r,no,s): 45 | # set_s=set(s) 46 | negs = [] 47 | for i in range(no): 48 | t = np.random.randint(l, r) 49 | # while (t in set_s): 50 | while (t== s): 51 | t = np.random.randint(l, r) 52 | negs.append(t) 53 | return negs 54 | 55 | def main(): 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--top_k', type=int, default=5, 58 | help='Sample from top k predictions, you cannot set top_k=1 due to evaluation np.squeeze') 59 | parser.add_argument('--beta1', type=float, default=0.9, 60 | help='hyperpara-Adam') 61 | parser.add_argument('--eval_iter', type=int, default=1000, 62 | help='Sample generator output evry x steps') 63 | parser.add_argument('--save_para_every', 
type=int, default=1000, 64 | help='save model parameters every') 65 | parser.add_argument('--datapath', type=str, default='Data/coldrec/rec50_finetune.csv', 66 | help='data path') 67 | parser.add_argument('--tt_percentage', type=float, default=0.2, 68 | help='default=0.2 means 80% training 20% testing') 69 | parser.add_argument('--is_generatesubsession', type=bool, default=False, 70 | help='whether generating a subsessions, e.g., 12345-->01234,00123,00012 It may be useful for very some very long sequences') 71 | parser.add_argument('--save_dir', type=str, default="Models/ml20", 72 | help='save dir path') 73 | parser.add_argument('--model_path', type=str, default="Models/", 74 | help='load model path') 75 | parser.add_argument('--dilation_count', type=int, default=16, 76 | help='dilation count number') 77 | parser.add_argument('--padtoken', type=str, default='0', 78 | help='is the padding token in the beggining of the sequence') 79 | parser.add_argument('--negtive_samples', type=int, default='99', 80 | help='the number of negative examples for each positive one') 81 | parser.add_argument('--is_shuffle', type=bool, default=False, 82 | help='whether shuffle the training and testing dataset, e.g., 012345-->051324') 83 | args = parser.parse_args() 84 | 85 | 86 | 87 | dl = data_loader_finetune.Data_Loader({'model_type': 'generator', 'dir_name': args.datapath}) 88 | 89 | all_samples = dl.example 90 | 91 | items = dl.item_dict 92 | items_len = len(items) 93 | print("len(items)", len(items)) 94 | targets = dl.target_dict 95 | targets_len=len(targets) 96 | print("len(targets)", len(targets)) 97 | 98 | negtive_samples=args.negtive_samples 99 | top_k=args.top_k 100 | 101 | if items.has_key(args.padtoken): 102 | padtoken = items[args.padtoken] # is the padding token in the beggining of the sentence 103 | else: 104 | padtoken=len(items)+1 105 | 106 | 107 | np.random.seed(10) 108 | shuffle_indices = np.random.permutation(np.arange(len(all_samples))) 109 | all_samples = all_samples[shuffle_indices] 110 | 111 | 112 | 113 | # Split train/test set 114 | dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples))) 115 | train_set, valid_set = all_samples[:dev_sample_index], all_samples[dev_sample_index:] 116 | 117 | 118 | 119 | model_para = { 120 | #all parameters shuold be consist with those in nextitred.py!!!! 
121 | 'item_size': len(items), 122 | 'target_item_size': len(targets), 123 | 'dilated_channels': 64, 124 | 'dilations': [1,4]*args.dilation_count, 125 | 'kernel_size': 3, 126 | 'learning_rate':0.001, 127 | 'batch_size': 512, #you can not use batch_size=1 since in the following you use np.squeeze will reuduce one dimension 128 | 'iterations': 10 129 | } 130 | sess = tf.Session() 131 | 132 | 133 | 134 | new_saver = tf.train.import_meta_graph(args.model_path) 135 | new_saver.restore(sess, tf.train.latest_checkpoint(args.save_dir)) 136 | graph = tf.get_default_graph() 137 | 138 | trainable_vars = tf.trainable_variables() 139 | itemseq_input = graph.get_tensor_by_name("itemseq_input:0") 140 | # allitem_embeddings = graph.get_tensor_by_name("allitem_embeddings:0") 141 | 142 | allitem_embeddings=tf.trainable_variables()[0] 143 | dilate_input=tf.get_collection("dilate_input")[0] 144 | 145 | cnn_vars = [] 146 | for i in range(64): 147 | cnn_vars.append(tf.trainable_variables()[i]) 148 | context_embedding = tf.get_collection("context_embedding")[0] 149 | print("allitem_embeddings", (sess.run(allitem_embeddings))) 150 | 151 | 152 | 153 | # source_item_embedding=tf.reduce_mean(dilate_input,1) 154 | source_item_embedding = tf.reduce_mean(dilate_input[:, -1:, :], 1) # use the last token 155 | embedding_size = tf.shape(source_item_embedding)[1] 156 | with tf.variable_scope("target-item"): 157 | allitem_embeddings_target = tf.get_variable('allitem_embeddings_target', 158 | [model_para['target_item_size'], 159 | model_para['dilated_channels']], 160 | initializer=tf.truncated_normal_initializer(stddev=0.02), 161 | regularizer=tf.contrib.layers.l2_regularizer(0.02) 162 | ) 163 | is_training = tf.placeholder(tf.bool, shape=()) 164 | # training 165 | itemseq_input_target_pos = tf.placeholder('int32', 166 | [None, None], name='itemseq_input_pos') 167 | itemseq_input_target_neg = tf.placeholder('int32', 168 | [None, None], name='itemseq_input_neg') 169 | target_item_embedding_pos = tf.nn.embedding_lookup(allitem_embeddings_target, 170 | itemseq_input_target_pos, 171 | name="target_item_embedding_pos") 172 | target_item_embedding_neg = tf.nn.embedding_lookup(allitem_embeddings_target, 173 | itemseq_input_target_neg, 174 | name="target_item_embedding_neg") 175 | 176 | pos_score = source_item_embedding * tf.reshape(target_item_embedding_pos, [-1, embedding_size]) 177 | neg_score = source_item_embedding * tf.reshape(target_item_embedding_neg, [-1, embedding_size]) 178 | pos_logits = tf.reduce_sum(pos_score, -1) 179 | neg_logits = tf.reduce_sum(neg_score, -1) 180 | 181 | 182 | # testing 183 | itemseq_input_target_label = tf.placeholder('int32', 184 | [None, None], name='itemseq_input_target_label') 185 | tf.add_to_collection("itemseq_input_target_label", itemseq_input_target_label) 186 | 187 | target_label_item_embedding = tf.nn.embedding_lookup(allitem_embeddings_target, 188 | itemseq_input_target_label, 189 | name="target_label_item_embedding") 190 | 191 | source_item_embedding_test = tf.expand_dims(source_item_embedding, 1) # (batch, 1, embeddingsize) 192 | target_item_embedding = tf.transpose(target_label_item_embedding, [0, 2, 1]) # transpose 193 | score_test = tf.matmul(source_item_embedding_test, target_item_embedding) 194 | top_k_test = tf.nn.top_k(score_test[:, :], k=top_k, name='top-k') 195 | tf.add_to_collection("top_k", top_k_test[1]) 196 | 197 | loss = tf.reduce_mean( 198 | - tf.log(tf.sigmoid(pos_logits) + 1e-24) - 199 | tf.log(1 - tf.sigmoid(neg_logits) + 1e-24) 200 | ) 201 | reg_losses = 
tf.reduce_mean(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 202 | loss += reg_losses 203 | 204 | sc_variable2 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='target-item') 205 | optimizer = tf.train.AdamOptimizer(model_para['learning_rate'], beta1=args.beta1, name='Adam2').minimize(loss) 206 | unitialized_vars = [] 207 | for var in tf.global_variables(): 208 | try: 209 | sess.run(var) 210 | except tf.errors.FailedPreconditionError: 211 | unitialized_vars.append(var) 212 | 213 | initialize_op = tf.variables_initializer(unitialized_vars) 214 | vars = tf.trainable_variables() 215 | sess.run(initialize_op) 216 | saver = tf.train.Saver() 217 | 218 | numIters = 1 219 | best_mrr = 0 220 | for iter in range(model_para['iterations']): 221 | batch_no = 0 222 | batch_size = model_para['batch_size'] 223 | while (batch_no + 1) * batch_size < train_set.shape[0]: 224 | start = time.time() 225 | #the first n-1 is source, the last one is target 226 | #item_batch=[[1,2,3],[4,5,6]] 227 | item_batch = train_set[batch_no * batch_size: (batch_no + 1) * batch_size, :] 228 | 229 | pos_batch=item_batch[:,-1]#[3 6] used for negative sampling 230 | source_batch=item_batch[:,:-1]# 231 | pos_target=item_batch[:,-1:]#[[3][6]] 232 | neg_target = np.array([[random_neq(1, targets_len, s)] for s in pos_batch]) 233 | _, loss_out, reg_losses_out = sess.run( 234 | [optimizer, loss, reg_losses], 235 | feed_dict={ 236 | itemseq_input: source_batch, 237 | itemseq_input_target_pos:pos_target, 238 | itemseq_input_target_neg:neg_target 239 | }) 240 | 241 | end = time.time() 242 | 243 | if numIters % args.eval_iter == 0: 244 | print("-------------------------------------------------------train1") 245 | print("LOSS: {}\Reg_LOSS: {}\tITER: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format( 246 | loss_out, reg_losses_out,iter, batch_no, numIters, train_set.shape[0] / batch_size)) 247 | print("TIME FOR BATCH", end - start) 248 | print("TIME FOR ITER (mins)", (end - start) * (train_set.shape[0] / batch_size) / 60.0) 249 | 250 | batch_no += 1 251 | 252 | if numIters % args.eval_iter == 0: 253 | batch_no_test = 0 254 | batch_size_test = batch_size * 1 255 | # batch_size_test = 1 256 | hits = [] # 1 257 | mrrs = [] # ---add 1 258 | 259 | while (batch_no_test + 1) * batch_size_test < valid_set.shape[0]: 260 | 261 | item_batch = valid_set[batch_no_test * batch_size_test: (batch_no_test + 1) * batch_size_test, :] 262 | pos_batch = item_batch[:, -1] # [3 6] used for negative sampling 263 | source_batch = item_batch[:, :-1] # 264 | pos_target = item_batch[:, -1:] # [[3][6]] 265 | 266 | neg_target = np.array([random_negs(1, targets_len, negtive_samples, s) for s in pos_batch]) 267 | target=np.array(np.concatenate([neg_target,pos_target],1)) 268 | 269 | [top_k_batch] = sess.run( 270 | [top_k_test], 271 | feed_dict={ 272 | itemseq_input: source_batch, 273 | itemseq_input_target_label: target 274 | }) 275 | 276 | #note that in top_k_batch[1], such as [1 9 4 5 0], we just need to check whether 0 is here, that's fine 277 | top_k = np.squeeze(top_k_batch[1]) #remove one dimension since e.g., [[[1,2,4]],[[34,2,4]]]-->[[1,2,4],[34,2,4]] 278 | for i in range(top_k.shape[0]): 279 | top_k_per_batch = top_k[i] 280 | predictmap = {ch: i for i, ch in enumerate(top_k_per_batch)} # add 2 281 | rank = predictmap.get(negtive_samples) # add 3 282 | if rank == None: 283 | hits.append(0.0) 284 | mrrs.append(0.0) # add 5 285 | else: 286 | hits.append(1.0) 287 | mrrs.append(1.0 / (rank + 1)) # add 4 288 | batch_no_test += 1 289 | 
print("-------------------------------------------------------Accuracy") 290 | if len(hits)!=0: 291 | hit_5 = sum(hits)/float(len(hits)) 292 | mrr_5 = sum(mrrs)/float(len(mrrs)) 293 | print("Accuracy hit_n:", hit_5, "MRR_n:", mrr_5) # 5 294 | if mrr_5 > best_mrr: 295 | best_mrr = mrr_5 296 | print("best is here!!!!!!!!!!!!!!!!!!!!!!!!", mrr_5, hit_5) 297 | numIters += 1 298 | 299 | 300 | if __name__ == '__main__': 301 | main() 302 | -------------------------------------------------------------------------------- /generator_deep.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import ops 3 | import numpy as np 4 | 5 | class NextItNet_Decoder: 6 | 7 | def __init__(self, model_para): 8 | self.model_para = model_para 9 | self.load_model = model_para['load_model'] 10 | self.method = model_para['method'] 11 | self.L2 = model_para['L2'] 12 | embedding_width = model_para['dilated_channels'] 13 | 14 | 15 | if self.load_model: 16 | self.model_path = model_para['model_path'] 17 | self.reader = tf.train.NewCheckpointReader(self.model_path) 18 | variable_name = 'allitem_embeddings' 19 | initial_value = self.get_parameters(self.reader, variable_name, variable_name) 20 | self.allitem_embeddings = tf.get_variable('allitem_embeddings', 21 | [model_para['item_size'], embedding_width], 22 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 23 | else: 24 | self.allitem_embeddings = tf.get_variable('allitem_embeddings', 25 | [model_para['item_size'], embedding_width], 26 | initializer=tf.truncated_normal_initializer(stddev=0.02)) 27 | 28 | def get_parameters(self, reader, variable_name, new_variable_name): 29 | print(variable_name, " ---> ", new_variable_name) 30 | param = reader.get_tensor(variable_name) 31 | return param 32 | 33 | 34 | def train_graph(self): 35 | self.itemseq_input = tf.placeholder('int32', 36 | [None, None], name='itemseq_input') 37 | label_seq, self.dilate_input=self.model_graph(self.itemseq_input, train=True) 38 | 39 | model_para = self.model_para 40 | 41 | logits_2D = tf.reshape(self.dilate_input, [-1,model_para['dilated_channels']]) 42 | 43 | 44 | self.softmax_w = tf.get_variable("softmax_w", [model_para['item_size'], model_para['dilated_channels']], tf.float32, tf.random_normal_initializer(0.0, 0.01)) 45 | self.softmax_b = tf.get_variable("softmax_b", [model_para['item_size']], tf.float32, tf.constant_initializer(0.1)) 46 | 47 | label_flat = tf.reshape(label_seq, [-1, 1]) 48 | num_sampled = int(0.2 * model_para['item_size']) 49 | 50 | loss = tf.nn.sampled_softmax_loss(self.softmax_w, self.softmax_b, label_flat, logits_2D, num_sampled, model_para['item_size']) 51 | 52 | 53 | self.loss = tf.reduce_mean(loss) 54 | 55 | if self.L2 != 0: 56 | regularization = self.L2 * tf.reduce_mean([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 57 | self.loss = self.loss + regularization 58 | 59 | 60 | def model_graph(self, itemseq_input, train=True): 61 | model_para = self.model_para 62 | context_seq = itemseq_input[:, 0:-1] 63 | label_seq = itemseq_input[:, 1:] 64 | 65 | self.context_embedding = tf.nn.embedding_lookup(self.allitem_embeddings, 66 | context_seq, name="context_embedding") 67 | dilate_input = self.context_embedding 68 | layer_num = len(model_para['dilations']) 69 | 70 | for layer_id, dilation in enumerate(model_para['dilations']): 71 | if self.load_model: 72 | dilate_input = ops.nextitnet_residual_block_alpha(dilate_input, dilation, 73 | layer_id, self.method, model_para['dilated_channels'], 74 | 
model_para['kernel_size'], self.reader, layer_num, train=train) 75 | else: 76 | dilate_input = ops.nextitnet_residual_block_alpha(dilate_input, dilation, 77 | layer_id, self.method, model_para['dilated_channels'], 78 | model_para['kernel_size'], None, layer_num, train=train) 79 | 80 | return label_seq, dilate_input 81 | 82 | 83 | def predict_graph(self, reuse=False): 84 | if reuse: 85 | tf.get_variable_scope().reuse_variables() 86 | self.input_predict = tf.placeholder('int32', [None, None], name='input_predict') 87 | 88 | label_seq, dilate_input = self.model_graph(self.input_predict, train=False) 89 | model_para = self.model_para 90 | 91 | 92 | logits_2D = tf.reshape(dilate_input[:, -1:, :], [-1, model_para['dilated_channels']]) 93 | logits_2D = tf.matmul(logits_2D, tf.transpose(self.softmax_w)) 94 | logits_2D = tf.nn.bias_add(logits_2D, self.softmax_b) 95 | 96 | 97 | probs_flat = tf.nn.softmax(logits_2D) 98 | 99 | self.g_probs = tf.reshape(probs_flat, [-1, 1, model_para['item_size']]) 100 | self.top_10 = tf.nn.top_k(self.g_probs, 10) 101 | self.top_5 = tf.nn.top_k(self.g_probs, 5) 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /generator_deep_GRec.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import ops 3 | import numpy as np 4 | import utils_GRec 5 | 6 | 7 | class NextItNet_Decoder: 8 | def __init__(self, model_para): 9 | self.model_para = model_para 10 | self.load_model = model_para['load_model'] 11 | self.method = model_para['method'] 12 | self.L2 = model_para['L2'] 13 | embedding_width = model_para['dilated_channels'] 14 | 15 | if self.load_model: 16 | self.model_path = model_para['model_path'] 17 | self.reader = tf.train.NewCheckpointReader(self.model_path) 18 | variable_name = 'allitem_embeddings' 19 | initial_value = self.get_parameters(self.reader, variable_name, variable_name) 20 | self.allitem_embeddings = tf.get_variable('allitem_embeddings', 21 | [model_para['item_size'], embedding_width], 22 | initializer=tf.constant_initializer(initial_value, 23 | verify_shape=True)) 24 | else: 25 | self.allitem_embeddings = tf.get_variable('allitem_embeddings', 26 | [model_para['item_size'], embedding_width], 27 | initializer=tf.truncated_normal_initializer(stddev=0.02)) 28 | 29 | self.itemseq_input = tf.placeholder('int32', 30 | [None, None], name='itemseq_input') 31 | 32 | 33 | self.softmax_w = tf.get_variable("softmax_w", [model_para['item_size'], model_para['dilated_channels']], 34 | tf.float32, tf.random_normal_initializer(0.0, 0.01)) 35 | 36 | 37 | self.softmax_b = tf.get_variable("softmax_b", [model_para['item_size']], tf.float32, 38 | tf.constant_initializer(0.1)) 39 | 40 | def get_parameters(self, reader, variable_name, new_variable_name): 41 | print(variable_name, " ---> ", new_variable_name) 42 | param = reader.get_tensor(variable_name) 43 | return param 44 | 45 | def train_graph(self): 46 | 47 | self.masked_position = tf.placeholder('int32', 48 | [None, None], name='masked_position') 49 | self.itemseq_output = tf.placeholder('int32', 50 | [None, None], name='itemseq_output') 51 | self.masked_items = tf.placeholder('int32', 52 | [None, None], name='masked_items') 53 | self.label_weights = tf.placeholder(tf.float32, 54 | [None, None], name='label_weights') 55 | 56 | context_seq = self.itemseq_input 57 | label_seq = self.label_weights 58 | 59 
| self.dilate_input = self.model_graph(context_seq, train=True) 60 | 61 | self.loss = self.get_masked_lm_output(self.model_para, self.dilate_input, 62 | self.masked_position, 63 | self.masked_items, label_seq, trainable=True) 64 | 65 | def model_graph(self, itemseq_input, train=True): 66 | model_para = self.model_para 67 | 68 | self.context_embedding = tf.nn.embedding_lookup(self.allitem_embeddings, 69 | itemseq_input, name="context_embedding") 70 | dilate_input = self.context_embedding 71 | layer_num = len(model_para['dilations']) 72 | 73 | 74 | for layer_id, dilation in enumerate(model_para['dilations']): 75 | if self.load_model: 76 | dilate_input = ops.nextitnet_residual_block_alpha(dilate_input, dilation, 77 | layer_id, self.method, 78 | model_para['dilated_channels'], 79 | model_para['kernel_size'], self.reader, layer_num, 80 | train=train) 81 | 82 | else: 83 | dilate_input = ops.nextitnet_residual_block_alpha(dilate_input, dilation, 84 | layer_id, self.method, model_para['dilated_channels'], 85 | model_para['kernel_size'], None, layer_num, train=train) 86 | 87 | 88 | return dilate_input 89 | 90 | def predict_graph(self, reuse=False): 91 | if reuse: 92 | tf.get_variable_scope().reuse_variables() 93 | 94 | context_seq = self.itemseq_input 95 | 96 | dilate_input = self.model_graph(context_seq, train=False) 97 | model_para = self.model_para 98 | 99 | logits_2D = tf.reshape(dilate_input[:, -1:, :], [-1, model_para['dilated_channels']]) 100 | logits_2D = tf.matmul(logits_2D, tf.transpose(self.softmax_w)) 101 | logits_2D = tf.nn.bias_add(logits_2D, self.softmax_b) 102 | 103 | 104 | probs_flat = tf.nn.softmax(logits_2D) 105 | 106 | self.g_probs = tf.reshape(probs_flat, [-1, 1, model_para['item_size']]) 107 | self.top_10 = tf.nn.top_k(self.g_probs, 10) # top10 108 | self.top_5 = tf.nn.top_k(self.g_probs, 5) 109 | 110 | def gather_indexes(self, sequence_tensor, positions): 111 | """Gathers the vectors at the specific positions over a minibatch.""" 112 | sequence_shape = utils_GRec.get_shape_list(sequence_tensor, expected_rank=3) 113 | batch_size = sequence_shape[0] 114 | seq_length = sequence_shape[1] 115 | width = sequence_shape[2] 116 | 117 | flat_offsets = tf.reshape( 118 | tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1]) 119 | flat_positions = tf.reshape(positions + flat_offsets, [-1]) 120 | flat_sequence_tensor = tf.reshape(sequence_tensor, 121 | [batch_size * seq_length, width]) 122 | output_tensor = tf.gather(flat_sequence_tensor, flat_positions) 123 | return output_tensor 124 | 125 | def get_masked_lm_output(self, bert_config, input_tensor, positions, 126 | label_ids, label_weights, trainable=True): 127 | """Get loss and log probs for the masked LM.""" 128 | 129 | input_tensor = self.gather_indexes(input_tensor, positions) 130 | 131 | logits_2D = input_tensor 132 | label_flat = tf.reshape(label_ids, [-1, 1]) # 1 is the number of positive example 133 | num_sampled = int(0.2 * self.model_para['item_size']) # sample 20% as negatives 134 | loss = tf.nn.sampled_softmax_loss(self.softmax_w, self.softmax_b, label_flat, logits_2D, 135 | num_sampled, 136 | self.model_para['item_size']) 137 | 138 | loss = tf.reduce_mean(loss) 139 | regularization = self.L2 * tf.reduce_mean([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 140 | loss = loss + regularization 141 | 142 | return loss 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 
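Editor's note (illustrative sketch, not part of the repository): gather_indexes above selects the hidden vectors at the masked positions by adding a per-example offset and indexing into the tensor flattened to [batch_size * seq_length, width]; the sampled softmax loss is then computed only on those gathered rows. A minimal NumPy equivalent of that flat-offset indexing, with made-up shapes:

import numpy as np

batch_size, seq_length, width = 2, 5, 3
# fake "hidden states": one row of `width` values per (example, position)
sequence_tensor = np.arange(batch_size * seq_length * width, dtype=np.float32).reshape(
    batch_size, seq_length, width)
positions = np.array([[1, 4], [0, 2]])  # masked positions for each example

flat_offsets = (np.arange(batch_size) * seq_length).reshape(-1, 1)  # [[0], [5]]
flat_positions = (positions + flat_offsets).reshape(-1)             # [1, 4, 5, 7]
flat_sequence = sequence_tensor.reshape(batch_size * seq_length, width)
gathered = flat_sequence[flat_positions]                            # shape (4, 3)
print(gathered.shape)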
-------------------------------------------------------------------------------- /ops.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import math 3 | import numpy as np 4 | 5 | def get_parameters(train, reader, variable_name, new_variable_name): 6 | if train: 7 | print(variable_name, " ---> ", new_variable_name) 8 | param = reader.get_tensor(variable_name) 9 | return param 10 | 11 | def nextitnet_residual_block_alpha(input_, dilation, layer_id, method, 12 | residual_channels, kernel_size, reader, layer_num, train=True): 13 | resblock_type = "decoder" 14 | resblock_name = "nextitnet_residual_block{}_layer_{}".format(resblock_type, layer_id) 15 | 16 | with tf.variable_scope(resblock_name, reuse=tf.AUTO_REUSE): 17 | 18 | if method == 'StackR': 19 | if layer_id >= layer_num / 2: 20 | rez = tf.get_variable('rez', [1], 21 | initializer=tf.constant_initializer(0.0)) 22 | else: 23 | variable_name = resblock_name + '/rez' 24 | new_variable_name = resblock_name + '/rez' 25 | initial_value = get_parameters(train, reader, variable_name, new_variable_name) 26 | rez = tf.get_variable('rez', [1], 27 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 28 | 29 | if method == 'stackC' or method == 'stackA' or method == 'stackE': 30 | rez = tf.get_variable('rez', [1], 31 | initializer=tf.constant_initializer(0.0)) 32 | 33 | if method == 'from_scratch': 34 | rez = tf.get_variable('rez', [1], 35 | initializer=tf.constant_initializer(0.0)) 36 | 37 | 38 | if reader: 39 | dilated_conv = conv1d(input_, residual_channels, method, layer_id, reader, layer_num, resblock_name, 40 | dilation, kernel_size, 41 | name="dilated_conv1", 42 | trainable=train 43 | ) 44 | input_ln = layer_norm(dilated_conv, layer_id, reader, method, layer_num, resblock_name, name="layer_norm1", trainable=train) 45 | relu1 = tf.nn.relu(input_ln) 46 | else: 47 | dilated_conv = conv1d(input_, residual_channels, method, layer_id, None, layer_num, resblock_name, 48 | dilation, kernel_size, 49 | name="dilated_conv1", 50 | trainable=train 51 | ) 52 | input_ln = layer_norm(dilated_conv, layer_id, None, method, layer_num, resblock_name, name="layer_norm1", trainable=train) 53 | relu1 = tf.nn.relu(input_ln) 54 | 55 | if reader: 56 | dilated_conv = conv1d(relu1, residual_channels, method, layer_id, reader, layer_num, resblock_name, 57 | 2*dilation, kernel_size, 58 | name="dilated_conv2", 59 | trainable=train 60 | ) 61 | input_ln = layer_norm(dilated_conv, layer_id, reader, method, layer_num, resblock_name, name="layer_norm2", trainable=train) 62 | relu1 = tf.nn.relu(input_ln) 63 | else: 64 | dilated_conv = conv1d(relu1, residual_channels, method, layer_id, None, layer_num, resblock_name, 65 | 2*dilation, kernel_size, 66 | name="dilated_conv2", 67 | trainable=train 68 | ) 69 | input_ln = layer_norm(dilated_conv, layer_id, None, method, layer_num, resblock_name, name="layer_norm2", trainable=train) 70 | relu1 = tf.nn.relu(input_ln) 71 | 72 | return input_ + relu1 * rez 73 | 74 | def conv1d(input_, output_channels, method, layer_id, reader, layer_num, resblock_name, 75 | dilation=1, kernel_size=1, 76 | name="dilated_conv", trainable=True): 77 | with tf.variable_scope(name): 78 | 79 | if method == 'from_scratch' or method == 'stackE': 80 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 81 | initializer=tf.truncated_normal_initializer(stddev=0.02, seed=1)) 82 | bias = tf.get_variable('bias', [output_channels], 83 | 
initializer=tf.constant_initializer(0.0)) 84 | 85 | if method == 'stackC': 86 | if layer_id >= layer_num / 2: 87 | relative_layer_id = layer_id % int(layer_num/2) 88 | variable_name = resblock_name.split("_") 89 | variable_name[4] = str(relative_layer_id) 90 | variable_name = "_".join(variable_name) + '/' + name + '/weight' 91 | else: 92 | variable_name = resblock_name + '/' + name + '/weight' 93 | 94 | new_variable_name = resblock_name + '/' + name + '/weight' 95 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 96 | 97 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 98 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 99 | 100 | if layer_id >= layer_num / 2: 101 | relative_layer_id = layer_id % int(layer_num/2) 102 | variable_name = resblock_name.split("_") 103 | variable_name[4] = str(relative_layer_id) 104 | variable_name = "_".join(variable_name) + '/' + name + '/bias' 105 | else: 106 | variable_name = resblock_name + '/' + name + '/bias' 107 | new_variable_name = resblock_name + '/' + name + '/bias' 108 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 109 | bias = tf.get_variable('bias', [output_channels], 110 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 111 | 112 | if method == 'StackR': 113 | if layer_id >= layer_num / 2: 114 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 115 | initializer=tf.truncated_normal_initializer(stddev=0.02, seed=1)) 116 | else: 117 | variable_name = resblock_name + '/' + name + '/weight' 118 | 119 | new_variable_name = resblock_name + '/' + name + '/weight' 120 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 121 | 122 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 123 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 124 | if layer_id >= layer_num / 2: 125 | bias = tf.get_variable('bias', [output_channels], 126 | initializer=tf.constant_initializer(0.0)) 127 | else: 128 | variable_name = resblock_name + '/' + name + '/bias' 129 | new_variable_name = resblock_name + '/' + name + '/bias' 130 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 131 | bias = tf.get_variable('bias', [output_channels], 132 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 133 | 134 | if method == 'stackA': 135 | relative_layer_id = layer_id // 2 136 | variable_name = resblock_name.split("_") 137 | variable_name[4] = str(relative_layer_id) 138 | variable_name = "_".join(variable_name) + '/' + name + '/weight' 139 | 140 | new_variable_name = resblock_name + '/' + name + '/weight' 141 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 142 | 143 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 144 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 145 | 146 | relative_layer_id = layer_id // 2 147 | variable_name = resblock_name.split("_") 148 | variable_name[4] = str(relative_layer_id) 149 | variable_name = "_".join(variable_name) + '/' + name + '/bias' 150 | 151 | new_variable_name = resblock_name + '/' + name + '/bias' 152 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 153 | bias = tf.get_variable('bias', [output_channels], 154 | 
initializer=tf.constant_initializer(initial_value, verify_shape=True)) 155 | 156 | 157 | padding = [[0, 0], [(kernel_size - 1) * dilation, 0], [0, 0]] 158 | padded = tf.pad(input_, padding) 159 | input_expanded = tf.expand_dims(padded, dim=1) 160 | out = tf.nn.atrous_conv2d(input_expanded, weight, rate=dilation, padding='VALID') + bias 161 | 162 | return tf.squeeze(out, [1]) 163 | 164 | 165 | def layer_norm(x, layer_id, reader, method, layer_num, resblock_name, name, epsilon=1e-8, trainable=True): 166 | 167 | with tf.variable_scope(name): 168 | shape = x.get_shape() 169 | 170 | if method == 'from_scratch': 171 | beta = tf.get_variable('beta', [int(shape[-1])], 172 | initializer=tf.constant_initializer(0), trainable=trainable) 173 | gamma = tf.get_variable('gamma', [int(shape[-1])], 174 | initializer=tf.constant_initializer(1), trainable=trainable) 175 | 176 | if method == 'stackC' or method == 'stackA' or method == 'stackE': 177 | beta = tf.get_variable('beta', [int(shape[-1])], 178 | initializer=tf.constant_initializer(0), trainable=trainable) 179 | gamma = tf.get_variable('gamma', [int(shape[-1])], 180 | initializer=tf.constant_initializer(1), trainable=trainable) 181 | 182 | if method == 'StackR': 183 | if layer_id >= layer_num / 2: 184 | beta = tf.get_variable('beta', [int(shape[-1])], 185 | initializer=tf.constant_initializer(0), trainable=trainable) 186 | else: 187 | variable_name = resblock_name + '/' + name + '/beta' 188 | new_variable_name = resblock_name + '/' + name + '/beta' 189 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 190 | beta = tf.get_variable('beta', [int(shape[-1])], 191 | initializer=tf.constant_initializer(initial_value, verify_shape=True), trainable=trainable) 192 | 193 | if layer_id >= layer_num / 2: 194 | gamma = tf.get_variable('gamma', [int(shape[-1])], 195 | initializer=tf.constant_initializer(1), trainable=trainable) 196 | else: 197 | variable_name = resblock_name + '/' + name + '/gamma' 198 | new_variable_name = resblock_name + '/' + name + '/gamma' 199 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 200 | gamma = tf.get_variable('gamma', [int(shape[-1])], 201 | initializer=tf.constant_initializer(initial_value, verify_shape=True), trainable=trainable) 202 | 203 | 204 | mean, variance = tf.nn.moments(x, axes=[len(shape) - 1], keep_dims=True) 205 | x = (x - mean) / tf.sqrt(variance + epsilon) 206 | return gamma * x + beta 207 | -------------------------------------------------------------------------------- /ops_copytop.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import math 3 | import numpy as np 4 | 5 | def get_parameters(train, reader, variable_name, new_variable_name): 6 | if train: 7 | print(variable_name, " ---> ", new_variable_name) 8 | param = reader.get_tensor(variable_name) 9 | return param 10 | 11 | def nextitnet_residual_block_alpha(input_, dilation, layer_id, method, 12 | residual_channels, kernel_size, reader, layer_num, train=True): 13 | resblock_type = "decoder" 14 | resblock_name = "nextitnet_residual_block{}_layer_{}".format(resblock_type, layer_id) 15 | 16 | with tf.variable_scope(resblock_name, reuse=tf.AUTO_REUSE): 17 | 18 | rez = tf.get_variable('rez', [1], 19 | initializer=tf.constant_initializer(0.0)) 20 | 21 | 22 | if reader: 23 | dilated_conv = conv1d(input_, residual_channels, method, layer_id, reader, layer_num, resblock_name, 24 | dilation, kernel_size, 25 | name="dilated_conv1", 
26 | trainable=train 27 | ) 28 | input_ln = layer_norm(dilated_conv, layer_id, reader, method, layer_num, resblock_name, name="layer_norm1", trainable=train) 29 | relu1 = tf.nn.relu(input_ln) 30 | else: 31 | dilated_conv = conv1d(input_, residual_channels, method, layer_id, None, layer_num, resblock_name, 32 | dilation, kernel_size, 33 | name="dilated_conv1", 34 | trainable=train 35 | ) 36 | input_ln = layer_norm(dilated_conv, layer_id, None, method, layer_num, resblock_name, name="layer_norm1", trainable=train) 37 | relu1 = tf.nn.relu(input_ln) 38 | 39 | if reader: 40 | dilated_conv = conv1d(relu1, residual_channels, method, layer_id, reader, layer_num, resblock_name, 41 | 2*dilation, kernel_size, 42 | name="dilated_conv2", 43 | trainable=train 44 | ) 45 | input_ln = layer_norm(dilated_conv, layer_id, reader, method, layer_num, resblock_name, name="layer_norm2", trainable=train) 46 | relu1 = tf.nn.relu(input_ln) 47 | else: 48 | dilated_conv = conv1d(relu1, residual_channels, method, layer_id, None, layer_num, resblock_name, 49 | 2*dilation, kernel_size, 50 | name="dilated_conv2", 51 | trainable=train 52 | ) 53 | input_ln = layer_norm(dilated_conv, layer_id, None, method, layer_num, resblock_name, name="layer_norm2", trainable=train) 54 | relu1 = tf.nn.relu(input_ln) 55 | 56 | return input_ + relu1 * rez 57 | 58 | def conv1d(input_, output_channels, method, layer_id, reader, layer_num, resblock_name, 59 | dilation=1, kernel_size=1, 60 | name="dilated_conv", trainable=True): 61 | with tf.variable_scope(name): 62 | 63 | if method == 'from_scratch': 64 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 65 | initializer=tf.truncated_normal_initializer(stddev=0.02, seed=1)) 66 | bias = tf.get_variable('bias', [output_channels], 67 | initializer=tf.constant_initializer(0.0)) 68 | 69 | topn = 16 70 | if method == 'stackC': 71 | if layer_id >= layer_num - topn: 72 | relative_layer_id = layer_id - topn 73 | variable_name = resblock_name.split("_") 74 | variable_name[4] = str(relative_layer_id) 75 | variable_name = "_".join(variable_name) + '/' + name + '/weight' 76 | else: 77 | variable_name = resblock_name + '/' + name + '/weight' 78 | 79 | new_variable_name = resblock_name + '/' + name + '/weight' 80 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 81 | 82 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 83 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 84 | 85 | if layer_id >= layer_num - topn: 86 | relative_layer_id = layer_id - topn 87 | variable_name = resblock_name.split("_") 88 | variable_name[4] = str(relative_layer_id) 89 | variable_name = "_".join(variable_name) + '/' + name + '/bias' 90 | else: 91 | variable_name = resblock_name + '/' + name + '/bias' 92 | new_variable_name = resblock_name + '/' + name + '/bias' 93 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 94 | bias = tf.get_variable('bias', [output_channels], 95 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 96 | 97 | if method == 'stackA': 98 | if layer_id >= layer_num - topn * 2: 99 | relative_layer_id = int((layer_id - (layer_num - topn * 2)) // 2 + (layer_num - topn * 2)) 100 | variable_name = resblock_name.split("_") 101 | variable_name[4] = str(relative_layer_id) 102 | variable_name = "_".join(variable_name) + '/' + name + '/weight' 103 | else: 104 | variable_name = resblock_name + '/' + name + '/weight' 105 
| 106 | new_variable_name = resblock_name + '/' + name + '/weight' 107 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 108 | 109 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 110 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 111 | if layer_id >= layer_num - topn * 2: 112 | relative_layer_id = int((layer_id - (layer_num - topn * 2)) // 2 + (layer_num - topn * 2)) 113 | variable_name = resblock_name.split("_") 114 | variable_name[4] = str(relative_layer_id) 115 | variable_name = "_".join(variable_name) + '/' + name + '/bias' 116 | else: 117 | variable_name = resblock_name + '/' + name + '/bias' 118 | 119 | new_variable_name = resblock_name + '/' + name + '/bias' 120 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 121 | bias = tf.get_variable('bias', [output_channels], 122 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 123 | 124 | padding = [[0, 0], [(kernel_size - 1) * dilation, 0], [0, 0]] 125 | padded = tf.pad(input_, padding) 126 | input_expanded = tf.expand_dims(padded, dim=1) 127 | out = tf.nn.atrous_conv2d(input_expanded, weight, rate=dilation, padding='VALID') + bias 128 | 129 | return tf.squeeze(out, [1]) 130 | 131 | 132 | def layer_norm(x, layer_id, reader, method, layer_num, resblock_name, name, epsilon=1e-8, trainable=True): 133 | 134 | with tf.variable_scope(name): 135 | shape = x.get_shape() 136 | 137 | beta = tf.get_variable('beta', [int(shape[-1])], 138 | initializer=tf.constant_initializer(0), trainable=trainable) 139 | gamma = tf.get_variable('gamma', [int(shape[-1])], 140 | initializer=tf.constant_initializer(1), trainable=trainable) 141 | 142 | mean, variance = tf.nn.moments(x, axes=[len(shape) - 1], keep_dims=True) 143 | x = (x - mean) / tf.sqrt(variance + epsilon) 144 | return gamma * x + beta 145 | -------------------------------------------------------------------------------- /ops_original.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import math 3 | import numpy as np 4 | 5 | 6 | def get_parameters(train, reader, variable_name, new_variable_name): 7 | if train: 8 | print(variable_name, " ---> ", new_variable_name) 9 | param = reader.get_tensor(variable_name) 10 | return param 11 | 12 | 13 | def nextitnet_residual_block(input_, dilation, layer_id, method, 14 | residual_channels, kernel_size, reader, layer_num, train=True): 15 | resblock_type = "decoder" 16 | resblock_name = "nextitnet_residual_block{}_layer_{}".format(resblock_type, layer_id) 17 | 18 | with tf.variable_scope(resblock_name, reuse=tf.AUTO_REUSE): 19 | 20 | if reader: 21 | dilated_conv = conv1d(input_, residual_channels, method, layer_id, reader, layer_num, resblock_name, 22 | dilation, kernel_size, 23 | name="dilated_conv1", 24 | trainable=train 25 | ) 26 | input_ln = layer_norm(dilated_conv, layer_id, reader, method, layer_num, resblock_name, name="layer_norm1", 27 | trainable=train) 28 | relu1 = tf.nn.relu(input_ln) 29 | else: 30 | dilated_conv = conv1d(input_, residual_channels, method, layer_id, None, layer_num, resblock_name, 31 | dilation, kernel_size, 32 | name="dilated_conv1", 33 | trainable=train 34 | ) 35 | input_ln = layer_norm(dilated_conv, layer_id, None, method, layer_num, resblock_name, name="layer_norm1", 36 | trainable=train) 37 | relu1 = tf.nn.relu(input_ln) 38 | 39 | if reader: 40 | dilated_conv = conv1d(relu1, residual_channels, method, 
layer_id, reader, layer_num, resblock_name, 41 | 2 * dilation, kernel_size, 42 | name="dilated_conv2", 43 | trainable=train 44 | ) 45 | input_ln = layer_norm(dilated_conv, layer_id, reader, method, layer_num, resblock_name, name="layer_norm2", 46 | trainable=train) 47 | relu1 = tf.nn.relu(input_ln) 48 | else: 49 | dilated_conv = conv1d(relu1, residual_channels, method, layer_id, None, layer_num, resblock_name, 50 | 2 * dilation, kernel_size, 51 | name="dilated_conv2", 52 | trainable=train 53 | ) 54 | input_ln = layer_norm(dilated_conv, layer_id, None, method, layer_num, resblock_name, name="layer_norm2", 55 | trainable=train) 56 | relu1 = tf.nn.relu(input_ln) 57 | 58 | return input_ + relu1 59 | 60 | 61 | def conv1d(input_, output_channels, method, layer_id, reader, layer_num, resblock_name, 62 | dilation=1, kernel_size=1, 63 | name="dilated_conv", trainable=True): 64 | with tf.variable_scope(name): 65 | 66 | if method == 'from_scratch' or method == 'stackE': 67 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 68 | initializer=tf.truncated_normal_initializer(stddev=0.02, seed=1)) 69 | bias = tf.get_variable('bias', [output_channels], 70 | initializer=tf.constant_initializer(0.0)) 71 | 72 | if method == 'stackC': 73 | if layer_id >= layer_num / 2: 74 | relative_layer_id = layer_id % int(layer_num / 2) 75 | variable_name = resblock_name.split("_") 76 | variable_name[4] = str(relative_layer_id) 77 | variable_name = "_".join(variable_name) + '/' + name + '/weight' 78 | else: 79 | variable_name = resblock_name + '/' + name + '/weight' 80 | 81 | new_variable_name = resblock_name + '/' + name + '/weight' 82 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 83 | 84 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 85 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 86 | 87 | if layer_id >= layer_num / 2: 88 | relative_layer_id = layer_id % int(layer_num / 2) 89 | variable_name = resblock_name.split("_") 90 | variable_name[4] = str(relative_layer_id) 91 | variable_name = "_".join(variable_name) + '/' + name + '/bias' 92 | else: 93 | variable_name = resblock_name + '/' + name + '/bias' 94 | new_variable_name = resblock_name + '/' + name + '/bias' 95 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 96 | bias = tf.get_variable('bias', [output_channels], 97 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 98 | 99 | if method == 'StackR': 100 | if layer_id >= layer_num / 2: 101 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 102 | initializer=tf.truncated_normal_initializer(stddev=0.02, seed=1)) 103 | else: 104 | variable_name = resblock_name + '/' + name + '/weight' 105 | 106 | new_variable_name = resblock_name + '/' + name + '/weight' 107 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 108 | 109 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 110 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 111 | if layer_id >= layer_num / 2: 112 | bias = tf.get_variable('bias', [output_channels], 113 | initializer=tf.constant_initializer(0.0)) 114 | else: 115 | variable_name = resblock_name + '/' + name + '/bias' 116 | new_variable_name = resblock_name + '/' + name + '/bias' 117 | initial_value = get_parameters(trainable, reader, variable_name, 
new_variable_name) 118 | bias = tf.get_variable('bias', [output_channels], 119 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 120 | 121 | if method == 'stackA': 122 | relative_layer_id = layer_id // 2 123 | variable_name = resblock_name.split("_") 124 | variable_name[4] = str(relative_layer_id) 125 | variable_name = "_".join(variable_name) + '/' + name + '/weight' 126 | 127 | new_variable_name = resblock_name + '/' + name + '/weight' 128 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 129 | 130 | weight = tf.get_variable('weight', [1, kernel_size, input_.get_shape()[-1], output_channels], 131 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 132 | 133 | relative_layer_id = layer_id // 2 134 | variable_name = resblock_name.split("_") 135 | variable_name[4] = str(relative_layer_id) 136 | variable_name = "_".join(variable_name) + '/' + name + '/bias' 137 | 138 | new_variable_name = resblock_name + '/' + name + '/bias' 139 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 140 | bias = tf.get_variable('bias', [output_channels], 141 | initializer=tf.constant_initializer(initial_value, verify_shape=True)) 142 | 143 | padding = [[0, 0], [(kernel_size - 1) * dilation, 0], [0, 0]] 144 | padded = tf.pad(input_, padding) 145 | input_expanded = tf.expand_dims(padded, dim=1) 146 | out = tf.nn.atrous_conv2d(input_expanded, weight, rate=dilation, padding='VALID') + bias 147 | 148 | return tf.squeeze(out, [1]) 149 | 150 | 151 | def layer_norm(x, layer_id, reader, method, layer_num, resblock_name, name, epsilon=1e-8, trainable=True): 152 | with tf.variable_scope(name): 153 | shape = x.get_shape() 154 | 155 | if method == 'from_scratch': 156 | beta = tf.get_variable('beta', [int(shape[-1])], 157 | initializer=tf.constant_initializer(0), trainable=trainable) 158 | gamma = tf.get_variable('gamma', [int(shape[-1])], 159 | initializer=tf.constant_initializer(1), trainable=trainable) 160 | 161 | if method == 'stackC' or method == 'stackA' or method == 'stackE': 162 | beta = tf.get_variable('beta', [int(shape[-1])], 163 | initializer=tf.constant_initializer(0), trainable=trainable) 164 | gamma = tf.get_variable('gamma', [int(shape[-1])], 165 | initializer=tf.constant_initializer(1), trainable=trainable) 166 | 167 | if method == 'StackR': 168 | if layer_id >= layer_num / 2: 169 | beta = tf.get_variable('beta', [int(shape[-1])], 170 | initializer=tf.constant_initializer(0), trainable=trainable) 171 | else: 172 | variable_name = resblock_name + '/' + name + '/beta' 173 | new_variable_name = resblock_name + '/' + name + '/beta' 174 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 175 | beta = tf.get_variable('beta', [int(shape[-1])], 176 | initializer=tf.constant_initializer(initial_value, verify_shape=True), 177 | trainable=trainable) 178 | 179 | if layer_id >= layer_num / 2: 180 | gamma = tf.get_variable('gamma', [int(shape[-1])], 181 | initializer=tf.constant_initializer(1), trainable=trainable) 182 | else: 183 | variable_name = resblock_name + '/' + name + '/gamma' 184 | new_variable_name = resblock_name + '/' + name + '/gamma' 185 | initial_value = get_parameters(trainable, reader, variable_name, new_variable_name) 186 | gamma = tf.get_variable('gamma', [int(shape[-1])], 187 | initializer=tf.constant_initializer(initial_value, verify_shape=True), 188 | trainable=trainable) 189 | 190 | mean, variance = tf.nn.moments(x, axes=[len(shape) - 1], keep_dims=True) 
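# Note (editorial comment, not in the original source): using the mean and
# variance computed above over the last (channel) dimension, the lines below
# apply standard layer normalization, x_hat = (x - mean) / sqrt(variance + epsilon),
# followed by the learned scale (gamma) and shift (beta) created or restored above.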
191 | x = (x - mean) / tf.sqrt(variance + epsilon) 192 | return gamma * x + beta 193 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.7.1 2 | argon2-cffi==20.1.0 3 | astor==0.8.0 4 | attrs==20.2.0 5 | backcall==0.2.0 6 | bleach==3.1.5 7 | certifi==2016.2.28 8 | cffi==1.14.2 9 | cloudpickle==1.2.1 10 | contextlib2==0.5.5 11 | Cython==0.29.21 12 | decorator==4.4.0 13 | defusedxml==0.6.0 14 | dm-sonnet==1.34 15 | entrypoints==0.3 16 | gast==0.2.2 17 | google-pasta==0.1.7 18 | grpcio==1.22.0 19 | h5py==2.9.0 20 | importlib-metadata==1.7.0 21 | ipykernel==5.3.4 22 | ipython==7.16.1 23 | ipython-genutils==0.2.0 24 | ipywidgets==7.5.1 25 | jedi==0.17.2 26 | Jinja2==2.11.2 27 | joblib==0.16.0 28 | jsonschema==3.2.0 29 | jupyter==1.0.0 30 | jupyter-client==6.1.7 31 | jupyter-console==6.2.0 32 | jupyter-core==4.6.3 33 | Keras==2.1.4 34 | Keras-Applications==1.0.8 35 | Keras-Preprocessing==1.1.0 36 | Markdown==3.1.1 37 | MarkupSafe==1.1.1 38 | mistune==0.8.4 39 | mkl-fft==1.0.15 40 | mkl-random==1.1.0 41 | mkl-service==2.3.0 42 | nbconvert==5.6.1 43 | nbformat==5.0.7 44 | notebook==6.1.3 45 | numpy==1.16.4 46 | olefile==0.46 47 | packaging==20.4 48 | pandas==0.24.2 49 | pandocfilters==1.4.2 50 | parso==0.7.1 51 | pexpect==4.8.0 52 | pickleshare==0.7.5 53 | Pillow==4.2.1 54 | prometheus-client==0.8.0 55 | prompt-toolkit==3.0.7 56 | protobuf==3.9.1 57 | ptyprocess==0.6.0 58 | pycparser==2.20 59 | Pygments==2.6.1 60 | pyparsing==2.4.7 61 | pyrsistent==0.16.0 62 | python-dateutil==2.8.1 63 | pytz==2020.1 64 | PyYAML==5.1.2 65 | pyzmq==19.0.2 66 | qtconsole==4.7.7 67 | QtPy==1.9.0 68 | scikit-learn==0.23.1 69 | scipy==1.3.0 70 | semantic-version==2.6.0 71 | Send2Trash==1.5.0 72 | six==1.14.0 73 | sklearn==0.0 74 | tensorboard==1.14.0 75 | tensorboardX==1.8 76 | tensorflow-estimator==1.14.0 77 | tensorflow-gpu==1.14.0 78 | tensorflow-probability==0.7.0 79 | termcolor==1.1.0 80 | terminado==0.8.3 81 | testpath==0.4.4 82 | threadpoolctl==2.1.0 83 | torch==1.4.0 84 | torchvision==0.6.0a0+82fd1c8 85 | tornado==6.0.4 86 | tqdm==4.32.2 87 | traitlets==4.3.3 88 | wcwidth==0.2.5 89 | webencodings==0.5.1 90 | Werkzeug==0.15.5 91 | widgetsnbextension==3.5.1 92 | wrapt==1.11.2 93 | zipp==3.1.0 94 | -------------------------------------------------------------------------------- /train_grec_sc1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | data_ratio1=0.8 4 | data_ratio2=1 5 | 6 | dilation_count1=8 7 | dilation_count2=16 8 | 9 | eval_iter1=6000 10 | step1=300000 11 | 12 | eval_iter2=6000 13 | step2=300000 14 | 15 | early_stop1=8 16 | early_stop2=8 17 | 18 | learning_rate1=0.001 19 | learning_rate2=0.001 20 | 21 | method1='from_scratch' 22 | method2='stackA' 23 | 24 | load_model1=False 25 | load_model2=True 26 | 27 | suffix=a0201 28 | save_dir="Models/ml20_${dilation_count1}_${dilation_count2}_${suffix}" 29 | 30 | mkdir -p ${save_dir} 31 | 32 | time=$(date "+%Y%m%d%H%M%S") 33 | logfile="${save_dir}/log_${time}.txt" 34 | 35 | 36 | model_path1=${save_dir} 37 | model_path2="${save_dir}/${dilation_count1}_${learning_rate1}_${data_ratio1}_${step1}.ckpt" 38 | 39 | gpu=4 40 | 41 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_GRec.py --eval_iter ${eval_iter1} --data_ratio ${data_ratio1} --step ${step1} --early_stop ${early_stop1} --learning_rate ${learning_rate1} --dilation_count ${dilation_count1} --method 
${method1} --load_model ${load_model1} --model_path ${model_path1} --save_dir ${save_dir} | tee ${logfile}" 42 | 43 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_GRec.py --eval_iter ${eval_iter2} --data_ratio ${data_ratio2} --step ${step2} --early_stop ${early_stop2} --learning_rate ${learning_rate2} --dilation_count ${dilation_count2} --method ${method2} --load_model ${load_model2} --model_path ${model_path2} --save_dir ${save_dir} | tee -a ${logfile}" 44 | 45 | -------------------------------------------------------------------------------- /train_nextitnet_sc1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | data_ratio1=0.8 4 | data_ratio2=1 5 | 6 | dilation_count1=8 7 | dilation_count2=16 8 | 9 | eval_iter1=6000 10 | step1=300000 11 | 12 | eval_iter2=6000 13 | step2=300000 14 | 15 | early_stop1=8 16 | early_stop2=8 17 | 18 | learning_rate1=0.001 19 | learning_rate2=0.001 20 | 21 | method1='from_scratch' 22 | method2='stackA' 23 | 24 | load_model1=False 25 | load_model2=True 26 | 27 | suffix=a0201 28 | save_dir="Models/ml20_${dilation_count1}_${dilation_count2}_${suffix}" 29 | 30 | mkdir -p ${save_dir} 31 | 32 | time=$(date "+%Y%m%d%H%M%S") 33 | logfile="${save_dir}/log_${time}.txt" 34 | 35 | 36 | model_path1=${save_dir} 37 | model_path2="${save_dir}/${dilation_count1}_${learning_rate1}_${data_ratio1}_${step1}.ckpt" 38 | 39 | gpu=4 40 | 41 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_nextitnet.py --eval_iter ${eval_iter1} --data_ratio ${data_ratio1} --step ${step1} --early_stop ${early_stop1} --learning_rate ${learning_rate1} --dilation_count ${dilation_count1} --method ${method1} --load_model ${load_model1} --model_path ${model_path1} --save_dir ${save_dir} | tee ${logfile}" 42 | 43 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_nextitnet.py --eval_iter ${eval_iter2} --data_ratio ${data_ratio2} --step ${step2} --early_stop ${early_stop2} --learning_rate ${learning_rate2} --dilation_count ${dilation_count2} --method ${method2} --load_model ${load_model2} --model_path ${model_path2} --save_dir ${save_dir} | tee -a ${logfile}" 44 | 45 | -------------------------------------------------------------------------------- /train_nextitnet_sc2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | data_ratio1=1 4 | data_ratio2=1 5 | data_ratio3=1 6 | 7 | dilation_count1=4 8 | dilation_count2=8 9 | dilation_count3=16 10 | 11 | 12 | eval_iter1=3000 13 | step1=50000 14 | 15 | eval_iter2=3000 16 | step2=100000 17 | 18 | eval_iter3=3000 19 | step3=200000 20 | 21 | early_stop1=10 22 | early_stop2=10 23 | early_stop3=10 24 | 25 | 26 | learning_rate1=0.001 27 | learning_rate2=0.001 28 | learning_rate3=0.001 29 | 30 | 31 | method1='from_scratch' 32 | method2='stackA' 33 | method3='stackA' 34 | 35 | 36 | load_model1=False 37 | load_model2=True 38 | load_model3=True 39 | 40 | suffix=a0201 41 | save_dir="Models/ml20_${dilation_count1}_${dilation_count2}_${dilation_count3}_${suffix}" 42 | 43 | mkdir -p ${save_dir} 44 | 45 | time=$(date "+%Y%m%d%H%M%S") 46 | logfile="${save_dir}/log_${time}.txt" 47 | 48 | 49 | model_path1=${save_dir} 50 | model_path2="${save_dir}/${dilation_count1}_${learning_rate1}_${data_ratio1}_${step1}.ckpt" 51 | model_path3="${save_dir}/${dilation_count2}_${learning_rate2}_${data_ratio2}_${step2}.ckpt" 52 | 53 | gpu=3 54 | 55 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_nextitnet.py --eval_iter ${eval_iter1} --data_ratio ${data_ratio1} 
--step ${step1} --early_stop ${early_stop1} --learning_rate ${learning_rate1} --dilation_count ${dilation_count1} --method ${method1} --load_model ${load_model1} --model_path ${model_path1} --save_dir ${save_dir} | tee ${logfile}" 56 | 57 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_nextitnet.py --eval_iter ${eval_iter2} --data_ratio ${data_ratio2} --step ${step2} --early_stop ${early_stop2} --learning_rate ${learning_rate2} --dilation_count ${dilation_count2} --method ${method2} --load_model ${load_model2} --model_path ${model_path2} --save_dir ${save_dir} | tee -a ${logfile}" 58 | 59 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_nextitnet.py --eval_iter ${eval_iter3} --data_ratio ${data_ratio3} --step ${step3} --early_stop ${early_stop3} --learning_rate ${learning_rate3} --dilation_count ${dilation_count3} --method ${method3} --load_model ${load_model3} --model_path ${model_path3} --save_dir ${save_dir} | tee -a ${logfile}" 60 | -------------------------------------------------------------------------------- /train_nextitnet_sc3.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | data_ratio1=0.8 4 | data_ratio2=1 5 | 6 | dilation_count1=4 7 | dilation_count2=8 8 | 9 | eval_iter1=6000 10 | step1=300000 11 | 12 | eval_iter2=6000 13 | step2=300000 14 | 15 | early_stop1=8 16 | early_stop2=8 17 | 18 | learning_rate1=0.001 19 | learning_rate2=0.001 20 | 21 | method1='from_scratch' 22 | method2='stackA' 23 | 24 | load_model1=False 25 | load_model2=True 26 | 27 | suffix=a0201 28 | save_dir="Models/coldrec_${dilation_count1}_${dilation_count2}_${suffix}" 29 | 30 | mkdir -p ${save_dir} 31 | 32 | time=$(date "+%Y%m%d%H%M%S") 33 | logfile="${save_dir}/log_${time}.txt" 34 | 35 | 36 | model_path1=${save_dir} 37 | model_path2="${save_dir}/${dilation_count1}_${learning_rate1}_${data_ratio1}_${step1}.ckpt" 38 | model_path3="${save_dir}/${dilation_count2}_${learning_rate2}_${data_ratio2}_${step2}.ckpt" 39 | 40 | gpu=4 41 | 42 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_nextitnet_coldrec.py --eval_iter ${eval_iter1} --data_ratio ${data_ratio1} --step ${step1} --early_stop ${early_stop1} --learning_rate ${learning_rate1} --dilation_count ${dilation_count1} --method ${method1} --load_model ${load_model1} --model_path ${model_path1} --save_dir ${save_dir} | tee ${logfile}" 43 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u fineall.py --dilation_count ${dilation_count1} --model_path ${model_path2} --save_dir ${save_dir} | tee -a ${logfile}" 44 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u deep_nextitnet_coldrec.py --eval_iter ${eval_iter2} --data_ratio ${data_ratio2} --step ${step2} --early_stop ${early_stop2} --learning_rate ${learning_rate2} --dilation_count ${dilation_count2} --method ${method2} --load_model ${load_model2} --model_path ${model_path2} --save_dir ${save_dir} | tee -a ${logfile}" 45 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u fineall.py --dilation_count ${dilation_count2} --model_path ${model_path3} --save_dir ${save_dir} | tee -a ${logfile}" 46 | 47 | -------------------------------------------------------------------------------- /train_sasrec_sc1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | data_ratio1=0.8 3 | data_ratio2=1 4 | 5 | num_blocks1=18 6 | num_blocks2=24 7 | 8 | eval_iter1=6000 9 | step1=350000 10 | 11 | eval_iter2=6000 12 | step2=350000 13 | 14 | early_stop1=8 15 | early_stop2=8 16 | 17 | learning_rate1=0.001 18 | 
learning_rate2=0.001 19 | 20 | method1='from_scratch' 21 | method2='stackA' 22 | 23 | load_model1=False 24 | load_model2=True 25 | 26 | seed=a0205 27 | save_dir="Models/ml20_${num_blocks1}_${num_blocks2}_${seed}" 28 | 29 | mkdir -p ${save_dir} 30 | 31 | time=$(date "+%Y%m%d%H%M%S") 32 | logfile="${save_dir}/log_${time}.txt" 33 | 34 | 35 | model_path1=${save_dir} 36 | model_path2="${save_dir}/${num_blocks1}_${learning_rate1}_${data_ratio1}_${step1}.pkl" 37 | 38 | gpu=4 39 | 40 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u baseline_SASRec.py --eval_iter ${eval_iter1} --data_ratio ${data_ratio1} --step ${step1} --early_stop ${early_stop1} --learning_rate ${learning_rate1} --num_blocks ${num_blocks1} --method ${method1} --load_model ${load_model1} --model_path ${model_path1} --save_dir ${save_dir} | tee ${logfile}" 41 | 42 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u baseline_SASRec.py --eval_iter ${eval_iter2} --data_ratio ${data_ratio2} --step ${step2} --early_stop ${early_stop2} --learning_rate ${learning_rate2} --num_blocks ${num_blocks2} --method ${method2} --load_model ${load_model2} --model_path ${model_path2} --save_dir ${save_dir} | tee -a ${logfile}" 43 | 44 | -------------------------------------------------------------------------------- /train_ssept_sc1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | data_ratio1=0.8 3 | data_ratio2=1 4 | 5 | num_blocks1=18 6 | num_blocks2=24 7 | 8 | eval_iter1=2000 9 | step1=250000 10 | 11 | eval_iter2=2000 12 | step2=250000 13 | 14 | early_stop1=15 15 | early_stop2=15 16 | 17 | learning_rate1=0.001 18 | learning_rate2=0.001 19 | 20 | method1='from_scratch' 21 | method2='stackC' 22 | 23 | load_model1=False 24 | load_model2=True 25 | 26 | seed=a0205 27 | save_dir="Models/ml20_${num_blocks1}_${num_blocks2}_${seed}" 28 | 29 | mkdir -p ${save_dir} 30 | 31 | time=$(date "+%Y%m%d%H%M%S") 32 | logfile="${save_dir}/log_${time}.txt" 33 | 34 | 35 | model_path1=${save_dir} 36 | model_path2="${save_dir}/${num_blocks1}_${learning_rate1}_${data_ratio1}_${step1}.pkl" 37 | 38 | gpu=4 39 | 40 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u baseline_SSEPT.py --eval_iter ${eval_iter1} --data_ratio ${data_ratio1} --step ${step1} --early_stop ${early_stop1} --learning_rate ${learning_rate1} --num_blocks ${num_blocks1} --method ${method1} --load_model ${load_model1} --model_path ${model_path1} --save_dir ${save_dir} | tee ${logfile}" 41 | 42 | eval "CUDA_VISIBLE_DEVICES=${gpu} python -u baseline_SSEPT.py --eval_iter ${eval_iter2} --data_ratio ${data_ratio2} --step ${step2} --early_stop ${early_stop2} --learning_rate ${learning_rate2} --num_blocks ${num_blocks2} --method ${method2} --load_model ${load_model2} --model_path ${model_path2} --save_dir ${save_dir} | tee -a ${logfile}" 43 | 44 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | 5 | 6 | def to_pickled_df(data_directory, **kwargs): 7 | for name, df in kwargs.items(): 8 | df.to_pickle(os.path.join(data_directory, name + '.df')) 9 | 10 | 11 | def pad_history(itemlist, length, pad_item): 12 | if len(itemlist)>=length: 13 | return itemlist[-length:] 14 | if len(itemlist)