├── DifferentBackbone.jpg
├── HAMUR
│   ├── __init__.py
│   ├── basic
│   │   ├── __init__.py
│   │   ├── activation.py
│   │   ├── callback.py
│   │   ├── features.py
│   │   ├── initializers.py
│   │   ├── layers.py
│   │   ├── loss_func.py
│   │   ├── metaoptimizer.py
│   │   └── metric.py
│   ├── models
│   │   └── multi_domain
│   │       ├── __init__.py
│   │       ├── adapter.py
│   │       ├── adapter_dcn.py
│   │       └── adapter_wd.py
│   ├── trainers
│   │   ├── __init__.py
│   │   └── ctr_trainer.py
│   └── utils
│       ├── __init__.py
│       └── data.py
├── LICENSE
├── README.md
├── examples
│   ├── data
│   │   ├── ali-ccp
│   │   │   ├── ali_ccp_test_sample.csv
│   │   │   ├── ali_ccp_train_sample.csv
│   │   │   └── ali_ccp_val_sample.csv
│   │   └── ml-1m
│   │       └── ml-1m-sample.csv
│   ├── run_ali_ccp_ctr_ranking_multi_domain.py
│   └── run_movielens_rank_multi_domain.py
└── framework.jpg

/DifferentBackbone.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/HAMUR/53d8dd588282bc288f2621b8fa85e2df9b910e10/DifferentBackbone.jpg
--------------------------------------------------------------------------------
/HAMUR/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/HAMUR/53d8dd588282bc288f2621b8fa85e2df9b910e10/HAMUR/__init__.py
--------------------------------------------------------------------------------
/HAMUR/basic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/HAMUR/53d8dd588282bc288f2621b8fa85e2df9b910e10/HAMUR/basic/__init__.py
--------------------------------------------------------------------------------
/HAMUR/basic/activation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | 
5 | class Dice(nn.Module):
6 |     """The Dice activation function mentioned in the `DIN paper
7 |     https://arxiv.org/abs/1706.06978`.
8 |     """
9 | 
10 |     def __init__(self, epsilon=1e-3):
11 |         super(Dice, self).__init__()
12 |         self.epsilon = epsilon
13 |         self.alpha = nn.Parameter(torch.randn(1))
14 | 
15 |     def forward(self, x: torch.Tensor):
16 |         # x: N * num_neurons
17 |         avg = x.mean(dim=1)  # N
18 |         avg = avg.unsqueeze(dim=1)  # N * 1
19 |         var = torch.pow(x - avg, 2) + self.epsilon  # N * num_neurons
20 |         var = var.sum(dim=1).unsqueeze(dim=1)  # N * 1
21 | 
22 |         ps = (x - avg) / torch.sqrt(var)  # N * num_neurons
23 | 
24 |         ps = nn.Sigmoid()(ps)  # N * num_neurons
25 |         return ps * x + (1 - ps) * self.alpha * x
26 | 
27 | 
28 | def activation_layer(act_name):
29 |     """Construct an activation layer.
30 | 
31 |     Args:
32 |         act_name: str or nn.Module subclass, name of the activation function
33 | 
34 |     Returns:
35 |         act_layer: activation layer
36 |     """
37 |     if isinstance(act_name, str):
38 |         if act_name.lower() == 'sigmoid':
39 |             act_layer = nn.Sigmoid()
40 |         elif act_name.lower() == 'relu':
41 |             act_layer = nn.ReLU(inplace=True)
42 |         elif act_name.lower() == 'dice':
43 |             act_layer = Dice()
44 |         elif act_name.lower() == 'prelu':
45 |             act_layer = nn.PReLU()
46 |         elif act_name.lower() == "softmax":
47 |             act_layer = nn.Softmax(dim=1)
48 |         else:  # unknown activation name; raise instead of leaking an unbound local
49 |             raise NotImplementedError
50 |     elif isinstance(act_name, type) and issubclass(act_name, nn.Module):
51 |         act_layer = act_name()
52 |     else:
53 |         raise NotImplementedError
54 |     return act_layer
55 | 
--------------------------------------------------------------------------------
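Usage sketch for the activation helpers (not part of the repo; assumes the package root is on PYTHONPATH):

>>> import torch
>>> from HAMUR.basic.activation import activation_layer
>>> act = activation_layer("dice")    # returns a Dice() module with a learnable alpha
>>> act(torch.randn(8, 16)).shape     # element-wise: same shape out as in
torch.Size([8, 16])
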
/HAMUR/basic/callback.py:
--------------------------------------------------------------------------------
1 | import copy
2 | 
3 | 
4 | class EarlyStopper(object):
5 |     """Early stops the training if the validation AUC doesn't improve within a given patience.
6 | 
7 |     Args:
8 |         patience (int): how many trials to wait after the last improvement of the validation AUC.
9 |     """
10 | 
11 |     def __init__(self, patience):
12 |         self.patience = patience
13 |         self.trial_counter = 0
14 |         self.best_auc = 0
15 |         self.best_weights = None
16 | 
17 |     def stop_training(self, val_auc, weights):
18 |         """Decide whether to stop training.
19 | 
20 |         Args:
21 |             val_auc (float): AUC score on the validation data.
22 |             weights (tensor): the weights of the model
23 |         """
24 |         if val_auc > self.best_auc:
25 |             self.best_auc = val_auc
26 |             self.trial_counter = 0
27 |             self.best_weights = copy.deepcopy(weights)
28 |             return False
29 |         elif self.trial_counter + 1 < self.patience:
30 |             self.trial_counter += 1
31 |             return False
32 |         else:
33 |             return True
--------------------------------------------------------------------------------
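A minimal sketch of how a trainer can drive EarlyStopper (the loop and helper names are illustrative, not from ctr_trainer.py):

>>> stopper = EarlyStopper(patience=3)
>>> for epoch in range(num_epochs):                      # hypothetical training loop
...     train_one_epoch(model)                           # hypothetical helpers
...     val_auc = evaluate_auc(model, val_loader)
...     if stopper.stop_training(val_auc, model.state_dict()):
...         model.load_state_dict(stopper.best_weights)  # restore the best checkpoint
...         break
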
/HAMUR/basic/features.py:
--------------------------------------------------------------------------------
1 | from ..utils.data import get_auto_embedding_dim
2 | from .initializers import RandomNormal
3 | 
4 | 
5 | class SequenceFeature(object):
6 |     """The Feature Class for sequence features or multi-hot features.
7 |     In recommendation, there are many user behaviour features that we want to feed to a sequence model,
8 |     and tag features (multi-hot) that we want to pool. Note that if you use this feature, you must pad
9 |     the feature values before training.
10 | 
11 |     Args:
12 |         name (str): feature's name.
13 |         vocab_size (int): vocabulary size of the embedding table.
14 |         embed_dim (int): length of the embedding vector.
15 |         pooling (str): pooling method, supports `["mean", "sum", "concat"]` (default=`"mean"`)
16 |         shared_with (str): the name of another feature whose embedding table this feature shares.
17 |         padding_idx (int, optional): if specified, the entries at padding_idx will be masked to 0 in the InputMask layer.
18 |         initializer (Initializer): initializer for the embedding layer weight.
19 |     """
20 | 
21 |     def __init__(self,
22 |                  name,
23 |                  vocab_size,
24 |                  embed_dim=None,
25 |                  pooling="mean",
26 |                  shared_with=None,
27 |                  padding_idx=None,
28 |                  initializer=RandomNormal(0, 0.0001)):
29 |         self.name = name
30 |         self.vocab_size = vocab_size
31 |         if embed_dim is None:
32 |             self.embed_dim = get_auto_embedding_dim(vocab_size)
33 |         else:
34 |             self.embed_dim = embed_dim
35 |         self.pooling = pooling
36 |         self.shared_with = shared_with
37 |         self.padding_idx = padding_idx
38 |         self.initializer = initializer
39 | 
40 |     def __repr__(self):
41 |         return f'<SequenceFeature {self.name} with Embedding shape ({self.vocab_size}, {self.embed_dim})>'
42 | 
43 |     def get_embedding_layer(self):
44 |         if not hasattr(self, 'embed'):
45 |             self.embed = self.initializer(self.vocab_size, self.embed_dim)
46 |         return self.embed
47 | 
48 | 
49 | class SparseFeature(object):
50 |     """The Feature Class for sparse features.
51 | 
52 |     Args:
53 |         name (str): feature's name.
54 |         vocab_size (int): vocabulary size of the embedding table.
55 |         embed_dim (int): length of the embedding vector.
56 |         shared_with (str): the name of another feature whose embedding table this feature shares.
57 |         padding_idx (int, optional): if specified, the entries at padding_idx will be masked to 0 in the InputMask layer.
58 |         initializer (Initializer): initializer for the embedding layer weight.
59 |     """
60 | 
61 |     def __init__(self, name, vocab_size, embed_dim=None, shared_with=None, padding_idx=None, initializer=RandomNormal(0, 0.0001)):
62 |         self.name = name
63 |         self.vocab_size = vocab_size
64 |         if embed_dim is None:
65 |             self.embed_dim = get_auto_embedding_dim(vocab_size)
66 |         else:
67 |             self.embed_dim = embed_dim
68 |         self.shared_with = shared_with
69 |         self.padding_idx = padding_idx
70 |         self.initializer = initializer
71 | 
72 |     def __repr__(self):
73 |         return f'<SparseFeature {self.name} with Embedding shape ({self.vocab_size}, {self.embed_dim})>'
74 | 
75 |     def get_embedding_layer(self):
76 |         if not hasattr(self, 'embed'):
77 |             self.embed = self.initializer(self.vocab_size, self.embed_dim)
78 |         return self.embed
79 | 
80 | 
81 | class DenseFeature(object):
82 |     """The Feature Class for dense features.
83 | 
84 |     Args:
85 |         name (str): feature's name.
86 |         embed_dim (int): length of the embedding vector; the value is fixed to `1`.
87 |     """
88 | 
89 |     def __init__(self, name):
90 |         self.name = name
91 |         self.embed_dim = 1
92 | 
93 |     def __repr__(self):
94 |         return f'<DenseFeature {self.name}>'
95 | 
--------------------------------------------------------------------------------
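A sketch of declaring the three feature types (column names and sizes are illustrative; the repr output shown assumes the `__repr__` reconstruction above, whose angle brackets were stripped during extraction):

>>> from HAMUR.basic.features import DenseFeature, SparseFeature, SequenceFeature
>>> price = DenseFeature("price")
>>> user = SparseFeature("user_id", vocab_size=10000, embed_dim=16)
>>> hist = SequenceFeature("hist_item_ids", vocab_size=5000, embed_dim=16,
...                        pooling="mean", shared_with="item_id")
>>> user
<SparseFeature user_id with Embedding shape (10000, 16)>
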
/HAMUR/basic/initializers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class RandomNormal(object):
5 |     """Returns an embedding initialized with a normal distribution.
6 | 
7 |     Args:
8 |         mean (float): the mean of the normal distribution
9 |         std (float): the standard deviation of the normal distribution
10 |     """
11 | 
12 |     def __init__(self, mean=0.0, std=1.0):
13 |         self.mean = mean
14 |         self.std = std
15 | 
16 |     def __call__(self, vocab_size, embed_dim):
17 |         embed = torch.nn.Embedding(vocab_size, embed_dim)
18 |         torch.nn.init.normal_(embed.weight, self.mean, self.std)
19 |         return embed
20 | 
21 | 
22 | class RandomUniform(object):
23 |     """Returns an embedding initialized with a uniform distribution.
24 | 
25 |     Args:
26 |         minval (float): lower bound of the range of the uniform distribution.
27 |         maxval (float): upper bound of the range of the uniform distribution.
28 |     """
29 | 
30 |     def __init__(self, minval=0.0, maxval=1.0):
31 |         self.minval = minval
32 |         self.maxval = maxval
33 | 
34 |     def __call__(self, vocab_size, embed_dim):
35 |         embed = torch.nn.Embedding(vocab_size, embed_dim)
36 |         torch.nn.init.uniform_(embed.weight, self.minval, self.maxval)
37 |         return embed
38 | 
39 | 
40 | class XavierNormal(object):
41 |     """Returns an embedding initialized with the method described in
42 |     `Understanding the difficulty of training deep feedforward neural networks`
43 |     - Glorot, X. & Bengio, Y. (2010), using a normal distribution.
44 | 
45 |     Args:
46 |         gain (float): stddev = gain*sqrt(2 / (fan_in + fan_out))
47 |     """
48 | 
49 |     def __init__(self, gain=1.0):
50 |         self.gain = gain
51 | 
52 |     def __call__(self, vocab_size, embed_dim):
53 |         embed = torch.nn.Embedding(vocab_size, embed_dim)
54 |         torch.nn.init.xavier_normal_(embed.weight, self.gain)
55 |         return embed
56 | 
57 | 
58 | class XavierUniform(object):
59 |     """Returns an embedding initialized with the method described in
60 |     `Understanding the difficulty of training deep feedforward neural networks`
61 |     - Glorot, X. & Bengio, Y. (2010), using a uniform distribution.
62 | 
63 |     Args:
64 |         gain (float): bound = gain*sqrt(6 / (fan_in + fan_out))
65 |     """
66 | 
67 |     def __init__(self, gain=1.0):
68 |         self.gain = gain
69 | 
70 |     def __call__(self, vocab_size, embed_dim):
71 |         embed = torch.nn.Embedding(vocab_size, embed_dim)
72 |         torch.nn.init.xavier_uniform_(embed.weight, self.gain)
73 |         return embed
74 | 
75 | 
76 | class Pretrained(object):
77 |     """Creates an Embedding instance from a given 2-dimensional FloatTensor.
78 | 
79 |     Args:
80 |         embedding_weight (Tensor or ndarray or List[List[int]]): FloatTensor containing the weights for the Embedding.
81 |             The first dimension is passed to Embedding as ``num_embeddings``, the second as ``embedding_dim``.
82 |         freeze (bool, optional): if ``True``, the tensor is not updated during training.
83 |     """
84 | 
85 |     def __init__(self, embedding_weight, freeze=True):
86 |         self.embedding_weight = torch.FloatTensor(embedding_weight)
87 |         self.freeze = freeze
88 | 
89 |     def __call__(self, vocab_size, embed_dim):
90 |         assert vocab_size == self.embedding_weight.shape[0] and embed_dim == self.embedding_weight.shape[1]
91 |         embed = torch.nn.Embedding.from_pretrained(self.embedding_weight, freeze=self.freeze)
92 |         return embed
93 | 
--------------------------------------------------------------------------------
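A sketch of plugging a pretrained table into a feature (the weight matrix is illustrative):

>>> import numpy as np
>>> from HAMUR.basic.initializers import Pretrained
>>> table = np.random.rand(100, 8)           # hypothetical pretrained embeddings
>>> init = Pretrained(table, freeze=True)
>>> emb = init(vocab_size=100, embed_dim=8)  # shapes must match the table
>>> emb.weight.requires_grad
False
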
/HAMUR/basic/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from itertools import combinations
5 | from .activation import activation_layer
6 | from .features import DenseFeature, SparseFeature, SequenceFeature
7 | 
8 | 
9 | class PredictionLayer(nn.Module):
10 |     """Prediction Layer.
11 | 
12 |     Args:
13 |         task_type (str): if `task_type='classification'`, return sigmoid(x), turning the
14 |             input logits into probabilities; if `task_type='regression'`, return x unchanged.
15 |     """
16 | 
17 |     def __init__(self, task_type='classification'):
18 |         super(PredictionLayer, self).__init__()
19 |         if task_type not in ["classification", "regression"]:
20 |             raise ValueError("task_type must be classification or regression")
21 |         self.task_type = task_type
22 | 
23 |     def forward(self, x):
24 |         if self.task_type == "classification":
25 |             x = torch.sigmoid(x)
26 |         return x
27 | 
28 | 
29 | class EmbeddingLayer(nn.Module):
30 |     """General Embedding Layer.
31 |     We save all the feature embeddings in embed_dict: `{feature_name : embedding table}`.
32 | 
33 | 
34 |     Args:
35 |         features (list): the list of `Feature Class`, i.e. all the features for which we want to create an embedding table.
36 | 
37 |     Shape:
38 |         - Input:
39 |             x (dict): {feature_name: feature_value}, a sequence feature value is a 2D tensor with shape `(batch_size, seq_len)`,
40 |                 a sparse/dense feature value is a 1D tensor with shape `(batch_size)`.
41 |             features (list): the list of `Feature Class`, i.e. the current features for which to do the embedding lookup.
42 |             squeeze_dim (bool): whether to squeeze the dim of the output (default = `False`).
43 |         - Output:
44 |             - if input Dense: `(batch_size, num_features_dense)`.
45 |             - if input Sparse: `(batch_size, num_features, embed_dim)` or `(batch_size, num_features * embed_dim)`.
46 |             - if input Sequence: same as input Sparse, or `(batch_size, num_features_seq, seq_length, embed_dim)` when `pooling=="concat"`.
47 |             - if input Dense and Sparse/Sequence: `(batch_size, num_features_sparse * embed_dim)`. Note we must squeeze_dim to concat dense values with sparse embeddings.
48 |     """
49 | 
50 |     def __init__(self, features):
51 |         super().__init__()
52 |         self.features = features
53 |         self.embed_dict = nn.ModuleDict()
54 |         self.n_dense = 0
55 | 
56 |         for fea in features:
57 |             if fea.name in self.embed_dict:  # already created
58 |                 continue
59 |             if isinstance(fea, SparseFeature) and fea.shared_with is None:
60 |                 self.embed_dict[fea.name] = fea.get_embedding_layer()
61 |             elif isinstance(fea, SequenceFeature) and fea.shared_with is None:
62 |                 self.embed_dict[fea.name] = fea.get_embedding_layer()
63 |             elif isinstance(fea, DenseFeature):
64 |                 self.n_dense += 1
65 | 
66 |     def forward(self, x, features, squeeze_dim=False):
67 |         sparse_emb, dense_values = [], []
68 |         sparse_exists, dense_exists = False, False
69 |         for fea in features:
70 |             if isinstance(fea, SparseFeature):
71 |                 if fea.shared_with is None:
72 |                     sparse_emb.append(self.embed_dict[fea.name](x[fea.name].long()).unsqueeze(1))
73 |                 else:
74 |                     sparse_emb.append(self.embed_dict[fea.shared_with](x[fea.name].long()).unsqueeze(1))
75 |             elif isinstance(fea, SequenceFeature):
76 |                 if fea.pooling == "sum":
77 |                     pooling_layer = SumPooling()
78 |                 elif fea.pooling == "mean":
79 |                     pooling_layer = AveragePooling()
80 |                 elif fea.pooling == "concat":
81 |                     pooling_layer = ConcatPooling()
82 |                 else:
83 |                     raise ValueError("Sequence pooling method supports only pooling in %s, got %s." %
84 |                                      (["sum", "mean", "concat"], fea.pooling))
85 |                 fea_mask = InputMask()(x, fea)
86 |                 if fea.shared_with is None:
87 |                     sparse_emb.append(pooling_layer(self.embed_dict[fea.name](x[fea.name].long()), fea_mask).unsqueeze(1))
88 |                 else:
89 |                     sparse_emb.append(pooling_layer(self.embed_dict[fea.shared_with](x[fea.name].long()), fea_mask).unsqueeze(1))  # shared specific sparse feature embedding
90 |             else:
91 |                 dense_values.append(x[fea.name].float().unsqueeze(1))
92 | 
93 |         if len(dense_values) > 0:
94 |             dense_exists = True
95 |             dense_values = torch.cat(dense_values, dim=1)
96 |         if len(sparse_emb) > 0:
97 |             sparse_exists = True
98 |             sparse_emb = torch.cat(sparse_emb, dim=1)  # [batch_size, num_features, embed_dim]
99 | 
100 |         if squeeze_dim:  # Note: if the emb_dim of sparse features differs, we must squeeze_dim
101 |             if dense_exists and not sparse_exists:  # only input dense features
102 |                 return dense_values
103 |             elif not dense_exists and sparse_exists:
104 |                 return sparse_emb.flatten(start_dim=1)  # squeeze dim to: [batch_size, num_features*embed_dim]
105 |             elif dense_exists and sparse_exists:
106 |                 return torch.cat((sparse_emb.flatten(start_dim=1), dense_values),
107 |                                  dim=1)  # concat dense values with sparse embeddings
108 |             else:
109 |                 raise ValueError("The input features cannot be empty")
110 |         else:
111 |             if sparse_exists:
112 |                 return sparse_emb  # [batch_size, num_features, embed_dim]
113 |             else:
114 |                 raise ValueError(
115 |                     "To keep the original shape [batch_size, num_features, embed_dim], expected %s in the feature list, got %s" %
116 |                     ("SparseFeatures", features))
117 | 
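A shape sketch for EmbeddingLayer (features and batch are illustrative):

>>> import torch
>>> feas = [SparseFeature("user_id", vocab_size=100, embed_dim=8),
...         SparseFeature("item_id", vocab_size=200, embed_dim=8)]
>>> layer = EmbeddingLayer(feas)
>>> x = {"user_id": torch.randint(0, 100, (4,)),
...      "item_id": torch.randint(0, 200, (4,))}
>>> layer(x, feas).shape                     # stacked: (batch, num_features, embed_dim)
torch.Size([4, 2, 8])
>>> layer(x, feas, squeeze_dim=True).shape   # flattened: (batch, num_features * embed_dim)
torch.Size([4, 16])
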
118 | 
119 | class EmbeddingLayerMultiDomain(nn.Module):
120 |     """General Embedding Layer for multi-domain input.
121 |     We save all the feature embeddings in embed_dict: `{feature_name : embedding table}`.
122 | 
123 | 
124 |     Args:
125 |         features (list): the list of `Feature Class`, i.e. all the features for which we want to create an embedding table.
126 | 
127 |     Shape:
128 |         - Input:
129 |             x (dict): {feature_name: feature_value}, a sequence feature value is a 3D tensor with shape `(batch_size, domain_num, seq_len)`,
130 |                 a sparse/dense feature value is a 2D tensor with shape `(batch_size, domain_num)`.
131 |             features (list): the list of `Feature Class`, i.e. the current features for which to do the embedding lookup.
132 |             squeeze_dim (bool): whether to squeeze the dim of the output (default = `False`).
133 |         - Output:
134 |             - if input Dense: `(batch_size, domain_num, num_features_dense)`.
135 |             - if input Sparse: `(batch_size, domain_num, num_features, embed_dim)` or `(batch_size, domain_num, num_features * embed_dim)`.
136 |             - if input Sequence: same as input Sparse, or `(batch_size, domain_num, num_features_seq, seq_length, embed_dim)` when `pooling=="concat"`.
137 |             - if input Dense and Sparse/Sequence: `(batch_size, domain_num, num_features_sparse * embed_dim)`. Note we must squeeze_dim to concat dense values with sparse embeddings.
138 |     """
139 | 
140 |     def __init__(self, features):
141 |         super().__init__()
142 |         self.features = features
143 |         self.embed_dict = nn.ModuleDict()
144 |         self.n_dense = 0
145 | 
146 |         for fea in features:
147 |             if fea.name in self.embed_dict:  # already created
148 |                 continue
149 |             if isinstance(fea, SparseFeature) and fea.shared_with is None:
150 |                 self.embed_dict[fea.name] = fea.get_embedding_layer()
151 |             elif isinstance(fea, SequenceFeature) and fea.shared_with is None:
152 |                 self.embed_dict[fea.name] = fea.get_embedding_layer()
153 |             elif isinstance(fea, DenseFeature):
154 |                 self.n_dense += 1
155 | 
156 |     def forward(self, x, features, squeeze_dim=False):
157 |         sparse_emb, dense_values = [], []
158 |         sparse_exists, dense_exists = False, False
159 |         for fea in features:
160 |             if isinstance(fea, SparseFeature):
161 |                 if fea.shared_with is None:
162 |                     b, d = x[fea.name].shape  # batch size, domain num
163 |                     emb = self.embed_dict[fea.name](x[fea.name].long().reshape(-1))
164 |                     emb = emb.reshape(b, d, -1)
165 |                     sparse_emb.append(emb.unsqueeze(2))  # batch_size, domain_num, 1, embed_dim
166 |                 else:
167 |                     b, d = x[fea.name].shape  # batch size, domain num
168 |                     emb = self.embed_dict[fea.shared_with](x[fea.name].long().reshape(-1))
169 |                     emb = emb.reshape(b, d, -1)
170 |                     sparse_emb.append(emb.unsqueeze(2))  # batch_size, domain_num, 1, embed_dim
171 | 
172 |             # unchanged from EmbeddingLayer, since SequenceFeature is not involved in the multi-domain models
173 |             elif isinstance(fea, SequenceFeature):
174 |                 if fea.pooling == "sum":
175 |                     pooling_layer = SumPooling()
176 |                 elif fea.pooling == "mean":
177 |                     pooling_layer = AveragePooling()
178 |                 elif fea.pooling == "concat":
179 |                     pooling_layer = ConcatPooling()
180 |                 else:
181 |                     raise ValueError("Sequence pooling method supports only pooling in %s, got %s." %
182 |                                      (["sum", "mean", "concat"], fea.pooling))
183 |                 fea_mask = InputMask()(x, fea)
184 |                 if fea.shared_with is None:
185 |                     sparse_emb.append(
186 |                         pooling_layer(self.embed_dict[fea.name](x[fea.name].long()), fea_mask).unsqueeze(1))
187 |                 else:
188 |                     sparse_emb.append(
189 |                         pooling_layer(self.embed_dict[fea.shared_with](x[fea.name].long()), fea_mask).unsqueeze(
190 |                             1))  # shared specific sparse feature embedding
191 |             else:
192 |                 dense_values.append(x[fea.name].float().unsqueeze(2))
193 | 
194 |         if len(dense_values) > 0:
195 |             dense_exists = True
196 |             dense_values = torch.cat(dense_values, dim=2)
197 |         if len(sparse_emb) > 0:
198 |             sparse_exists = True
199 |             sparse_emb = torch.cat(sparse_emb, dim=2)  # [batch_size, domain_num, num_features, embed_dim]
200 | 
201 |         if squeeze_dim:  # Note: if the emb_dim of sparse features differs, we must squeeze_dim
202 |             if dense_exists and not sparse_exists:  # only input dense features
203 |                 return dense_values
204 |             elif not dense_exists and sparse_exists:
205 |                 return sparse_emb.flatten(start_dim=2)  # squeeze dim to: [batch_size, domain_num, num_features*embed_dim]
206 |             elif dense_exists and sparse_exists:
207 |                 return torch.cat((sparse_emb.flatten(start_dim=2), dense_values),
208 |                                  dim=2)  # concat dense values with sparse embeddings
209 |             else:
210 |                 raise ValueError("The input features cannot be empty")
211 |         else:
212 |             if sparse_exists:
213 |                 return sparse_emb  # [batch_size, domain_num, num_features, embed_dim]
214 |             else:
215 |                 raise ValueError(
216 |                     "To keep the original shape [batch_size, domain_num, num_features, embed_dim], expected %s in the feature list, got %s" %
217 |                     ("SparseFeatures", features))
218 | 
219 | class InputMask(nn.Module):
220 |     """Return an input mask for the given features.
221 | 
222 |     Shape:
223 |         - Input:
224 |             x (dict): {feature_name: feature_value}, a sequence feature value is a 2D tensor with shape `(batch_size, seq_len)`,
225 |                 a sparse/dense feature value is a 1D tensor with shape `(batch_size)`.
226 |             features (list or SparseFeature or SequenceFeature): note that the elements in features are either all instances of SparseFeature or all instances of SequenceFeature.
227 |         - Output:
228 |             - if input Sparse: `(batch_size, num_features)`
229 |             - if input Sequence: `(batch_size, num_features_seq, seq_length)`
230 |     """
231 | 
232 |     def __init__(self):
233 |         super().__init__()
234 | 
235 |     def forward(self, x, features):
236 |         mask = []
237 |         if not isinstance(features, list):
238 |             features = [features]
239 |         for fea in features:
240 |             if isinstance(fea, SparseFeature) or isinstance(fea, SequenceFeature):
241 |                 if fea.padding_idx is not None:
242 |                     fea_mask = x[fea.name].long() != fea.padding_idx
243 |                 else:
244 |                     fea_mask = x[fea.name].long() != -1
245 |                 mask.append(fea_mask.unsqueeze(1).float())
246 |             else:
247 |                 raise ValueError("Only SparseFeature or SequenceFeature supports getting a mask.")
248 |         return torch.cat(mask, dim=1)
249 | 
250 | 
251 | class LR(nn.Module):
252 |     """Logistic Regression Module. It is a single linear
253 |     transformation of the input features, optionally followed by a sigmoid.
254 | 
255 |     Args:
256 |         input_dim (int): input size of the Linear module.
257 |         sigmoid (bool): whether to apply a sigmoid function to the output.
258 | 
259 |     Shape:
260 |         - Input: `(batch_size, input_dim)`
261 |         - Output: `(batch_size, 1)`
262 |     """
263 | 
264 |     def __init__(self, input_dim, sigmoid=False):
265 |         super().__init__()
266 |         self.sigmoid = sigmoid
267 |         self.fc = nn.Linear(input_dim, 1, bias=True)
268 | 
269 |     def forward(self, x):
270 |         if self.sigmoid:
271 |             return torch.sigmoid(self.fc(x))
272 |         else:
273 |             return self.fc(x)
274 | 
275 | 
276 | class ConcatPooling(nn.Module):
277 |     """Keep the original sequence embedding shape.
278 | 
279 |     Shape:
280 |         - Input: `(batch_size, seq_length, embed_dim)`
281 |         - Output: `(batch_size, seq_length, embed_dim)`
282 |     """
283 | 
284 |     def __init__(self):
285 |         super().__init__()
286 | 
287 |     def forward(self, x, mask=None):
288 |         return x
289 | 
290 | 
291 | class AveragePooling(nn.Module):
292 |     """Pool the sequence embedding matrix by `mean`.
293 | 
294 |     Shape:
295 |         - Input
296 |             x: `(batch_size, seq_length, embed_dim)`
297 |             mask: `(batch_size, 1, seq_length)`
298 |         - Output: `(batch_size, embed_dim)`
299 |     """
300 | 
301 |     def __init__(self):
302 |         super().__init__()
303 | 
304 |     def forward(self, x, mask=None):
305 |         if mask is None:
306 |             return torch.mean(x, dim=1)
307 |         else:
308 |             sum_pooling_matrix = torch.bmm(mask, x).squeeze(1)
309 |             non_padding_length = mask.sum(dim=-1)
310 |             return sum_pooling_matrix / (non_padding_length.float() + 1e-16)
311 | 
312 | 
313 | class SumPooling(nn.Module):
314 |     """Pool the sequence embedding matrix by `sum`.
315 | 
316 |     Shape:
317 |         - Input
318 |             x: `(batch_size, seq_length, embed_dim)`
319 |             mask: `(batch_size, 1, seq_length)`
320 |         - Output: `(batch_size, embed_dim)`
321 |     """
322 | 
323 |     def __init__(self):
324 |         super().__init__()
325 | 
326 |     def forward(self, x, mask=None):
327 |         if mask is None:
328 |             return torch.sum(x, dim=1)
329 |         else:
330 |             return torch.bmm(mask, x).squeeze(1)
331 | 
332 | 
333 | class MLP(nn.Module):
334 |     """Multi-Layer Perceptron module; it is the most widely used module for
335 |     learning features. Note that by default we add `BatchNorm1d`, an activation,
336 |     and `Dropout` after each `Linear` module.
337 | 
338 |     Args:
339 |         input_dim (int): input size of the first Linear layer.
340 |         output_layer (bool): whether this MLP module is the output layer. If `True`, appends one Linear(*, 1) module.
341 |         dims (list): output sizes of the Linear layers (default=[]).
342 |         dropout (float): probability of an element being zeroed (default = 0).
343 |         activation (str): the activation function, supports `[sigmoid, relu, prelu, dice, softmax]` (default='relu').
344 | 
345 |     Shape:
346 |         - Input: `(batch_size, input_dim)`
347 |         - Output: `(batch_size, 1)` or `(batch_size, dims[-1])`
348 |     """
349 | 
350 |     def __init__(self, input_dim, output_layer=True, dims=None, dropout=0, activation="relu"):
351 |         super().__init__()
352 |         if dims is None:
353 |             dims = []
354 |         layers = list()
355 |         for i_dim in dims:
356 |             layers.append(nn.Linear(input_dim, i_dim))
357 |             layers.append(nn.BatchNorm1d(i_dim))
358 |             layers.append(activation_layer(activation))
359 |             layers.append(nn.Dropout(p=dropout))
360 |             input_dim = i_dim
361 |         if output_layer:
362 |             layers.append(nn.Linear(input_dim, 1))
363 |         self.mlp = nn.Sequential(*layers)
364 | 
365 |     def forward(self, x):
366 |         return self.mlp(x)
367 | 
368 | 
369 | class FM(nn.Module):
370 |     """The Factorization Machine module, mentioned in the `DeepFM paper
371 |     <https://arxiv.org/abs/1703.04247>`. It is used to learn 2nd-order
372 |     feature interactions.
373 | 
374 |     Args:
375 |         reduce_sum (bool): whether to sum over embed_dim (default = `True`).
376 | 
377 |     Shape:
378 |         - Input: `(batch_size, num_features, embed_dim)`
379 |         - Output: `(batch_size, 1)` or `(batch_size, embed_dim)`
380 |     """
381 | 
382 |     def __init__(self, reduce_sum=True):
383 |         super().__init__()
384 |         self.reduce_sum = reduce_sum
385 | 
386 |     def forward(self, x):
387 |         square_of_sum = torch.sum(x, dim=1)**2
388 |         sum_of_square = torch.sum(x**2, dim=1)
389 |         ix = square_of_sum - sum_of_square
390 |         if self.reduce_sum:
391 |             ix = torch.sum(ix, dim=1, keepdim=True)
392 |         return 0.5 * ix
393 | 
394 | 
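FM.forward relies on the classic identity sum_{i<j} v_i * v_j = 0.5 * ((sum_i v_i)^2 - sum_i v_i^2), computed per embedding dimension, which turns the O(n^2) pairwise interaction into two O(n) sums. A quick numerical check (shapes illustrative):

>>> import torch
>>> x = torch.randn(2, 3, 4)   # (batch, num_features, embed_dim)
>>> brute = sum((x[:, i] * x[:, j]).sum(-1) for i in range(3) for j in range(i + 1, 3))
>>> torch.allclose(FM(reduce_sum=True)(x).squeeze(-1), brute, atol=1e-5)
True
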
395 | class CIN(nn.Module):
396 |     """Compressed Interaction Network.
397 | 
398 |     Args:
399 |         input_dim (int): input dim of the input tensor.
400 |         cin_size (list[int]): out channels of Conv1d.
401 | 
402 |     Shape:
403 |         - Input: `(batch_size, num_features, embed_dim)`
404 |         - Output: `(batch_size, 1)`
405 |     """
406 | 
407 |     def __init__(self, input_dim, cin_size, split_half=True):
408 |         super().__init__()
409 |         self.num_layers = len(cin_size)
410 |         self.split_half = split_half
411 |         self.conv_layers = torch.nn.ModuleList()
412 |         prev_dim, fc_input_dim = input_dim, 0
413 |         for i in range(self.num_layers):
414 |             cross_layer_size = cin_size[i]
415 |             self.conv_layers.append(torch.nn.Conv1d(input_dim * prev_dim, cross_layer_size, 1, stride=1, dilation=1, bias=True))
416 |             if self.split_half and i != self.num_layers - 1:
417 |                 cross_layer_size //= 2
418 |             prev_dim = cross_layer_size
419 |             fc_input_dim += prev_dim
420 |         self.fc = torch.nn.Linear(fc_input_dim, 1)
421 | 
422 |     def forward(self, x):
423 |         xs = list()
424 |         x0, h = x.unsqueeze(2), x
425 |         for i in range(self.num_layers):
426 |             x = x0 * h.unsqueeze(1)
427 |             batch_size, f0_dim, fin_dim, embed_dim = x.shape
428 |             x = x.view(batch_size, f0_dim * fin_dim, embed_dim)
429 |             x = F.relu(self.conv_layers[i](x))
430 |             if self.split_half and i != self.num_layers - 1:
431 |                 x, h = torch.split(x, x.shape[1] // 2, dim=1)
432 |             else:
433 |                 h = x
434 |             xs.append(x)
435 |         return self.fc(torch.sum(torch.cat(xs, dim=1), 2))
436 | 
437 | class CrossLayer(nn.Module):
438 |     """
439 |     Cross layer.
440 |     Args:
441 |         input_dim (int): input dim of the input tensor
442 |     """
443 |     def __init__(self, input_dim):
444 |         super(CrossLayer, self).__init__()
445 |         self.w = torch.nn.Linear(input_dim, 1, bias=False)
446 |         self.b = torch.nn.Parameter(torch.zeros(input_dim))
447 | 
448 |     def forward(self, x_0, x_i):
449 |         x = self.w(x_i) * x_0 + self.b
450 |         return x
451 | 
452 | 
453 | class CrossNetwork(nn.Module):
454 |     """CrossNetwork mentioned in the DCN paper.
455 | 
456 |     Args:
457 |         input_dim (int): input dim of the input tensor
458 | 
459 |     Shape:
460 |         - Input: `(batch_size, *)`
461 |         - Output: `(batch_size, *)`
462 | 
463 |     """
464 | 
465 |     def __init__(self, input_dim, num_layers):
466 |         super().__init__()
467 |         self.num_layers = num_layers
468 |         self.w = torch.nn.ModuleList([torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)])
469 |         self.b = torch.nn.ParameterList([torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)])
470 | 
471 |     def forward(self, x):
472 |         """
473 |         :param x: Float tensor of size ``(batch_size, input_dim)``
474 |         """
475 |         x0 = x
476 |         for i in range(self.num_layers):
477 |             xw = self.w[i](x)
478 |             x = x0 * xw + self.b[i] + x
479 |         return x
480 | 
481 | class CrossNetV2(nn.Module):
482 |     def __init__(self, input_dim, num_layers):
483 |         super().__init__()
484 |         self.num_layers = num_layers
485 |         self.w = torch.nn.ModuleList([torch.nn.Linear(input_dim, input_dim, bias=False) for _ in range(num_layers)])
486 |         self.b = torch.nn.ParameterList([torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)])
487 | 
488 | 
489 |     def forward(self, x):
490 |         x0 = x
491 |         for i in range(self.num_layers):
492 |             x = x0 * self.w[i](x) + self.b[i] + x
493 |         return x
494 | 
495 | class CrossNetMix(nn.Module):
496 |     """CrossNetMix improves CrossNetwork by:
497 |     1. adding MOE to learn feature interactions in different subspaces
498 |     2. adding nonlinear transformations in the low-dimensional space
499 |     :param x: Float tensor of size ``(batch_size, input_dim)``
500 |     """
501 | 
502 |     def __init__(self, input_dim, num_layers=2, low_rank=32, num_experts=4):
503 |         super(CrossNetMix, self).__init__()
504 |         self.num_layers = num_layers
505 |         self.num_experts = num_experts
506 | 
507 |         # U: (input_dim, low_rank)
508 |         self.u_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
509 |             torch.empty(num_experts, input_dim, low_rank))) for i in range(self.num_layers)])
510 |         # V: (input_dim, low_rank)
511 |         self.v_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
512 |             torch.empty(num_experts, input_dim, low_rank))) for i in range(self.num_layers)])
513 |         # C: (low_rank, low_rank)
514 |         self.c_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
515 |             torch.empty(num_experts, low_rank, low_rank))) for i in range(self.num_layers)])
516 |         self.gating = nn.ModuleList([nn.Linear(input_dim, 1, bias=False) for i in range(self.num_experts)])
517 | 
518 |         self.bias = torch.nn.ParameterList([nn.Parameter(nn.init.zeros_(
519 |             torch.empty(input_dim, 1))) for i in range(self.num_layers)])
520 | 
521 |     def forward(self, x):
522 |         x_0 = x.unsqueeze(2)  # (bs, in_features, 1)
523 |         x_l = x_0
524 |         for i in range(self.num_layers):
525 |             output_of_experts = []
526 |             gating_score_experts = []
527 |             for expert_id in range(self.num_experts):
528 |                 # (1) G(x_l)
529 |                 # compute the gating score by x_l
530 |                 gating_score_experts.append(self.gating[expert_id](x_l.squeeze(2)))
531 | 
532 |                 # (2) E(x_l)
533 |                 # project the input x_l to $\mathbb{R}^{r}$
534 |                 v_x = torch.matmul(self.v_list[i][expert_id].t(), x_l)  # (bs, low_rank, 1)
535 | 
536 |                 # nonlinear activation in low rank space
537 |                 v_x = torch.tanh(v_x)
538 |                 v_x = torch.matmul(self.c_list[i][expert_id], v_x)
539 |                 v_x = torch.tanh(v_x)
540 | 
541 |                 # project back to $\mathbb{R}^{d}$
542 |                 uv_x = torch.matmul(self.u_list[i][expert_id], v_x)  # (bs, in_features, 1)
543 | 
544 |                 dot_ = uv_x + self.bias[i]
545 |                 dot_ = x_0 * dot_  # Hadamard product
546 | 
547 |                 output_of_experts.append(dot_.squeeze(2))
548 | 
549 |             # (3) mixture of low-rank experts
550 |             output_of_experts = torch.stack(output_of_experts, 2)  # (bs, in_features, num_experts)
551 |             gating_score_experts = torch.stack(gating_score_experts, 1)  # (bs, num_experts, 1)
552 |             moe_out = torch.matmul(output_of_experts, gating_score_experts.softmax(1))
553 |             x_l = moe_out + x_l  # (bs, in_features, 1)
554 | 
555 |         x_l = x_l.squeeze(2)  # (bs, in_features); squeeze only dim 2 so batch_size=1 is safe
556 |         return x_l
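All three cross networks share the recurrence x_{l+1} = x_0 * f_l(x_l) + b_l + x_l, differing only in f_l: a rank-1 projection (CrossNetwork), a full matrix (CrossNetV2), or a gated mixture of low-rank experts (CrossNetMix). A shape sketch (sizes illustrative):

>>> import torch
>>> x = torch.randn(4, 16)
>>> CrossNetwork(input_dim=16, num_layers=3)(x).shape   # input dim is preserved
torch.Size([4, 16])
>>> CrossNetMix(input_dim=16, num_layers=2)(x).shape
torch.Size([4, 16])
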
557 | 
558 | class SENETLayer(nn.Module):
559 |     """
560 |     A weighted feature-gating mechanism from the SENet paper.
561 |     Args:
562 |         num_fields (int): number of feature fields
563 | 
564 |     Shape:
565 |         - Input: `(batch_size, num_fields, embed_dim)`
566 |         - Output: `(batch_size, num_fields, embed_dim)`
567 |     """
568 |     def __init__(self, num_fields, reduction_ratio=3):
569 |         super(SENETLayer, self).__init__()
570 |         reduced_size = max(1, int(num_fields / reduction_ratio))
571 |         self.mlp = nn.Sequential(nn.Linear(num_fields, reduced_size, bias=False),
572 |                                  nn.ReLU(),
573 |                                  nn.Linear(reduced_size, num_fields, bias=False),
574 |                                  nn.ReLU())
575 |     def forward(self, x):
576 |         z = torch.mean(x, dim=-1)
577 |         a = self.mlp(z)
578 |         v = x * a.unsqueeze(-1)
579 |         return v
580 | 
581 | class BiLinearInteractionLayer(nn.Module):
582 |     """
583 |     Bilinear feature interaction module, an improved version of the FFM model.
584 |     Args:
585 |         num_fields (int): number of feature fields
586 |         bilinear_type (str): the type of bilinear interaction function
587 |     Shape:
588 |         - Input: `(batch_size, num_fields, embed_dim)`
589 |         - Output: `(batch_size, num_fields*(num_fields-1)/2, embed_dim)`
590 |     """
591 |     def __init__(self, input_dim, num_fields, bilinear_type="field_interaction"):
592 |         super(BiLinearInteractionLayer, self).__init__()
593 |         self.bilinear_type = bilinear_type
594 |         if self.bilinear_type == "field_all":
595 |             self.bilinear_layer = nn.Linear(input_dim, input_dim, bias=False)
596 |         elif self.bilinear_type == "field_each":
597 |             self.bilinear_layer = nn.ModuleList([nn.Linear(input_dim, input_dim, bias=False) for i in range(num_fields)])
598 |         elif self.bilinear_type == "field_interaction":
599 |             self.bilinear_layer = nn.ModuleList([nn.Linear(input_dim, input_dim, bias=False) for i, j in combinations(range(num_fields), 2)])
600 |         else:
601 |             raise NotImplementedError()
602 | 
603 |     def forward(self, x):
604 |         feature_emb = torch.split(x, 1, dim=1)
605 |         if self.bilinear_type == "field_all":
606 |             bilinear_list = [self.bilinear_layer(v_i) * v_j for v_i, v_j in combinations(feature_emb, 2)]
607 |         elif self.bilinear_type == "field_each":
608 |             bilinear_list = [self.bilinear_layer[i](feature_emb[i]) * feature_emb[j] for i, j in combinations(range(len(feature_emb)), 2)]
609 |         elif self.bilinear_type == "field_interaction":
610 |             bilinear_list = [self.bilinear_layer[i](v[0]) * v[1] for i, v in enumerate(combinations(feature_emb, 2))]
611 |         return torch.cat(bilinear_list, dim=1)
612 | 
613 | 
614 | 
615 | 
616 | class MultiInterestSA(nn.Module):
617 |     """Multi-interest attention mentioned in the Comirec paper.
618 | 
619 |     Args:
620 |         embedding_dim (int): embedding dim of the item embedding
621 |         interest_num (int): number of interests
622 |         hidden_dim (int): hidden dim
623 | 
624 |     Shape:
625 |         - Input: seq_emb : (batch, seq, emb)
626 |             mask : (batch, seq, 1)
627 |         - Output: `(batch_size, interest_num, embedding_dim)`
628 | 
629 |     """
630 | 
631 |     def __init__(self, embedding_dim, interest_num, hidden_dim=None):
632 |         super(MultiInterestSA, self).__init__()
633 |         self.embedding_dim = embedding_dim
634 |         self.interest_num = interest_num
635 |         # honour a user-provided hidden_dim instead of silently ignoring it
636 |         self.hidden_dim = hidden_dim if hidden_dim is not None else self.embedding_dim * 4
637 |         self.W1 = torch.nn.Parameter(torch.rand(self.embedding_dim, self.hidden_dim), requires_grad=True)
638 |         self.W2 = torch.nn.Parameter(torch.rand(self.hidden_dim, self.interest_num), requires_grad=True)
639 |         self.W3 = torch.nn.Parameter(torch.rand(self.embedding_dim, self.embedding_dim), requires_grad=True)
640 | 
641 |     def forward(self, seq_emb, mask=None):
642 |         H = torch.einsum('bse, ed -> bsd', seq_emb, self.W1).tanh()
643 |         if mask is not None:
644 |             A = torch.einsum('bsd, dk -> bsk', H, self.W2) + -1.e9 * (1 - mask.float())
645 |             A = F.softmax(A, dim=1)
646 |         else:
647 |             A = F.softmax(torch.einsum('bsd, dk -> bsk', H, self.W2), dim=1)
648 |         A = A.permute(0, 2, 1)
649 |         multi_interest_emb = torch.matmul(A, seq_emb)
650 |         return multi_interest_emb
651 | 
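A shape sketch for the self-attentive interest extractor (sizes illustrative):

>>> import torch
>>> sa = MultiInterestSA(embedding_dim=8, interest_num=4)
>>> sa(torch.randn(2, 20, 8)).shape   # one embedding per extracted interest
torch.Size([2, 4, 8])
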
652 | 
653 | class CapsuleNetwork(nn.Module):
654 |     """CapsuleNetwork mentioned in the Comirec and MIND papers.
655 | 
656 |     Args:
657 |         embedding_dim (int): embedding dim of the item embedding
658 |         seq_len (int): length of the item sequence
659 |         bilinear_type (int): 0 for MIND, 2 for ComirecDR
660 |         interest_num (int): number of interests
661 |         routing_times (int): routing times
662 | 
663 |     Shape:
664 |         - Input: seq_emb : (batch, seq, emb)
665 |             mask : (batch, seq, 1)
666 |         - Output: `(batch_size, interest_num, embedding_dim)`
667 | 
668 |     """
669 | 
670 |     def __init__(self, embedding_dim, seq_len, bilinear_type=2, interest_num=4, routing_times=3, relu_layer=False):
671 |         super(CapsuleNetwork, self).__init__()
672 |         self.embedding_dim = embedding_dim  # h
673 |         self.seq_len = seq_len  # s
674 |         self.bilinear_type = bilinear_type
675 |         self.interest_num = interest_num
676 |         self.routing_times = routing_times
677 | 
678 |         self.relu_layer = relu_layer
679 |         self.stop_grad = True
680 |         self.relu = nn.Sequential(nn.Linear(self.embedding_dim, self.embedding_dim, bias=False), nn.ReLU())
681 |         if self.bilinear_type == 0:  # MIND
682 |             self.linear = nn.Linear(self.embedding_dim, self.embedding_dim, bias=False)
683 |         elif self.bilinear_type == 1:
684 |             self.linear = nn.Linear(self.embedding_dim, self.embedding_dim * self.interest_num, bias=False)
685 |         else:
686 |             self.w = nn.Parameter(torch.Tensor(1, self.seq_len, self.interest_num * self.embedding_dim, self.embedding_dim))
687 | 
688 |     def forward(self, item_eb, mask):
689 |         if self.bilinear_type == 0:
690 |             item_eb_hat = self.linear(item_eb)
691 |             item_eb_hat = item_eb_hat.repeat(1, 1, self.interest_num)
692 |         elif self.bilinear_type == 1:
693 |             item_eb_hat = self.linear(item_eb)
694 |         else:
695 |             u = torch.unsqueeze(item_eb, dim=2)
696 |             item_eb_hat = torch.sum(self.w[:, :self.seq_len, :, :] * u, dim=3)
697 | 
698 |         item_eb_hat = torch.reshape(item_eb_hat, (-1, self.seq_len, self.interest_num, self.embedding_dim))
699 |         item_eb_hat = torch.transpose(item_eb_hat, 1, 2).contiguous()
700 |         item_eb_hat = torch.reshape(item_eb_hat, (-1, self.interest_num, self.seq_len, self.embedding_dim))
701 | 
702 |         if self.stop_grad:
703 |             item_eb_hat_iter = item_eb_hat.detach()
704 |         else:
705 |             item_eb_hat_iter = item_eb_hat
706 | 
707 |         if self.bilinear_type > 0:
708 |             capsule_weight = torch.zeros(item_eb_hat.shape[0],
709 |                                          self.interest_num,
710 |                                          self.seq_len,
711 |                                          device=item_eb.device,
712 |                                          requires_grad=False)
713 |         else:
714 |             capsule_weight = torch.randn(item_eb_hat.shape[0],
715 |                                          self.interest_num,
716 |                                          self.seq_len,
717 |                                          device=item_eb.device,
718 |                                          requires_grad=False)
719 | 
720 |         for i in range(self.routing_times):  # dynamic routing, repeated routing_times (3 by default)
721 |             atten_mask = torch.unsqueeze(mask, 1).repeat(1, self.interest_num, 1)
722 |             paddings = torch.zeros_like(atten_mask, dtype=torch.float)
723 | 
724 |             capsule_softmax_weight = F.softmax(capsule_weight, dim=-1)
725 |             capsule_softmax_weight = torch.where(torch.eq(atten_mask, 0), paddings, capsule_softmax_weight)
726 |             capsule_softmax_weight = torch.unsqueeze(capsule_softmax_weight, 2)
727 | 
728 |             if i < self.routing_times - 1:  # all but the last iteration update the routing logits
729 |                 interest_capsule = torch.matmul(capsule_softmax_weight, item_eb_hat_iter)
730 |                 cap_norm = torch.sum(torch.square(interest_capsule), -1, True)
731 |                 scalar_factor = cap_norm / (1 + cap_norm) / torch.sqrt(cap_norm + 1e-9)
732 |                 interest_capsule = scalar_factor * interest_capsule
733 | 
734 |                 delta_weight = torch.matmul(item_eb_hat_iter, torch.transpose(interest_capsule, 2, 3).contiguous())
735 |                 delta_weight = torch.reshape(delta_weight, (-1, self.interest_num, self.seq_len))
736 |                 capsule_weight = capsule_weight + delta_weight
737 |             else:
738 |                 interest_capsule = torch.matmul(capsule_softmax_weight, item_eb_hat)
739 |                 cap_norm = torch.sum(torch.square(interest_capsule), -1, True)
740 |                 scalar_factor = cap_norm / (1 + cap_norm) / torch.sqrt(cap_norm + 1e-9)
741 |                 interest_capsule = scalar_factor * interest_capsule
742 | 
743 |         interest_capsule = torch.reshape(interest_capsule, (-1, self.interest_num, self.embedding_dim))
744 | 
745 |         if self.relu_layer:
746 |             interest_capsule = self.relu(interest_capsule)
747 | 
748 |         return interest_capsule
749 | 
750 | 
751 | class FFM(nn.Module):
752 |     """The Field-aware Factorization Machine module, mentioned in the `FFM paper
753 |     <https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf>`. It explicitly models
754 |     multi-channel second-order feature interactions, with each feature field
755 |     corresponding to one channel.
756 | 
757 |     Args:
758 |         num_fields (int): number of feature fields.
759 |         reduce_sum (bool): whether to sum over embed_dim (default = `True`).
760 | 
761 |     Shape:
762 |         - Input: `(batch_size, num_fields, num_fields, embed_dim)`
763 |         - Output: `(batch_size, num_fields*(num_fields-1)/2, 1)` or `(batch_size, num_fields*(num_fields-1)/2, embed_dim)`
764 |     """
765 | 
766 |     def __init__(self, num_fields, reduce_sum=True):
767 |         super().__init__()
768 |         self.num_fields = num_fields
769 |         self.reduce_sum = reduce_sum
770 | 
771 |     def forward(self, x):
772 |         # compute (non-redundant) second-order field-aware feature crossings
773 |         crossed_embeddings = []
774 |         for i in range(self.num_fields - 1):
775 |             for j in range(i + 1, self.num_fields):
776 |                 crossed_embeddings.append(x[:, i, j, :] * x[:, j, i, :])
777 |         crossed_embeddings = torch.stack(crossed_embeddings, dim=1)
778 | 
779 |         # if reduce_sum is true, the crossing operation is effectively an inner product, otherwise a Hadamard product
780 |         if self.reduce_sum:
781 |             crossed_embeddings = torch.sum(crossed_embeddings, dim=-1, keepdim=True)
782 |         return crossed_embeddings
783 | 
784 | 
785 | class CEN(nn.Module):
786 |     """The Compose-Excitation Network module, mentioned in the `FAT-DeepFFM paper
787 |     <https://arxiv.org/abs/1905.06336>`, a modified version of the
788 |     Squeeze-and-Excitation Network (SENet) (Hu et al., 2017). It is used to
789 |     highlight the importance of second-order feature crosses.
790 | 
791 |     Args:
792 |         embed_dim (int): the dimensionality of the categorical value embedding.
793 |         num_field_crosses (int): the number of second-order crosses between feature fields.
794 |         reduction_ratio (int): the ratio between the dimensions of the input layer and the hidden layer of the MLP module.
795 | 
796 |     Shape:
797 |         - Input: `(batch_size, num_fields, num_fields, embed_dim)`
798 |         - Output: `(batch_size, num_fields*(num_fields-1)/2 * embed_dim)`
799 |     """
800 |     def __init__(self, embed_dim, num_field_crosses, reduction_ratio):
801 |         super().__init__()
802 | 
803 |         # convolution weight (Eq.7 FAT-DeepFFM)
804 |         self.u = torch.nn.Parameter(torch.rand(num_field_crosses, embed_dim), requires_grad=True)
805 | 
806 |         # two FC layers that compute the field attention
807 |         self.mlp_att = MLP(num_field_crosses, dims=[num_field_crosses // reduction_ratio, num_field_crosses], output_layer=False, activation="relu")
808 | 
809 | 
810 |     def forward(self, em):
811 |         # compute descriptor vector (Eq.7 FAT-DeepFFM), output shape [batch_size, num_field_crosses]
812 |         d = F.relu((self.u.squeeze(0) * em).sum(-1))
813 | 
814 |         # compute field attention (Eq.9), output shape [batch_size, num_field_crosses]
815 |         s = self.mlp_att(d)
816 | 
817 |         # rescale original embedding with field attention (Eq.10), output shape [batch_size, num_field_crosses, embed_dim]
818 |         aem = s.unsqueeze(-1) * em
819 |         return aem.flatten(start_dim=1)
820 | 
--------------------------------------------------------------------------------
/HAMUR/basic/loss_func.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | 
4 | 
5 | class HingeLoss(torch.nn.Module):
6 |     """Hinge loss for pairwise learning.
7 |     reference: https://github.com/ustcml/RecStudio/blob/main/recstudio/model/loss_func.py
8 | 
9 |     """
10 | 
11 |     def __init__(self, margin=2, num_items=None):
12 |         super().__init__()
13 |         self.margin = margin
14 |         self.n_items = num_items
15 | 
16 |     def forward(self, pos_score, neg_score):
17 |         loss = torch.maximum(torch.max(neg_score, dim=-1).values - pos_score + self.margin, torch.tensor([0]).type_as(pos_score))
18 |         if self.n_items is not None:
19 |             impostors = neg_score - pos_score.view(-1, 1) + self.margin > 0
20 |             rank = torch.mean(impostors.float(), -1) * self.n_items  # cast the bool mask before averaging
21 |             return torch.mean(loss * torch.log(rank + 1))
22 |         else:
23 |             return torch.mean(loss)
24 | 
25 | 
26 | class BPRLoss(torch.nn.Module):
27 | 
28 |     def __init__(self):
29 |         super().__init__()
30 | 
31 |     def forward(self, pos_score, neg_score):
32 |         loss = torch.mean(-(pos_score - neg_score).sigmoid().log(), dim=-1)
33 |         return loss
34 |         # loss = -torch.mean(F.logsigmoid(pos_score - torch.max(neg_score, dim=-1)))  # needs torch >= 1.10
--------------------------------------------------------------------------------
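Both losses take a positive score per example and a set of sampled negative scores (shapes illustrative):

>>> import torch
>>> pos = torch.randn(32)       # (batch,)
>>> neg = torch.randn(32, 10)   # (batch, num_negatives)
>>> HingeLoss(margin=1, num_items=1000)(pos, neg)   # scalar, rank-weighted hinge
>>> BPRLoss()(pos.unsqueeze(-1), neg).mean()        # scalar, averaged -log sigmoid(pos - neg)
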
/HAMUR/basic/metaoptimizer.py:
--------------------------------------------------------------------------------
1 | """The metaoptimizer module; it provides the class MetaBalance.
2 | MetaBalance is used to scale the gradients and balance the gradient of each task.
3 | Authors: Qida Dong, dongjidan@126.com
4 | """
5 | import torch
6 | from torch.optim.optimizer import Optimizer
7 | 
8 | 
9 | class MetaBalance(Optimizer):
10 |     """MetaBalance Optimizer.
11 |     This method is used to scale the gradients and balance the gradient of each task.
12 | 
13 |     Args:
14 |         parameters (list): the parameters of the model
15 |         relax_factor (float, optional): the relax factor of gradient scaling (default: 0.7)
16 |         beta (float, optional): the coefficient of the moving average (default: 0.9)
17 |     """
18 | 
19 |     def __init__(self, parameters, relax_factor=0.7, beta=0.9):
20 | 
21 |         if relax_factor < 0. or relax_factor >= 1.:
22 |             raise ValueError(f'Invalid relax_factor: {relax_factor}, it should satisfy 0. <= relax_factor < 1.')
23 |         if beta < 0. or beta >= 1.:
24 |             raise ValueError(f'Invalid beta: {beta}, it should satisfy 0. <= beta < 1.')
25 |         rel_beta_dict = {'relax_factor': relax_factor, 'beta': beta}
26 |         super(MetaBalance, self).__init__(parameters, rel_beta_dict)
27 | 
28 |     @torch.no_grad()
29 |     def step(self, losses):
30 |         """Perform one MetaBalance step over all task losses.
31 |         Args:
32 |             losses (list): the losses of all tasks; the first entry is treated as the main task
33 | 
34 |         Raises:
35 |             RuntimeError: if a parameter receives a sparse gradient
36 |         """
37 | 
38 |         for idx, loss in enumerate(losses):
39 |             loss.backward(retain_graph=True)
40 |             for group in self.param_groups:
41 |                 for gp in group['params']:
42 |                     if gp.grad is None:
43 |                         # print('breaking')
44 |                         break
45 |                     if gp.grad.is_sparse:
46 |                         raise RuntimeError('MetaBalance does not support sparse gradients')
47 |                     # store the result of the moving average
48 |                     state = self.state[gp]
49 |                     if len(state) == 0:
50 |                         for i in range(len(losses)):
51 |                             if i == 0:
52 |                                 gp.norms = [0]
53 |                             else:
54 |                                 gp.norms.append(0)
55 |                     # calculate the moving average
56 |                     beta = group['beta']
57 |                     gp.norms[idx] = gp.norms[idx] * beta + (1 - beta) * torch.norm(gp.grad)
58 |                     # scale the auxiliary gradient
59 |                     relax_factor = group['relax_factor']
60 |                     gp.grad = gp.grad * gp.norms[0] / (gp.norms[idx] + 1e-5) * relax_factor + gp.grad * (1. - relax_factor)
61 |                     # store the gradient of each auxiliary task in state
62 |                     if idx == 0:
63 |                         state['sum_gradient'] = torch.zeros_like(gp.data)
64 |                         state['sum_gradient'] += gp.grad
65 |                     else:
66 |                         state['sum_gradient'] += gp.grad
67 | 
68 |                     if gp.grad is not None:
69 |                         gp.grad.detach_()
70 |                         gp.grad.zero_()
71 |                     if idx == len(losses) - 1:
72 |                         gp.grad = state['sum_gradient']
73 | 
--------------------------------------------------------------------------------
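A usage sketch for MetaBalance in a two-loss setup (names illustrative; the real loop lives in the trainers package). MetaBalance only rescales and accumulates gradients, so a regular optimizer still applies the update:

>>> opt = torch.optim.Adam(model.parameters())   # hypothetical model
>>> meta = MetaBalance(list(model.parameters()), relax_factor=0.7, beta=0.9)
>>> opt.zero_grad()
>>> meta.step([main_loss, aux_loss])   # scales aux grads toward the main task's magnitude, then sums
>>> opt.step()
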
/HAMUR/basic/metric.py:
--------------------------------------------------------------------------------
1 | """The metric module; it is used to provide some metrics for recommenders.
2 | Available functions:
3 | - auc_score: compute AUC
4 | - gauc_score: compute GAUC
5 | - log_loss: compute LogLoss
6 | - topk_metrics: compute top-k metrics, containing 'ndcg', 'mrr', 'recall', 'hit'
7 | Authors: Qida Dong, dongjidan@126.com
8 | """
9 | from sklearn.metrics import roc_auc_score
10 | import numpy as np
11 | from collections import defaultdict
12 | 
13 | 
14 | def auc_score(y_true, y_pred):
15 | 
16 |     return roc_auc_score(y_true, y_pred)
17 | 
18 | 
19 | def get_user_pred(y_true, y_pred, users):
20 |     """Divide the results into groups by user id.
21 | 
22 |     Args:
23 |         y_true (array): all true labels of the data
24 |         y_pred (array): the predicted scores
25 |         users (array): user ids
26 | 
27 |     Return:
28 |         user_pred (dict): {userid: values}, key is the user id and the value contains the labels and scores of that user
29 |     """
30 |     user_pred = {}
31 |     for i, u in enumerate(users):
32 |         if u not in user_pred:
33 |             user_pred[u] = {'y_true': [y_true[i]], 'y_pred': [y_pred[i]]}
34 |         else:
35 |             user_pred[u]['y_true'].append(y_true[i])
36 |             user_pred[u]['y_pred'].append(y_pred[i])
37 | 
38 |     return user_pred
39 | 
40 | 
41 | def gauc_score(y_true, y_pred, users, weights=None):
42 |     """Compute GAUC.
43 | 
44 |     Args:
45 |         y_true (array): dim(N, ), all true labels of the data
46 |         y_pred (array): dim(N, ), the predicted scores
47 |         users (array): dim(N, ), user ids
48 |         weights (dict): {userid: weight_value}, it contains the weight for each user group;
49 |             if it is None, the weight is equal to the number
50 |             of times the user is recommended
51 |     Return:
52 |         score: float, GAUC
53 |     """
54 |     assert len(y_true) == len(y_pred) and len(y_true) == len(users)
55 | 
56 |     user_pred = get_user_pred(y_true, y_pred, users)
57 |     score = 0
58 |     num = 0
59 |     for u in user_pred.keys():
60 |         auc = auc_score(user_pred[u]['y_true'], user_pred[u]['y_pred'])
61 |         if weights is None:
62 |             user_weight = len(user_pred[u]['y_true'])
63 |         else:
64 |             user_weight = weights[u]
65 |         auc *= user_weight
66 |         num += user_weight
67 |         score += auc
68 |     return score / num
69 | 
70 | 
71 | 
72 | def ndcg_score(y_true, y_pred, topKs=None):
73 |     if topKs is None:
74 |         topKs = [5]
75 |     result = topk_metrics(y_true, y_pred, topKs)
76 |     return result['NDCG']
77 | 
78 | 
79 | 
80 | def hit_score(y_true, y_pred, topKs=None):
81 |     if topKs is None:
82 |         topKs = [5]
83 |     result = topk_metrics(y_true, y_pred, topKs)
84 |     return result['Hit']
85 | 
86 | 
87 | def mrr_score(y_true, y_pred, topKs=None):
88 |     if topKs is None:
89 |         topKs = [5]
90 |     result = topk_metrics(y_true, y_pred, topKs)
91 |     return result['MRR']
92 | 
93 | 
94 | def recall_score(y_true, y_pred, topKs=None):
95 |     if topKs is None:
96 |         topKs = [5]
97 |     result = topk_metrics(y_true, y_pred, topKs)
98 |     return result['Recall']
99 | 
100 | 
101 | def precision_score(y_true, y_pred, topKs=None):
102 |     if topKs is None:
103 |         topKs = [5]
104 |     result = topk_metrics(y_true, y_pred, topKs)
105 |     return result['Precision']
106 | 
107 | 
108 | def topk_metrics(y_true, y_pred, topKs=None):
109 |     """Choose the top-k metrics and compute them.
110 |     The metrics contain 'ndcg', 'mrr', 'recall', 'precision' and 'hit'.
111 | 
112 |     Args:
113 |         y_true (dict): {userid: item_ids}, the key is a user id and the value is the list of items the user interacted with
114 |         y_pred (dict): {userid: item_ids}, the key is a user id and the value is the list of recommended items
115 |         topKs (list or tuple): if you want to get top5 and top10, topKs=(5, 10)
116 | 
117 |     Return:
118 |         results (dict): {metric_name: metric_values}, it contains five metrics: 'ndcg', 'recall', 'mrr', 'hit', 'precision'
119 | 
120 |     """
121 |     if topKs is None:
122 |         topKs = [5]
123 |     assert len(y_true) == len(y_pred)
124 | 
125 |     if not isinstance(topKs, (tuple, list)):
126 |         raise ValueError('topKs is wrong, it should be a tuple or a list')
127 | 
128 |     pred_array = []
129 |     true_array = []
130 |     for u in y_true.keys():
131 |         pred_array.append(y_pred[u])
132 |         true_array.append(y_true[u])
133 |     ndcg_result = []
134 |     mrr_result = []
135 |     hit_result = []
136 |     precision_result = []
137 |     recall_result = []
138 |     for idx in range(len(topKs)):
139 |         ndcgs = 0
140 |         mrrs = 0
141 |         hits = 0
142 |         precisions = 0
143 |         recalls = 0
144 |         gts = 0
145 |         for i in range(len(true_array)):
146 |             if len(true_array[i]) != 0:
147 |                 mrr_tmp = 0
148 |                 mrr_flag = True
149 |                 hit_tmp = 0
150 |                 dcg_tmp = 0
151 |                 idcg_tmp = 0
152 |                 for j in range(topKs[idx]):
153 |                     if pred_array[i][j] in true_array[i]:
154 |                         hit_tmp += 1.
155 |                         if mrr_flag:
156 |                             mrr_flag = False
157 |                             mrr_tmp = 1. / (1 + j)
158 |                         dcg_tmp += 1. / (np.log2(j + 2))
159 |                     if j < len(true_array[i]):
160 |                         idcg_tmp += 1.
 / (np.log2(j + 2))
161 |                 gts += len(true_array[i])
162 |                 hits += hit_tmp
163 |                 mrrs += mrr_tmp
164 |                 recalls += hit_tmp / len(true_array[i])
165 |                 precisions += hit_tmp / topKs[idx]
166 |                 if idcg_tmp != 0:
167 |                     ndcgs += dcg_tmp / idcg_tmp
168 |         hit_result.append(round(hits / gts, 4))
169 |         mrr_result.append(round(mrrs / len(pred_array), 4))
170 |         recall_result.append(round(recalls / len(pred_array), 4))
171 |         precision_result.append(round(precisions / len(pred_array), 4))
172 |         ndcg_result.append(round(ndcgs / len(pred_array), 4))
173 | 
174 |     results = defaultdict(list)
175 |     for idx in range(len(topKs)):
176 | 
177 |         output = f'NDCG@{topKs[idx]}: {ndcg_result[idx]}'
178 |         results['NDCG'].append(output)
179 | 
180 |         output = f'MRR@{topKs[idx]}: {mrr_result[idx]}'
181 |         results['MRR'].append(output)
182 | 
183 |         output = f'Recall@{topKs[idx]}: {recall_result[idx]}'
184 |         results['Recall'].append(output)
185 | 
186 |         output = f'Hit@{topKs[idx]}: {hit_result[idx]}'
187 |         results['Hit'].append(output)
188 | 
189 |         output = f'Precision@{topKs[idx]}: {precision_result[idx]}'
190 |         results['Precision'].append(output)
191 |     return results
192 | 
193 | def log_loss(y_true, y_pred):
194 |     score = y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)
195 |     return -score.sum() / len(y_true)
196 | 
197 | def Coverage(y_pred, all_items, topKs=None):
198 |     """Compute the coverage.
199 |     This metric measures the diversity of the recommended items
200 |     and the ability to explore long-tailed items.
201 |     Args:
202 |         y_pred (dict): {userid: item_ids}, the key is a user id and the value is the list of recommended items
203 |         all_items (set): all unique items
204 |     Return:
205 |         result (list[str]): the list of coverage scores
206 |     """
207 |     if topKs is None:
208 |         topKs = [5]
209 |     result = []
210 |     for k in topKs:
211 |         rec_items = set([])
212 |         for u in y_pred.keys():
213 |             tmp_items = set(y_pred[u][:k])
214 |             rec_items = rec_items | tmp_items
215 |         score = len(rec_items) * 1.
 / len(all_items)
216 |         score = round(score, 4)
217 |         result.append(f'Coverage@{k}: {score}')
218 |     return result
219 | 
220 | 
221 | # print(Coverage({'0':[0,1,2],'1':[1,3,4]}, {0,1,2,3,4,5}, [2,3]))
222 | 
223 | # pred = np.array([0.3, 0.2, 0.5, 0.9, 0.7, 0.31, 0.8, 0.1, 0.4, 0.6])
224 | # label = np.array([1, 0, 0, 1, 0, 0, 1, 0, 0, 1])
225 | # users_id = np.array([2, 1, 0, 2, 1, 0, 0, 2, 1, 1])
226 | 
227 | # print('auc: ', auc_score(label, pred))
228 | # print('gauc: ', gauc_score(label, pred, users_id))
229 | # print('log_loss: ', log_loss(label, pred))
230 | 
231 | # for mt in ['ndcg', 'mrr', 'recall', 'hit', 's']:
232 | #     tm = topk_metrics(y_true, y_pred, users_id, 3, metric_type=mt)
233 | #     print(f'{mt}: {tm}')
234 | # y_pred = {'0': [0, 1], '1': [0, 1], '2': [2, 3]}
235 | # y_true = {'0': [1, 2], '1': [0, 1, 2], '2': [2, 3]}
236 | # out = topk_metrics(y_true, y_pred, topKs=(1,2))
237 | # ndcgs = ndcg_score(y_true, y_pred, topKs=(1,2))
238 | # print(out)
239 | # print(ndcgs)
240 | 
241 | # ground_truth, match_res = np.load("C:\\Users\\dongj\\Desktop/res.npy", allow_pickle=True)
242 | # print(len(ground_truth), len(match_res))
243 | # out = topk_metrics(y_true=ground_truth, y_pred=match_res, topKs=[50])
244 | # print(out)
245 | 
246 | if __name__ == "__main__":
247 |     y_pred = {'0': [0, 1], '1': [0, 1], '2': [2, 3]}
248 |     y_true = {'0': [1, 2], '1': [0, 1, 2], '2': [2, 3]}
249 |     out = topk_metrics(y_true, y_pred, topKs=(1,2))
250 |     print(out)
--------------------------------------------------------------------------------
/HAMUR/models/multi_domain/__init__.py:
--------------------------------------------------------------------------------
1 | from .adapter import Mlp_2_Layer, Mlp_7_Layer, MLP_adap_2_layer_1_adp, MLP_adap_7_layer_2_adp
2 | from .adapter_dcn import DCN_MD, DCN_MD_adp
3 | from .adapter_wd import WideDeep_MD, WideDeep_MD_adp
--------------------------------------------------------------------------------
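The package exposes three backbone families (plain MLP, DCN, Wide & Deep), each in a multi-domain variant and a HAMUR adapter variant with adapter cells plus a domain-shared hyper-network. A construction sketch for the 7-layer adapter MLP defined below (feature list and dims are illustrative; see examples/run_movielens_rank_multi_domain.py for real configurations):

>>> from HAMUR.models.multi_domain import MLP_adap_7_layer_2_adp
>>> model = MLP_adap_7_layer_2_adp(features, domain_num=3,
...                                fcn_dims=[1024, 512, 512, 256, 256, 64, 64],
...                                hyper_dims=[128, 64], k=32)
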
domain_specific.append(nn.ReLU()) 40 | 41 | domain_specific.append(nn.Linear(self.fcn_dim[3], self.fcn_dim[4])) 42 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[4])) 43 | domain_specific.append(nn.ReLU()) 44 | 45 | domain_specific.append(nn.Linear(self.fcn_dim[4], self.fcn_dim[5])) 46 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[5])) 47 | domain_specific.append(nn.ReLU()) 48 | 49 | domain_specific.append(nn.Linear(self.fcn_dim[5], self.fcn_dim[6])) 50 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[6])) 51 | domain_specific.append(nn.ReLU()) 52 | 53 | domain_specific.append(nn.Linear(self.fcn_dim[6], self.fcn_dim[7])) 54 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[7])) 55 | domain_specific.append(nn.ReLU()) 56 | 57 | domain_specific.append(nn.Linear(self.fcn_dim[7], 1)) 58 | 59 | self.layer_list.append(domain_specific) 60 | 61 | # instance representation matrix initiation 62 | self.k = k 63 | self.u = nn.ParameterList() 64 | self.v = nn.ParameterList() 65 | 66 | # u,v matrix initiation 67 | self.u.append(Parameter(torch.ones((self.fcn_dim[6], self.k)), requires_grad=True)) 68 | self.u.append(Parameter(torch.ones((32, self.k)), requires_grad=True)) 69 | self.u.append(Parameter(torch.ones((self.fcn_dim[7], self.k)), requires_grad=True)) 70 | self.u.append(Parameter(torch.ones((32, self.k)), requires_grad=True)) 71 | 72 | self.v.append(Parameter(torch.ones((self.k, 32)), requires_grad=True)) 73 | self.v.append(Parameter(torch.ones((self.k, self.fcn_dim[6])), requires_grad=True)) 74 | self.v.append(Parameter(torch.ones((self.k, 32)), requires_grad=True)) 75 | self.v.append(Parameter(torch.ones((self.k, self.fcn_dim[7])), requires_grad=True)) 76 | 77 | # hyper-network design 78 | hyper_dims += [self.k * self.k] 79 | input_dim = self.input_dim 80 | hyper_layers = [] 81 | for i_dim in hyper_dims: 82 | hyper_layers.append(nn.Linear(input_dim, i_dim)) 83 | hyper_layers.append(nn.BatchNorm1d(i_dim)) 84 | hyper_layers.append(nn.ReLU()) 85 | hyper_layers.append(nn.Dropout(p=0)) 86 | input_dim = i_dim 87 | self.hyper_net = nn.Sequential(*hyper_layers) 88 | 89 | # adapter initiation 90 | self.b_list = nn.ParameterList() # bias 91 | self.b_list.append(Parameter(torch.zeros((32)), requires_grad=True)) 92 | self.b_list.append(Parameter(torch.zeros((self.fcn_dim[6])), requires_grad=True)) 93 | self.b_list.append(Parameter(torch.zeros((32)), requires_grad=True)) 94 | self.b_list.append(Parameter(torch.zeros((self.fcn_dim[7])), requires_grad=True)) 95 | 96 | self.gamma1 = nn.Parameter(torch.ones(self.fcn_dim[6])) # domain norm parameters 97 | self.bias1 = nn.Parameter(torch.zeros(self.fcn_dim[6])) 98 | self.gamma2 = nn.Parameter(torch.ones(self.fcn_dim[7])) 99 | self.bias2 = nn.Parameter(torch.zeros(self.fcn_dim[7])) 100 | self.eps = 1e-5 101 | 102 | def forward(self, x): 103 | 104 | domain_id = x["domain_indicator"].clone().detach() 105 | 106 | emb = self.embedding(x, self.features, squeeze_dim=True) # [batch_size,total_dims] 107 | 108 | mask = [] 109 | 110 | out_l = [] 111 | for d in range(self.domain_num): 112 | domain_mask = (domain_id == d) 113 | mask.append(domain_mask) 114 | 115 | domain_input = emb 116 | 117 | # hyper_network_out 118 | hyper_out_full = self.hyper_net(domain_input) # B * (k * k) 119 | # Representation matrix 120 | hyper_out = hyper_out_full.reshape(-1, self.k, self.k) # B * k * k 121 | 122 | model_list = self.layer_list[d] 123 | 124 | domain_input = model_list[0](domain_input) # linear 125 | 126 | domain_input = model_list[1](domain_input) # bn 127 | 128 | 
domain_input = model_list[2](domain_input) # relu B * m 129 | 130 | 131 | 132 | domain_input = model_list[3](domain_input) # linear 133 | 134 | domain_input = model_list[4](domain_input) # bn 135 | 136 | domain_input = model_list[5](domain_input) # relu 137 | 138 | 139 | 140 | 141 | domain_input = model_list[6](domain_input) # linear 142 | 143 | domain_input = model_list[7](domain_input) # bn 144 | 145 | domain_input = model_list[8](domain_input) # relu 146 | 147 | 148 | 149 | 150 | 151 | domain_input = model_list[9](domain_input) # linear 152 | 153 | domain_input = model_list[10](domain_input) # bn 154 | 155 | domain_input = model_list[11](domain_input) # relu 156 | 157 | 158 | 159 | 160 | domain_input = model_list[12](domain_input) # linear 161 | 162 | domain_input = model_list[13](domain_input) # bn 163 | 164 | domain_input = model_list[14](domain_input) # relu 165 | 166 | 167 | 168 | 169 | domain_input = model_list[15](domain_input) # linear 170 | 171 | domain_input = model_list[16](domain_input) # bn 172 | 173 | domain_input = model_list[17](domain_input) # relu 174 | 175 | # First Adapter-cell 176 | 177 | # Adapter layer-1: Down projection 178 | w1 = torch.einsum('mi,bij,jn->bmn',self.u[0] , hyper_out,self.v[0]) 179 | b1 = self.b_list[0] 180 | tmp_out = torch.einsum('bf,bfj->bj',domain_input,w1) 181 | tmp_out += b1 182 | 183 | # Adapter layer-2: non-linear 184 | tmp_out = self.sig(tmp_out) 185 | 186 | # Adapter layer-3: Up projection 187 | w2 = torch.einsum('mi,bij,jn->bmn',self.u[1] , hyper_out,self.v[1]) 188 | b2 = self.b_list[1] 189 | tmp_out = torch.einsum('bf,bfj->bj',tmp_out,w2) 190 | tmp_out += b2 191 | 192 | # Adapter layer-4: Domain norm 193 | mean = tmp_out.mean(dim=0) 194 | var = tmp_out.var(dim=0) 195 | x_norm = (tmp_out - mean) / torch.sqrt(var + self.eps) 196 | out = self.gamma1 * x_norm + self.bias1 197 | 198 | # Adapter: short-cut 199 | domain_input = out+domain_input 200 | 201 | 202 | domain_input = model_list[18](domain_input) # linear 203 | 204 | domain_input = model_list[19](domain_input) # bn 205 | 206 | domain_input = model_list[20](domain_input) # relu 207 | 208 | # Second Adapter-cell 209 | 210 | # Adapter layer-1: Down projection 211 | w1 = torch.einsum('mi,bij,jn->bmn', self.u[2], hyper_out, self.v[2]) 212 | b1 = self.b_list[2] 213 | tmp_out = torch.einsum('bf,bfj->bj', domain_input, w1) 214 | tmp_out += b1 215 | 216 | # Adapter layer-2: non-linear 217 | tmp_out = self.sig(tmp_out) 218 | 219 | # Adapter layer-3: Up projection 220 | w2 = torch.einsum('mi,bij,jn->bmn', self.u[3], hyper_out, self.v[3]) 221 | b2 = self.b_list[3] 222 | tmp_out = torch.einsum('bf,bfj->bj', tmp_out, w2) 223 | tmp_out += b2 224 | 225 | # Adapter layer-4: Domain norm 226 | mean = tmp_out.mean(dim=0) 227 | var = tmp_out.var(dim=0) 228 | x_norm = (tmp_out - mean) / torch.sqrt(var + self.eps) 229 | out = self.gamma2 * x_norm + self.bias2 230 | 231 | # Adapter: short-cut 232 | domain_input = out + domain_input 233 | 234 | 235 | domain_input = model_list[21](domain_input) # linear 236 | 237 | domain_input = self.sig(domain_input) # sigmoid 238 | 239 | out_l.append(domain_input) 240 | 241 | final = torch.zeros_like(out_l[0]) 242 | for d in range(self.domain_num): 243 | final = torch.where(mask[d].unsqueeze(1), out_l[d], final) 244 | 245 | return final.squeeze(1) 246 | 247 | 248 | class MLP_adap_2_layer_1_adp(nn.Module): 249 | # 2-layer MLP with 1 adapter cell 250 | def __init__(self, features, domain_num, fcn_dims, hyper_dims, k ): 251 | super().__init__() 252 | self.features = features
253 | self.input_dim = sum([fea.embed_dim for fea in features]) 254 | self.layer_num = len(fcn_dims) + 1 255 | self.fcn_dim = [self.input_dim] + fcn_dims 256 | self.domain_num = domain_num 257 | self.embedding = EmbeddingLayer(features) 258 | 259 | self.relu = activation_layer("relu") 260 | self.sig = activation_layer("sigmoid") 261 | 262 | self.layer_list = nn.ModuleList() 263 | for d in range(domain_num): 264 | domain_specific = nn.ModuleList() 265 | domain_specific.append(nn.Linear(self.fcn_dim[0], self.fcn_dim[1])) 266 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[1])) 267 | domain_specific.append(nn.ReLU()) 268 | 269 | domain_specific.append(nn.Linear(self.fcn_dim[1], self.fcn_dim[2])) 270 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[2])) 271 | domain_specific.append(nn.ReLU()) 272 | domain_specific.append(nn.Linear(self.fcn_dim[2], 1)) 273 | 274 | self.layer_list.append(domain_specific) 275 | 276 | # instance matrix initiation 277 | self.k = k 278 | self.u = nn.ParameterList() 279 | self.v = nn.ParameterList() 280 | 281 | # u,v initiation 282 | self.u.append(Parameter(torch.ones((self.fcn_dim[2], self.k)), requires_grad=True)) 283 | self.u.append(Parameter(torch.ones((32, self.k)), requires_grad=True)) 284 | 285 | self.v.append(Parameter(torch.ones((self.k, 32)), requires_grad=True)) 286 | self.v.append(Parameter(torch.ones((self.k, self.fcn_dim[2])), requires_grad=True)) 287 | 288 | # hyper-network 289 | hyper_dims += [self.k * self.k] 290 | input_dim = self.input_dim 291 | hyper_layers = [] 292 | for i_dim in hyper_dims: 293 | hyper_layers.append(nn.Linear(input_dim, i_dim)) 294 | hyper_layers.append(nn.BatchNorm1d(i_dim)) 295 | hyper_layers.append(nn.ReLU()) 296 | hyper_layers.append(nn.Dropout(p=0)) 297 | input_dim = i_dim 298 | self.hyper_net = nn.Sequential(*hyper_layers) 299 | 300 | # Adapter parameters 301 | self.b_list = nn.ParameterList() 302 | self.b_list.append(Parameter(torch.zeros((32)), requires_grad=True)) 303 | self.b_list.append(Parameter(torch.zeros((self.fcn_dim[2])), requires_grad=True)) 304 | 305 | self.gamma1 = nn.Parameter(torch.ones(self.fcn_dim[2])) 306 | self.bias1 = nn.Parameter(torch.zeros(self.fcn_dim[2])) 307 | self.eps = 1e-5 308 | 309 | def forward(self, x): 310 | 311 | domain_id = x["domain_indicator"].clone().detach() 312 | 313 | emb = self.embedding(x, self.features, squeeze_dim=True) # [batch_size,total_dims] 314 | 315 | mask = [] 316 | 317 | out_l = [] 318 | for d in range(self.domain_num): 319 | domain_mask = (domain_id == d) 320 | mask.append(domain_mask) 321 | 322 | domain_input = emb 323 | 324 | # hyper-network output 325 | hyper_out_full = self.hyper_net(domain_input) # B * (k * k) 326 | # Representation matrix 327 | hyper_out = hyper_out_full.reshape(-1, self.k, self.k) # B * k * k 328 | 329 | model_list = self.layer_list[d] 330 | 331 | domain_input = model_list[0](domain_input) # linear 332 | 333 | domain_input = model_list[1](domain_input) # bn 334 | 335 | domain_input = model_list[2](domain_input) # relu 336 | 337 | 338 | 339 | domain_input = model_list[3](domain_input) # linear 340 | 341 | domain_input = model_list[4](domain_input) # bn 342 | 343 | domain_input = model_list[5](domain_input) # relu 344 | 345 | # Adapter cell 346 | # Adapter layer-1: Down projection 347 | w1 = torch.einsum('mi,bij,jn->bmn', self.u[0], hyper_out, self.v[0]) 348 | b1 = self.b_list[0] 349 | tmp_out = torch.einsum('bf,bfj->bj', domain_input, w1) 350 | tmp_out += b1 351 | 352 | # Adapter layer-2: Non-linear 353 | tmp_out = self.sig(tmp_out) 354
| 355 | # Adapter layer-3: Up projection 356 | w2 = torch.einsum('mi,bij,jn->bmn', self.u[1], hyper_out, self.v[1]) 357 | b2 = self.b_list[1] 358 | tmp_out = torch.einsum('bf,bfj->bj', tmp_out, w2) 359 | tmp_out += b2 360 | 361 | # Adapter layer-4: Domain norm 362 | mean = tmp_out.mean(dim=0) 363 | var = tmp_out.var(dim=0) 364 | x_norm = (tmp_out - mean) / torch.sqrt(var + self.eps) 365 | out = self.gamma1 * x_norm + self.bias1 366 | 367 | # Adapter: Short-cut 368 | domain_input = out + domain_input 369 | 370 | domain_input = model_list[6](domain_input) 371 | domain_input = self.sig(domain_input) 372 | 373 | out_l.append(domain_input) 374 | 375 | final = torch.zeros_like(out_l[0]) 376 | for d in range(self.domain_num): 377 | final = torch.where(mask[d].unsqueeze(1), out_l[d], final) 378 | 379 | return final.squeeze(1) 380 | 381 | class Mlp_2_Layer(nn.Module): 382 | # 2-layer MLP model 383 | def __init__(self, features, domain_num, fcn_dims): 384 | super().__init__() 385 | self.features = features 386 | self.input_dim = sum([fea.embed_dim for fea in features]) 387 | self.layer_num = len(fcn_dims) + 1 # number of backbone layers plus one final output layer 388 | self.fcn_dim = [self.input_dim] + fcn_dims # prepend input_dim (the final output dim is handled separately) so the layer parameters can be generated 389 | self.domain_num = domain_num 390 | self.embedding = EmbeddingLayer(features) 391 | 392 | self.relu = activation_layer("relu") 393 | self.sig = activation_layer("sigmoid") 394 | 395 | self.layer_list = nn.ModuleList() 396 | for d in range(domain_num): 397 | domain_specific = nn.ModuleList() 398 | 399 | domain_specific.append(nn.Linear(self.fcn_dim[0], self.fcn_dim[1])) 400 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[1])) 401 | domain_specific.append(nn.ReLU()) 402 | 403 | domain_specific.append(nn.Linear(self.fcn_dim[1], self.fcn_dim[2])) 404 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[2])) 405 | domain_specific.append(nn.ReLU()) 406 | 407 | domain_specific.append(nn.Linear(self.fcn_dim[2], 1)) 408 | self.layer_list.append(domain_specific) 409 | 410 | def forward(self, x): 411 | 412 | domain_id = x["domain_indicator"].clone().detach() 413 | 414 | emb = self.embedding(x, self.features, squeeze_dim=True) # [batch_size,total_dims] 415 | 416 | mask = [] 417 | 418 | out = [] 419 | for d in range(self.domain_num): 420 | domain_mask = (domain_id == d) 421 | mask.append(domain_mask) 422 | 423 | domain_input = emb 424 | 425 | model_list = self.layer_list[d] 426 | 427 | domain_input = model_list[0](domain_input) # linear 428 | domain_input = model_list[1](domain_input) # bn 429 | domain_input = model_list[2](domain_input) # relu 430 | 431 | domain_input = model_list[3](domain_input) # linear 432 | domain_input = model_list[4](domain_input) # bn 433 | domain_input = model_list[5](domain_input) # relu 434 | 435 | domain_input = model_list[6](domain_input) 436 | domain_input = self.sig(domain_input) 437 | 438 | out.append(domain_input) 439 | 440 | final = torch.zeros_like(out[0]) 441 | for d in range(self.domain_num): 442 | final = torch.where(mask[d].unsqueeze(1), out[d], final) 443 | return final.squeeze(1) 444 | 445 | 446 | class Mlp_7_Layer(nn.Module): 447 | # 7-layer MLP model 448 | def __init__(self, features, domain_num, fcn_dims): 449 | super().__init__() 450 | self.features = features 451 | self.input_dim = sum([fea.embed_dim for fea in features]) 452 | self.layer_num = len(fcn_dims) + 1 # number of backbone layers plus one final output layer 453 | self.fcn_dim = [self.input_dim] + fcn_dims # prepend input_dim (the final output dim is handled separately) so the layer parameters can be generated 454 | self.domain_num = domain_num 455 | self.embedding = EmbeddingLayer(features) 456
| 457 | self.relu = activation_layer("relu") 458 | self.sig = activation_layer("sigmoid") 459 | 460 | self.layer_list = nn.ModuleList() 461 | for d in range(domain_num): 462 | domain_specific = nn.ModuleList() 463 | 464 | domain_specific.append(nn.Linear(self.fcn_dim[0], self.fcn_dim[1])) 465 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[1])) 466 | domain_specific.append(nn.ReLU()) 467 | 468 | domain_specific.append(nn.Linear(self.fcn_dim[1], self.fcn_dim[2])) 469 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[2])) 470 | domain_specific.append(nn.ReLU()) 471 | 472 | domain_specific.append(nn.Linear(self.fcn_dim[2], self.fcn_dim[3])) 473 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[3])) 474 | domain_specific.append(nn.ReLU()) 475 | 476 | domain_specific.append(nn.Linear(self.fcn_dim[3], self.fcn_dim[4])) 477 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[4])) 478 | domain_specific.append(nn.ReLU()) 479 | 480 | domain_specific.append(nn.Linear(self.fcn_dim[4], self.fcn_dim[5])) 481 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[5])) 482 | domain_specific.append(nn.ReLU()) 483 | 484 | domain_specific.append(nn.Linear(self.fcn_dim[5], self.fcn_dim[6])) 485 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[6])) 486 | domain_specific.append(nn.ReLU()) 487 | 488 | domain_specific.append(nn.Linear(self.fcn_dim[6], self.fcn_dim[7])) 489 | domain_specific.append(nn.BatchNorm1d(self.fcn_dim[7])) 490 | domain_specific.append(nn.ReLU()) 491 | 492 | domain_specific.append(nn.Linear(self.fcn_dim[7], 1)) 493 | self.layer_list.append(domain_specific) 494 | 495 | def forward(self, x): 496 | 497 | domain_id = x["domain_indicator"].clone().detach() 498 | 499 | emb = self.embedding(x, self.features, squeeze_dim=True) # [batch_size,total_dims] 500 | 501 | mask = [] 502 | 503 | out = [] 504 | for d in range(self.domain_num): 505 | domain_mask = (domain_id == d) 506 | mask.append(domain_mask) 507 | 508 | domain_input = emb 509 | 510 | model_list = self.layer_list[d] 511 | 512 | domain_input = model_list[0](domain_input) # linear 513 | domain_input = model_list[1](domain_input) # bn 514 | domain_input = model_list[2](domain_input) # relu 515 | 516 | domain_input = model_list[3](domain_input) # linear 517 | domain_input = model_list[4](domain_input) # bn 518 | domain_input = model_list[5](domain_input) # relu 519 | 520 | domain_input = model_list[6](domain_input) # linear 521 | domain_input = model_list[7](domain_input) # bn 522 | domain_input = model_list[8](domain_input) # relu 523 | 524 | domain_input = model_list[9](domain_input) # linear 525 | domain_input = model_list[10](domain_input) # bn 526 | domain_input = model_list[11](domain_input) # relu 527 | 528 | domain_input = model_list[12](domain_input) # linear 529 | domain_input = model_list[13](domain_input) # bn 530 | domain_input = model_list[14](domain_input) # relu 531 | 532 | domain_input = model_list[15](domain_input) # linear 533 | domain_input = model_list[16](domain_input) # bn 534 | domain_input = model_list[17](domain_input) # relu 535 | 536 | domain_input = model_list[18](domain_input) # linear 537 | domain_input = model_list[19](domain_input) # bn 538 | domain_input = model_list[20](domain_input) # relu 539 | 540 | domain_input = model_list[21](domain_input) 541 | domain_input = self.sig(domain_input) 542 | 543 | out.append(domain_input) 544 | 545 | final = torch.zeros_like(out[0]) 546 | for d in range(self.domain_num): 547 | final = torch.where(mask[d].unsqueeze(1), out[d], final) 548 | return 
final.squeeze(1) 549 | -------------------------------------------------------------------------------- /HAMUR/models/multi_domain/adapter_dcn.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn.parameter import Parameter 6 | from ...basic.layers import LR, MLP, CrossNetwork, EmbeddingLayer 7 | 8 | 9 | class DCN_MD(torch.nn.Module): 10 | """ 11 | multi-domain Deep & Cross Network 12 | """ 13 | 14 | def __init__(self, features, num_domains, n_cross_layers, mlp_params): 15 | super().__init__() 16 | self.features = features 17 | self.dims = sum([fea.embed_dim for fea in features]) 18 | self.num_domains = num_domains 19 | self.embedding = EmbeddingLayer(features) 20 | self.cn = CrossNetwork(self.dims, n_cross_layers) 21 | self.mlp = MLP(self.dims, output_layer=False, **mlp_params) 22 | self.linear = LR(self.dims + mlp_params["dims"][-1]) 23 | 24 | def forward(self, x): 25 | domain_id = x["domain_indicator"].clone().detach() 26 | 27 | embed_x = self.embedding(x, self.features, squeeze_dim=True) # [batch_size,total_dims] 28 | 29 | # mask list 30 | mask = [] 31 | # out list 32 | out = [] 33 | 34 | for d in range(self.num_domains): 35 | domain_mask = (domain_id == d) 36 | mask.append(domain_mask) 37 | 38 | domain_input = embed_x 39 | cn_out = self.cn(domain_input) 40 | mlp_out = self.mlp(domain_input) 41 | x_stack = torch.cat([cn_out, mlp_out], dim=1) 42 | y = self.linear(x_stack) 43 | out.append(torch.sigmoid(y)) 44 | 45 | final = torch.zeros_like(out[0]) 46 | for d in range(self.num_domains): 47 | final = torch.where(mask[d].unsqueeze(1), out[d], final) 48 | return final.squeeze(1) 49 | 50 | 51 | class DCN_MD_adp(torch.nn.Module): 52 | """ 53 | multi-domain Deep & Cross Network with Adapter 54 | """ 55 | 56 | def __init__(self, features, num_domains, n_cross_layers, k, mlp_params, hyper_dims): 57 | super().__init__() 58 | self.features = features 59 | self.dims = sum([fea.embed_dim for fea in features]) 60 | self.num_domains = num_domains 61 | self.embedding = EmbeddingLayer(features) 62 | self.cn = CrossNetwork(self.dims, n_cross_layers) 63 | self.mlp = MLP(self.dims, output_layer=False, **mlp_params) 64 | self.linear = LR(self.dims + mlp_params["dims"][-1]) 65 | 66 | 67 | 68 | # instance representation matrix init 69 | self.k = k 70 | 71 | # u,v initiation 72 | self.u = nn.ParameterList() 73 | self.v = nn.ParameterList() 74 | 75 | self.u.append(Parameter(torch.ones((mlp_params["dims"][-1], self.k)), requires_grad=True)) 76 | self.u.append(Parameter(torch.ones((32, self.k)), requires_grad=True)) 77 | 78 | self.v.append(Parameter(torch.ones((self.k, 32)), requires_grad=True)) 79 | self.v.append(Parameter(torch.ones((self.k, mlp_params["dims"][-1])), requires_grad=True)) 80 | 81 | # hyper-network initiation 82 | hyper_dims += [self.k * self.k] 83 | input_dim = self.dims 84 | hyper_layers = [] 85 | for i_dim in hyper_dims: 86 | hyper_layers.append(nn.Linear(input_dim, i_dim)) 87 | hyper_layers.append(nn.BatchNorm1d(i_dim)) 88 | hyper_layers.append(nn.ReLU()) 89 | hyper_layers.append(nn.Dropout(p=0)) 90 | input_dim = i_dim 91 | self.hyper_net = nn.Sequential(*hyper_layers) 92 | 93 | # adapter parameters initiation 94 | self.b_list = nn.ParameterList() 95 | self.b_list.append(Parameter(torch.zeros((32)), requires_grad=True)) 96 | self.b_list.append(Parameter(torch.zeros(mlp_params["dims"][-1]), requires_grad=True)) 97 | 98 | self.gamma1 = nn.Parameter(torch.ones(mlp_params["dims"][-1])) 99 |
self.bias1 = nn.Parameter(torch.zeros(mlp_params["dims"][-1])) 100 | self.eps = 1e-5 101 | 102 | def forward(self, x): 103 | domain_id = x["domain_indicator"].clone().detach() 104 | 105 | embed_x = self.embedding(x, self.features, squeeze_dim=True) # [batch_size,total_dims] 106 | 107 | # mask stores each sample's domain id within the batch 108 | mask = [] 109 | # out_l stores the per-domain outputs 110 | out_l = [] 111 | 112 | for d in range(self.num_domains): 113 | domain_mask = (domain_id == d) 114 | mask.append(domain_mask) 115 | 116 | domain_input = embed_x 117 | 118 | hyper_out_full = self.hyper_net(domain_input) # B * (k * k) 119 | hyper_out = hyper_out_full.reshape(-1, self.k, self.k) # B * k * k 120 | 121 | cn_out = self.cn(domain_input) 122 | mlp_out = self.mlp(domain_input) 123 | 124 | 125 | # Adapter-cell 126 | # Adapter-layer1: Down projection 127 | w1 = torch.einsum('mi,bij,jn->bmn', self.u[0], hyper_out, self.v[0]) 128 | b1 = self.b_list[0] 129 | tmp_out = torch.einsum('bf,bfj->bj', mlp_out, w1) 130 | tmp_out += b1 131 | # Adapter-layer2: non-linear 132 | tmp_out = torch.sigmoid(tmp_out) 133 | # Adapter-layer3: Up projection 134 | w2 = torch.einsum('mi,bij,jn->bmn', self.u[1], hyper_out, self.v[1]) 135 | b2 = self.b_list[1] 136 | tmp_out = torch.einsum('bf,bfj->bj', tmp_out, w2) 137 | tmp_out += b2 138 | # Adapter-layer4: Domain norm 139 | mean = tmp_out.mean(dim=0) 140 | var = tmp_out.var(dim=0) 141 | x_norm = (tmp_out - mean) / torch.sqrt(var + self.eps) 142 | out = self.gamma1 * x_norm + self.bias1 143 | # Adapter: short-cut 144 | mlp_out = out + mlp_out 145 | 146 | x_stack = torch.cat([cn_out, mlp_out], dim=1) 147 | y = self.linear(x_stack) 148 | out_l.append(torch.sigmoid(y)) 149 | 150 | final = torch.zeros_like(out_l[0]) 151 | for d in range(self.num_domains): 152 | final = torch.where(mask[d].unsqueeze(1), out_l[d], final) 153 | return final.squeeze(1) 154 | -------------------------------------------------------------------------------- /HAMUR/models/multi_domain/adapter_wd.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn.parameter import Parameter 6 | from ...basic.layers import LR, MLP, EmbeddingLayer 7 | 8 | 9 | class WideDeep_MD(torch.nn.Module): 10 | """ 11 | Multi-domain Wide & Deep Learning model.
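    A single shared Wide & Deep backbone scores every sample; a per-domain
    mask built from x["domain_indicator"] then routes each sample to the
    output of its own domain.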
12 | """ 13 | 14 | def __init__(self, wide_features, num_domains, deep_features, mlp_params): 15 | super(WideDeep_MD, self).__init__() 16 | self.num_domains = num_domains 17 | self.wide_features = wide_features 18 | self.deep_features = deep_features 19 | self.wide_dims = sum([fea.embed_dim for fea in wide_features]) 20 | self.deep_dims = sum([fea.embed_dim for fea in deep_features]) 21 | self.linear = LR(self.wide_dims) 22 | self.embedding = EmbeddingLayer(wide_features + deep_features) 23 | self.mlp = MLP(self.deep_dims, **mlp_params) 24 | 25 | def forward(self, x): 26 | domain_id = x["domain_indicator"].clone().detach() 27 | 28 | input_wide = self.embedding(x, self.wide_features, squeeze_dim=True) #[batch_size, wide_dims] 29 | input_deep = self.embedding(x, self.deep_features, squeeze_dim=True) #[batch_size, deep_dims] 30 | 31 | # mask 存储每个batch中的domain id 32 | mask = [] 33 | # out 存储 34 | out = [] 35 | 36 | for d in range(self.num_domains): 37 | domain_mask = (domain_id == d) 38 | mask.append(domain_mask) 39 | 40 | domain_input_wide = input_wide 41 | domain_input_deep = input_deep 42 | 43 | y_wide = self.linear(domain_input_wide) #[batch_size, 1] 44 | y_deep = self.mlp(domain_input_deep) #[batch_size, 1] 45 | y = y_wide + y_deep 46 | out.append(torch.sigmoid(y)) 47 | 48 | final = torch.zeros_like(out[0]) 49 | for d in range(self.num_domains): 50 | final = torch.where(mask[d].unsqueeze(1), out[d], final) 51 | return final.squeeze(1) 52 | 53 | class WideDeep_MD_adp(torch.nn.Module): 54 | """ 55 | Multi-domain Wide & Deep Learning model with adapter. 56 | """ 57 | 58 | def __init__(self, wide_features, num_domains, deep_features, k, mlp_params, hyper_dims): 59 | super(WideDeep_MD_adp, self).__init__() 60 | self.num_domains = num_domains 61 | self.wide_features = wide_features 62 | self.deep_features = deep_features 63 | self.wide_dims = sum([fea.embed_dim for fea in wide_features]) 64 | self.deep_dims = sum([fea.embed_dim for fea in deep_features]) 65 | self.linear = LR(self.wide_dims) 66 | self.embedding = EmbeddingLayer(wide_features + deep_features) 67 | self.mlp = MLP(self.deep_dims, **mlp_params, output_layer = False) 68 | self.mlp_final = LR(mlp_params["dims"][-1]) 69 | 70 | 71 | 72 | # instance representation matrix initiation 73 | self.k = k 74 | 75 | # u,v initiation 76 | self.u = nn.ParameterList() 77 | self.v = nn.ParameterList() 78 | 79 | self.u.append(Parameter(torch.ones((mlp_params["dims"][-1], self.k)), requires_grad=True)) 80 | self.u.append(Parameter(torch.ones((32, self.k)), requires_grad=True)) 81 | 82 | self.v.append(Parameter(torch.ones((self.k, 32)), requires_grad=True)) 83 | self.v.append(Parameter(torch.ones((self.k, mlp_params["dims"][-1])), requires_grad=True)) 84 | 85 | # hyper-network initiation 86 | hyper_dims += [self.k * self.k] 87 | input_dim = self.wide_dims +self.deep_dims 88 | hyper_layers = [] 89 | for i_dim in hyper_dims: 90 | hyper_layers.append(nn.Linear(input_dim, i_dim)) 91 | hyper_layers.append(nn.BatchNorm1d(i_dim)) 92 | hyper_layers.append(nn.ReLU()) 93 | hyper_layers.append(nn.Dropout(p=0)) 94 | input_dim = i_dim 95 | self.hyper_net = nn.Sequential(*hyper_layers) 96 | 97 | # adapter parameters initiation 98 | self.b_list = nn.ParameterList() 99 | self.b_list.append(Parameter(torch.zeros((32)), requires_grad=True)) 100 | self.b_list.append(Parameter(torch.zeros(mlp_params["dims"][-1]), requires_grad=True)) 101 | self.gamma1 = nn.Parameter(torch.ones(mlp_params["dims"][-1])) 102 | self.bias1 = nn.Parameter(torch.zeros(mlp_params["dims"][-1])) 
103 | self.eps = 1e-5 104 | 105 | 106 | def forward(self, x): 107 | domain_id = x["domain_indicator"].clone().detach() 108 | 109 | input_wide = self.embedding(x, self.wide_features, squeeze_dim=True) #[batch_size, wide_dims] 110 | input_deep = self.embedding(x, self.deep_features, squeeze_dim=True) #[batch_size, deep_dims] 111 | 112 | hyper_out_full = self.hyper_net(torch.cat((input_wide,input_deep),dim=1)) # B * (k * k) 113 | hyper_out = hyper_out_full.reshape(-1, self.k, self.k) # B * k * k 114 | 115 | # mask list 116 | mask = [] 117 | # out list 118 | out_l = [] 119 | 120 | for d in range(self.num_domains): 121 | domain_mask = (domain_id == d) 122 | mask.append(domain_mask) 123 | 124 | domain_input_wide = input_wide 125 | domain_input_deep = input_deep 126 | 127 | y_wide = self.linear(domain_input_wide) #[batch_size, 1] 128 | y_deep = self.mlp(domain_input_deep) #[batch_size, f] 129 | 130 | # Adapter cell 131 | # Adapter layer1: Down projection 132 | w1 = torch.einsum('mi,bij,jn->bmn', self.u[0], hyper_out, self.v[0]) 133 | b1 = self.b_list[0] 134 | tmp_out = torch.einsum('bf,bfj->bj', y_deep, w1) 135 | tmp_out += b1 136 | # Adapter layer2: Non-linear 137 | tmp_out = torch.sigmoid(tmp_out) 138 | # Adapter layer3: Up projection 139 | w2 = torch.einsum('mi,bij,jn->bmn', self.u[1], hyper_out, self.v[1]) 140 | b2 = self.b_list[1] 141 | tmp_out = torch.einsum('bf,bfj->bj', tmp_out, w2) 142 | tmp_out += b2 143 | # Adapter layer4: Domain Norm 144 | mean = tmp_out.mean(dim=0) 145 | var = tmp_out.var(dim=0) 146 | x_norm = (tmp_out - mean) / torch.sqrt(var + self.eps) 147 | out = self.gamma1 * x_norm + self.bias1 148 | # Adapter: short-cut 149 | mlp_out = out + y_deep 150 | 151 | mlp_out = self.mlp_final(mlp_out) # linear 152 | 153 | y = y_wide + mlp_out 154 | out_l.append(torch.sigmoid(y)) 155 | 156 | final = torch.zeros_like(out_l[0]) 157 | for d in range(self.num_domains): 158 | final = torch.where(mask[d].unsqueeze(1), out_l[d], final) 159 | return final.squeeze(1) 160 | 161 | -------------------------------------------------------------------------------- /HAMUR/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .ctr_trainer import CTRTrainer,CTRTrainerMultiDomain -------------------------------------------------------------------------------- /HAMUR/trainers/ctr_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import tqdm 5 | from sklearn.metrics import roc_auc_score,log_loss 6 | from ..basic.callback import EarlyStopper 7 | 8 | 9 | class CTRTrainer(object): 10 | """A general trainer for single task learning. 11 | 12 | Args: 13 | model (nn.Module): any single task learning model. 14 | optimizer_fn (torch.optim): optimizer function of pytorch (default = `torch.optim.Adam`). 15 | optimizer_params (dict): parameters of optimizer_fn. 16 | scheduler_fn (torch.optim.lr_scheduler) : torch scheduling class, e.g. `torch.optim.lr_scheduler.StepLR`. 17 | scheduler_params (dict): parameters of optimizer scheduler_fn. 18 | n_epoch (int): epoch number of training. 19 | earlystop_patience (int): how long to wait after last time validation auc improved (default=10). 20 | device (str): `"cpu"` or `"cuda:0"` 21 | gpus (list): id of multi gpu (default=[]). If the length >=1, then the model will be wrapped by nn.DataParallel. 22 | model_path (str): the path you want to save the model (default="./"). Note only save the best weight in the validation data.
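    A minimal usage sketch (the dataloader names here are illustrative):

        trainer = CTRTrainer(model, n_epoch=5, device="cuda:0")
        trainer.fit(train_dataloader, val_dataloader)
        test_auc = trainer.evaluate(trainer.model, test_dataloader)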
23 | """ 24 | 25 | def __init__( 26 | self, 27 | model, 28 | optimizer_fn=torch.optim.Adam, 29 | optimizer_params=None, 30 | scheduler_fn=None, 31 | scheduler_params=None, 32 | n_epoch=10, 33 | earlystop_patience=10, 34 | device="cpu", 35 | gpus=None, 36 | model_path="./", 37 | ): 38 | self.model = model # for uniform weights save method in one gpu or multi gpu 39 | if gpus is None: 40 | gpus = [] 41 | self.gpus = gpus 42 | if len(gpus) > 1: 43 | print('parallel running on these gpus:', gpus) 44 | self.model = torch.nn.DataParallel(self.model, device_ids=gpus) 45 | self.device = torch.device(device) #torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 46 | self.model.to(self.device) 47 | if optimizer_params is None: 48 | optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5} 49 | self.optimizer = optimizer_fn(self.model.parameters(), **optimizer_params) #default optimizer 50 | self.scheduler = None 51 | if scheduler_fn is not None: 52 | self.scheduler = scheduler_fn(self.optimizer, **scheduler_params) 53 | self.criterion = torch.nn.BCELoss() #default loss cross_entropy 54 | self.evaluate_fn = roc_auc_score #default evaluate function 55 | self.evaluate_fn_logloss = log_loss 56 | self.n_epoch = n_epoch 57 | self.early_stopper = EarlyStopper(patience=earlystop_patience) 58 | self.model_path = model_path 59 | 60 | def train_one_epoch(self, data_loader, log_interval=10): 61 | self.model.train() 62 | total_loss = 0 63 | tk0 = tqdm.tqdm(data_loader, desc="train", smoothing=0, mininterval=1.0) 64 | for i, (x_dict, y) in enumerate(tk0): 65 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} #tensor to GPU 66 | y = y.to(self.device) 67 | y_pred = self.model(x_dict) 68 | loss = self.criterion(y_pred, y.float()) 69 | self.model.zero_grad() 70 | loss.backward() 71 | self.optimizer.step() 72 | total_loss += loss.item() 73 | if (i + 1) % log_interval == 0: 74 | tk0.set_postfix(loss=total_loss / log_interval) 75 | total_loss = 0 76 | 77 | def fit(self, train_dataloader, val_dataloader=None): 78 | for epoch_i in range(self.n_epoch): 79 | print('epoch:', epoch_i) 80 | self.train_one_epoch(train_dataloader) 81 | if self.scheduler is not None: 82 | if epoch_i % self.scheduler.step_size == 0: 83 | print("Current lr : {}".format(self.optimizer.state_dict()['param_groups'][0]['lr'])) 84 | self.scheduler.step() #update lr in epoch level by scheduler 85 | if val_dataloader: 86 | auc = self.evaluate(self.model, val_dataloader) 87 | log_loss = self.evaluate_logloss(self.model, val_dataloader) 88 | print('epoch:', epoch_i, 'validation: auc:', auc, 'log loss:', log_loss) 89 | if self.early_stopper.stop_training(auc, self.model.state_dict()): 90 | print(f'validation: best auc: {self.early_stopper.best_auc}') 91 | self.model.load_state_dict(self.early_stopper.best_weights) 92 | break 93 | torch.save(self.model.state_dict(), os.path.join(self.model_path, "model.pth")) #save best auc model 94 | 95 | def evaluate(self, model, data_loader): 96 | model.eval() 97 | targets, predicts = list(), list() 98 | with torch.no_grad(): 99 | tk0 = tqdm.tqdm(data_loader, desc="validation", smoothing=0, mininterval=1.0) 100 | for i, (x_dict, y) in enumerate(tk0): 101 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} 102 | y = y.to(self.device) 103 | y_pred = model(x_dict) 104 | targets.extend(y.tolist()) 105 | predicts.extend(y_pred.tolist()) 106 | return self.evaluate_fn(targets, predicts) 107 | def evaluate_logloss(self, model, data_loader): 108 | model.eval() 109 | targets, predicts = list(), list() 110 | 
with torch.no_grad(): 111 | tk0 = tqdm.tqdm(data_loader, desc="validation", smoothing=0, mininterval=1.0) 112 | for i, (x_dict, y) in enumerate(tk0): 113 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} 114 | y = y.to(self.device) 115 | y_pred = model(x_dict) 116 | targets.extend(y.tolist()) 117 | predicts.extend(y_pred.tolist()) 118 | return log_loss(targets,predicts) 119 | 120 | def evaluate_multi_domain_logloss(self, model, data_loader): 121 | model.eval() 122 | targets, predicts = list() ,list() 123 | targets1, predicts1 = list() ,list() 124 | targets2, predicts2 = list() ,list() 125 | targets3, predicts3 = list() ,list() 126 | with torch.no_grad(): 127 | tk0 = tqdm.tqdm(data_loader, desc="predict", smoothing=0, mininterval=1.0) 128 | for i, (x_dict, y) in enumerate(tk0): 129 | domain_mask_list = [] 130 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} 131 | domain_id = x_dict["domain_indicator"].clone().detach() 132 | 133 | y = y.to(self.device) 134 | y_pred = model(x_dict) 135 | for d in range(3): 136 | domain_mask = (domain_id == d) 137 | domain_mask_list.append(domain_mask) 138 | 139 | y1 = y[domain_mask_list[0]].tolist() 140 | y_pred_1 = y_pred[domain_mask_list[0]].tolist() 141 | targets1.extend(y1) 142 | predicts1.extend(y_pred_1) 143 | 144 | y2 = y[domain_mask_list[1]].tolist() 145 | y_pred_2 = y_pred[domain_mask_list[1]].tolist() 146 | targets2.extend(y2) 147 | predicts2.extend(y_pred_2) 148 | 149 | y3 = y[domain_mask_list[2]].tolist() 150 | y_pred_3 = y_pred[domain_mask_list[2]].tolist() 151 | targets3.extend(y3) 152 | predicts3.extend(y_pred_3) 153 | 154 | targets.extend(y.tolist()) 155 | predicts.extend(y_pred.tolist()) 156 | domain1_val = log_loss(targets1, predicts1) if predicts1 else None 157 | domain2_val = log_loss(targets2, predicts2) if predicts2 else None 158 | domain3_val = log_loss(targets3, predicts3) if predicts3 else None 159 | total_val = log_loss(targets, predicts) if predicts else None 160 | 161 | return domain1_val, domain2_val, domain3_val, total_val 162 | def evaluate_multi_domain_auc(self, model, data_loader): 163 | model.eval() 164 | targets, predicts = list() ,list() 165 | targets1, predicts1 = list() ,list() 166 | targets2, predicts2 = list() ,list() 167 | targets3, predicts3 = list() ,list() 168 | with torch.no_grad(): 169 | tk0 = tqdm.tqdm(data_loader, desc="predict", smoothing=0, mininterval=1.0) 170 | for i, (x_dict, y) in enumerate(tk0): 171 | domain_mask_list = [] 172 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} 173 | domain_id = x_dict["domain_indicator"].clone().detach() 174 | 175 | y = y.to(self.device) 176 | y_pred = model(x_dict) 177 | for d in range(3): 178 | domain_mask = (domain_id == d) 179 | domain_mask_list.append(domain_mask) 180 | 181 | y1 = y[domain_mask_list[0]].tolist() 182 | y_pred_1 = y_pred[domain_mask_list[0]].tolist() 183 | targets1.extend(y1) 184 | predicts1.extend(y_pred_1) 185 | 186 | y2 = y[domain_mask_list[1]].tolist() 187 | y_pred_2 = y_pred[domain_mask_list[1]].tolist() 188 | targets2.extend(y2) 189 | predicts2.extend(y_pred_2) 190 | 191 | y3 = y[domain_mask_list[2]].tolist() 192 | y_pred_3 = y_pred[domain_mask_list[2]].tolist() 193 | targets3.extend(y3) 194 | predicts3.extend(y_pred_3) 195 | 196 | targets.extend(y.tolist()) 197 | predicts.extend(y_pred.tolist()) 198 | domain1_val = self.evaluate_fn(targets1, predicts1) if predicts1 else None 199 | domain2_val = self.evaluate_fn(targets2, predicts2) if predicts2 else None 200 | domain3_val = self.evaluate_fn(targets3, predicts3) 
if predicts3 else None 201 | total_val = self.evaluate_fn(targets, predicts) if predicts else None 202 | 203 | return domain1_val, domain2_val, domain3_val, total_val 204 | 205 | def predict(self, model, data_loader): 206 | model.eval() 207 | predicts = list() 208 | with torch.no_grad(): 209 | tk0 = tqdm.tqdm(data_loader, desc="predict", smoothing=0, mininterval=1.0) 210 | for i, (x_dict, y) in enumerate(tk0): 211 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} 212 | y = y.to(self.device) 213 | y_pred = model(x_dict) 214 | predicts.extend(y_pred.tolist()) 215 | return predicts 216 | 217 | class CTRTrainerMultiDomain(object): 218 | """A general trainer for single-task multi-domain learning. 219 | 220 | Args: 221 | model (nn.Module): any multi-domain learning model. 222 | optimizer_fn (torch.optim): optimizer function of pytorch (default = `torch.optim.Adam`). 223 | optimizer_params (dict): parameters of optimizer_fn. 224 | scheduler_fn (torch.optim.lr_scheduler) : torch scheduling class, e.g. `torch.optim.lr_scheduler.StepLR`. 225 | scheduler_params (dict): parameters of optimizer scheduler_fn. 226 | n_epoch (int): epoch number of training. 227 | earlystop_patience (int): how long to wait after last time validation auc improved (default=10). 228 | device (str): `"cpu"` or `"cuda:0"` 229 | gpus (list): id of multi gpu (default=[]). If the length >=1, then the model will be wrapped by nn.DataParallel. 230 | model_path (str): the path you want to save the model (default="./"). Note only save the best weight in the validation data. 231 | """ 232 | 233 | def __init__( 234 | self, 235 | model, 236 | optimizer_fn=torch.optim.Adam, 237 | optimizer_params=None, 238 | scheduler_fn=None, 239 | scheduler_params=None, 240 | n_epoch=10, 241 | earlystop_patience=10, 242 | device="cpu", 243 | gpus=None, 244 | model_path="./", 245 | ): 246 | self.model = model # for uniform weights save method in one gpu or multi gpu 247 | if gpus is None: 248 | gpus = [] 249 | self.gpus = gpus 250 | if len(gpus) > 1: 251 | print('parallel running on these gpus:', gpus) 252 | self.model = torch.nn.DataParallel(self.model, device_ids=gpus) 253 | self.device = torch.device(device) #torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 254 | self.model.to(self.device) 255 | if optimizer_params is None: 256 | optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5} 257 | self.optimizer = optimizer_fn(self.model.parameters(), **optimizer_params) #default optimizer 258 | self.scheduler = None 259 | if scheduler_fn is not None: 260 | self.scheduler = scheduler_fn(self.optimizer, **scheduler_params) 261 | self.criterion = torch.nn.BCELoss() # default loss: binary cross-entropy 262 | self.evaluate_fn = roc_auc_score #default evaluate function 263 | self.n_epoch = n_epoch 264 | self.early_stopper = EarlyStopper(patience=earlystop_patience) 265 | self.model_path = model_path 266 | 267 | def train_one_epoch(self, data_loader, log_interval=10): 268 | self.model.train() 269 | total_loss = 0 270 | tk0 = tqdm.tqdm(data_loader, desc="train", smoothing=0, mininterval=1.0) 271 | for i, (x_dict, y) in enumerate(tk0): 272 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} #tensor to GPU 273 | 274 | 275 | 276 | y = y.to(self.device) 277 | y_pred = self.model(x_dict) 278 | 279 | 280 | loss = self.criterion(y_pred.reshape(-1), y.reshape(-1).float()) 281 | self.model.zero_grad() 282 | loss.backward() 283 | self.optimizer.step() 284 | total_loss += loss.item() 285 | if (i + 1) % log_interval == 0: 286 | tk0.set_postfix(loss=total_loss
/ log_interval) 287 | total_loss = 0 288 | def fit(self, train_dataloader, val_dataloader=None): 289 | for epoch_i in range(self.n_epoch): 290 | print('epoch:', epoch_i) 291 | self.train_one_epoch(train_dataloader) 292 | if self.scheduler is not None: 293 | if epoch_i % self.scheduler.step_size == 0: 294 | print("Current lr : {}".format(self.optimizer.state_dict()['param_groups'][0]['lr'])) 295 | self.scheduler.step() #update lr in epoch level by scheduler 296 | if val_dataloader: 297 | auc = self.evaluate(self.model, val_dataloader) 298 | print('epoch:', epoch_i, 'validation: auc:', auc) 299 | if self.early_stopper.stop_training(auc, self.model.state_dict()): 300 | print(f'validation: best auc: {self.early_stopper.best_auc}') 301 | self.model.load_state_dict(self.early_stopper.best_weights) 302 | break 303 | torch.save(self.model.state_dict(), os.path.join(self.model_path, "model.pth")) #save best auc model 304 | 305 | def evaluate(self, model, data_loader): 306 | model.eval() 307 | targets, predicts = list(), list() 308 | 309 | with torch.no_grad(): 310 | tk0 = tqdm.tqdm(data_loader, desc="validation", smoothing=0, mininterval=1.0) 311 | for i, (x_dict, y) in enumerate(tk0): 312 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} 313 | y = y.to(self.device) 314 | y_pred = model(x_dict) 315 | 316 | domain1_pre = [] 317 | domain1_target = [] 318 | domain2_pre = [] 319 | domain2_target = [] 320 | domain3_pre = [] 321 | domain3_target = [] 322 | 323 | domain1_target.extend(y[:,0].reshape(-1).tolist()) 324 | domain2_target.extend(y[:, 1].reshape(-1).tolist()) 325 | domain3_target.extend(y[:, 2].reshape(-1).tolist()) 326 | 327 | domain1_pre.extend(y_pred[:,0].reshape(-1).tolist()) 328 | domain2_pre.extend(y_pred[:,1].reshape(-1).tolist()) 329 | domain3_pre.extend(y_pred[:,2].reshape(-1).tolist()) 330 | 331 | targets.extend(y.reshape(-1).tolist()) 332 | predicts.extend(y_pred.reshape(-1).tolist()) 333 | print("domain1 auc:{}".format(self.evaluate_fn(domain1_target, domain1_pre))) 334 | print("domain2 auc:{}".format(self.evaluate_fn(domain2_target, domain2_pre))) 335 | print("domain3 auc:{}".format(self.evaluate_fn(domain3_target, domain3_pre))) 336 | return self.evaluate_fn(targets, predicts) 337 | 338 | def predict(self, model, data_loader): 339 | model.eval() 340 | predicts = list() 341 | with torch.no_grad(): 342 | tk0 = tqdm.tqdm(data_loader, desc="predict", smoothing=0, mininterval=1.0) 343 | for i, (x_dict, y) in enumerate(tk0): 344 | x_dict = {k: v.to(self.device) for k, v in x_dict.items()} 345 | y = y.to(self.device) 346 | y_pred = model(x_dict) 347 | predicts.extend(y_pred.tolist()) 348 | return predicts -------------------------------------------------------------------------------- /HAMUR/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/HAMUR/53d8dd588282bc288f2621b8fa85e2df9b910e10/HAMUR/utils/__init__.py -------------------------------------------------------------------------------- /HAMUR/utils/data.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | import numpy as np 4 | import pandas as pd 5 | import tqdm 6 | from sklearn.preprocessing import LabelEncoder 7 | from sklearn.metrics import roc_auc_score, mean_squared_error 8 | from torch.utils.data import Dataset, DataLoader, random_split 9 | 10 | 11 | class TorchDataset(Dataset): 12 | 13 | def __init__(self, x, y): 14 |
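        # x: dict mapping feature name -> array-like of per-sample values; y: array-like of labels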
super().__init__() 15 | self.x = x 16 | self.y = y 17 | 18 | def __getitem__(self, index): 19 | return {k: v[index] for k, v in self.x.items()}, self.y[index] 20 | 21 | def __len__(self): 22 | return len(self.y) 23 | 24 | 25 | class PredictDataset(Dataset): 26 | 27 | def __init__(self, x): 28 | super().__init__() 29 | self.x = x 30 | 31 | def __getitem__(self, index): 32 | return {k: v[index] for k, v in self.x.items()} 33 | 34 | def __len__(self): 35 | return len(self.x[list(self.x.keys())[0]]) 36 | 37 | 38 | class MatchDataGenerator(object): 39 | 40 | def __init__(self, x, y=[]): 41 | super().__init__() 42 | if len(y) != 0: 43 | self.dataset = TorchDataset(x, y) 44 | else: # For pair-wise models trained without an explicit label 45 | self.dataset = PredictDataset(x) 46 | 47 | def generate_dataloader(self, x_test_user, x_all_item, batch_size, num_workers=8): 48 | train_dataloader = DataLoader(self.dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) 49 | test_dataset = PredictDataset(x_test_user) 50 | 51 | # shuffle = False to keep same order as ground truth 52 | test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) 53 | item_dataset = PredictDataset(x_all_item) 54 | item_dataloader = DataLoader(item_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) 55 | return train_dataloader, test_dataloader, item_dataloader 56 | 57 | 58 | class DataGenerator(object): 59 | 60 | def __init__(self, x, y): 61 | super().__init__() 62 | self.dataset = TorchDataset(x, y) 63 | self.length = len(self.dataset) 64 | 65 | def generate_dataloader(self, x_val=None, y_val=None, x_test=None, y_test=None, split_ratio=None, batch_size=16, 66 | num_workers=8, drop_last_flag=False): 67 | if split_ratio is not None: 68 | train_length = int(self.length * split_ratio[0]) 69 | val_length = int(self.length * split_ratio[1]) 70 | test_length = self.length - train_length - val_length 71 | print("the samples of train : val : test are %d : %d : %d" % (train_length, val_length, test_length)) 72 | train_dataset, val_dataset, test_dataset = random_split(self.dataset, 73 | (train_length, val_length, test_length)) 74 | else: 75 | train_dataset = self.dataset 76 | val_dataset = TorchDataset(x_val, y_val) 77 | test_dataset = TorchDataset(x_test, y_test) 78 | 79 | train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers,drop_last = drop_last_flag) 80 | val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers,drop_last = drop_last_flag) 81 | test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers,drop_last = drop_last_flag) 82 | return train_dataloader, val_dataloader, test_dataloader 83 | 84 | 85 | def get_auto_embedding_dim(num_classes): 86 | """ Calculate the dim of embedding vector according to number of classes in the category 87 | emb_dim = [6 * (num_classes)^(1/4)] 88 | reference: Deep & Cross Network for Ad Click Predictions.(ADKDD'17) 89 | Args: 90 | num_classes: number of classes in the category 91 | 92 | Returns: 93 | the dim of embedding vector 94 | """ 95 | return int(np.floor(6 * np.power(num_classes, 0.25))) 96 | 97 | 98 | def get_loss_func(task_type="classification"): 99 | if task_type == "classification": 100 | return torch.nn.BCELoss() 101 | elif task_type == "regression": 102 | return torch.nn.MSELoss() 103 | else: 104 | raise ValueError("task_type must be classification or regression") 105 | 106 | 107 |
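# A quick usage sketch for the helpers above (illustrative values):
#   loss_fn = get_loss_func("classification")   # -> torch.nn.BCELoss()
#   emb_dim = get_auto_embedding_dim(1000)      # -> int(6 * 1000 ** 0.25) = 33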
def get_metric_func(task_type="classification"): 108 | if task_type == "classification": 109 | return roc_auc_score 110 | elif task_type == "regression": 111 | return mean_squared_error 112 | else: 113 | raise ValueError("task_type must be classification or regression") 114 | 115 | 116 | def generate_seq_feature(data, 117 | user_col, 118 | item_col, 119 | time_col, 120 | item_attribute_cols=[], 121 | min_item=0, 122 | shuffle=True, 123 | max_len=50): 124 | """Generate sequence features and negative samples for ranking. 125 | 126 | Args: 127 | data (pd.DataFrame): the raw data. 128 | user_col (str): the col name of user_id 129 | item_col (str): the col name of item_id 130 | time_col (str): the col name of timestamp 131 | item_attribute_cols (list[str], optional): the other attribute cols of item which you want to generate sequence feature. Defaults to `[]`. 132 | sample_method (int, optional): the negative sample method `{ 133 | 0: "random sampling", 134 | 1: "popularity sampling method used in word2vec", 135 | 2: "popularity sampling method by `log(count+1)+1e-6`", 136 | 3: "tencent RALM sampling"}`. 137 | Defaults to 0 (note: not exposed in this function's signature). 138 | min_item (int, optional): the min item each user must have. Defaults to 0. 139 | shuffle (bool, optional): shuffle the generated samples if True. 140 | max_len (int, optional): the max length of a user history sequence. 141 | 142 | Returns: 143 | pd.DataFrame: split train, val and test data with sequence features by time. 144 | """ 145 | for feat in data: 146 | le = LabelEncoder() 147 | data[feat] = le.fit_transform(data[feat]) 148 | data[feat] = data[feat].apply(lambda x: x + 1) # 0 to be used as the symbol for padding 149 | data = data.astype('int32') 150 | 151 | # generate item to attribute mapping 152 | n_items = data[item_col].max() 153 | item2attr = {} 154 | if len(item_attribute_cols) > 0: 155 | for col in item_attribute_cols: 156 | mapping = data[[item_col, col]] 157 | item2attr[col] = mapping.set_index([item_col])[col].to_dict() 158 | 159 | train_data, val_data, test_data = [], [], [] 160 | data.sort_values(time_col, inplace=True) 161 | # Sliding window to construct negative samples 162 | for uid, hist in tqdm.tqdm(data.groupby(user_col), desc='generate sequence features'): 163 | pos_list = hist[item_col].tolist() 164 | len_pos_list = len(pos_list) 165 | if len_pos_list < min_item: # drop this user when their positive items < min_item 166 | continue 167 | 168 | neg_list = [neg_sample(pos_list, n_items) for _ in range(len_pos_list)] 169 | for i in range(1, min(len_pos_list, max_len)): 170 | hist_item = pos_list[:i] 171 | hist_item = hist_item + [0] * (max_len - len(hist_item)) 172 | pos_item = pos_list[i] 173 | neg_item = neg_list[i] 174 | pos_seq = [1, pos_item, uid, hist_item] 175 | neg_seq = [0, neg_item, uid, hist_item] 176 | if len(item_attribute_cols) > 0: 177 | for attr_col in item_attribute_cols: # the history of item attribute features 178 | hist_attr = hist[attr_col].tolist()[:i] 179 | hist_attr = hist_attr + [0] * (max_len - len(hist_attr)) 180 | pos2attr = [hist_attr, item2attr[attr_col][pos_item]] 181 | neg2attr = [hist_attr, item2attr[attr_col][neg_item]] 182 | pos_seq += pos2attr 183 | neg_seq += neg2attr 184 | if i == len_pos_list - 1: 185 | test_data.append(pos_seq) 186 | test_data.append(neg_seq) 187 | elif i == len_pos_list - 2: 188 | val_data.append(pos_seq) 189 | val_data.append(neg_seq) 190 | else: 191 | train_data.append(pos_seq) 192 | train_data.append(neg_seq) 193 | 194 | col_name = ['label', 'target_item_id', user_col, 'hist_item_id'] 195 | if len(item_attribute_cols) > 0:
196 | for attr_col in item_attribute_cols: # the history of item attribute features 197 | name = ['hist_'+attr_col, 'target_'+attr_col] 198 | col_name += name 199 | 200 | # shuffle 201 | if shuffle: 202 | random.shuffle(train_data) 203 | random.shuffle(val_data) 204 | random.shuffle(test_data) 205 | 206 | train = pd.DataFrame(train_data, columns=col_name) 207 | val = pd.DataFrame(val_data, columns=col_name) 208 | test = pd.DataFrame(test_data, columns=col_name) 209 | 210 | return train, val, test 211 | 212 | 213 | def df_to_dict(data): 214 | """ 215 | Convert a DataFrame to the dict input format that the network accepts 216 | Args: 217 | data (pd.DataFrame): datasets of type DataFrame 218 | Returns: 219 | The converted dict, which can be fed directly to the network 220 | """ 221 | data_dict = data.to_dict('list') 222 | for key in data.keys(): 223 | data_dict[key] = np.array(data_dict[key]) 224 | return data_dict 225 | 226 | 227 | def neg_sample(click_hist, item_size): 228 | neg = random.randint(1, item_size) 229 | while neg in click_hist: 230 | neg = random.randint(1, item_size) 231 | return neg 232 | 233 | 234 | def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.): 235 | """ Pads sequences (list of list) to the ndarray of same length. 236 | This is an equivalent implementation of tf.keras.preprocessing.sequence.pad_sequences 237 | reference: https://github.com/huawei-noah/benchmark/tree/main/FuxiCTR/fuxictr 238 | """ 239 | assert padding in ["pre", "post"], "Invalid padding={}.".format(padding) 240 | assert truncating in ["pre", "post"], "Invalid truncating={}.".format(truncating) 241 | 242 | if maxlen is None: 243 | maxlen = max(len(x) for x in sequences) 244 | arr = np.full((len(sequences), maxlen), value, dtype=dtype) 245 | for idx, x in enumerate(sequences): 246 | if len(x) == 0: 247 | continue # empty list 248 | if truncating == 'pre': 249 | trunc = x[-maxlen:] 250 | else: 251 | trunc = x[:maxlen] 252 | trunc = np.asarray(trunc, dtype=dtype) 253 | 254 | if padding == 'pre': 255 | arr[idx, -len(trunc):] = trunc 256 | else: 257 | arr[idx, :len(trunc)] = trunc 258 | return arr 259 | 260 | 261 | def array_replace_with_dict(array, dic): 262 | """Replace values in NumPy array based on dictionary. 263 | Args: 264 | array (np.array): a numpy array 265 | dic (dict): a map dict 266 | 267 | Returns: 268 | np.array: the array with values replaced 269 | """ 270 | # Extract out keys and values 271 | k = np.array(list(dic.keys())) 272 | v = np.array(list(dic.values())) 273 | 274 | # Get argsort indices 275 | idx = k.argsort() 276 | return v[idx[np.searchsorted(k, array, sorter=idx)]] 277 | 278 | 279 | 280 | 281 | 282 | 283 | def reduce_mem_usage(df): 284 | """Reduce memory.
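    Iterates over the columns and downcasts each numeric column to the smallest
    int/float dtype that holds its observed min/max; object columns become
    pandas 'category'.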
285 | Args: 286 | df (pd.dataframe): a pandas dataframe 287 | Returns: 288 | df (pd.dataframe): a pandas dataframe 289 | """ 290 | start_mem = df.memory_usage().sum() / 1024 ** 2 291 | print('Memory usage of dataframe is {:.2f} MB'.format(start_mem)) 292 | 293 | for col in df.columns: 294 | col_type = df[col].dtype 295 | 296 | if col_type != object: 297 | c_min = df[col].min() 298 | c_max = df[col].max() 299 | if str(col_type)[:3] == 'int': 300 | if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max: 301 | df[col] = df[col].astype(np.int8) 302 | elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max: 303 | df[col] = df[col].astype(np.int16) 304 | elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max: 305 | df[col] = df[col].astype(np.int32) 306 | elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max: 307 | df[col] = df[col].astype(np.int64) 308 | else: 309 | if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max: 310 | df[col] = df[col].astype(np.float16) 311 | elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max: 312 | df[col] = df[col].astype(np.float32) 313 | else: 314 | df[col] = df[col].astype(np.float64) 315 | else: 316 | df[col] = df[col].astype('category') 317 | 318 | end_mem = df.memory_usage().sum() / 1024 ** 2 319 | print('Memory usage after optimization is: {:.2f} MB'.format(end_mem)) 320 | print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem)) 321 | 322 | return df -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HAMUR 2 | 3 | Official implementation of our paper [HAMUR: Hyper Adapter for Multi-Domain Recommendation](https://arxiv.org/pdf/2309.06217.pdf) in CIKM 2023. 
4 | 
5 | Please cite our paper if you find this repository interesting or helpful:
6 | ```
7 | @inproceedings{li2023hamur,
8 |   title={HAMUR: Hyper Adapter for Multi-Domain Recommendation},
9 |   author={Li, Xiaopeng and Yan, Fan and Zhao, Xiangyu and Wang, Yichao and Chen, Bo and Guo, Huifeng and Tang, Ruiming},
10 |   booktitle={Proceedings of the 32nd ACM International Conference on Information and Knowledge Management},
11 |   pages={1268--1277},
12 |   year={2023}
13 | }
14 | ```
15 | 
16 | ## Introduction
17 | Source code of HAMUR: Hyper Adapter for Multi-Domain Recommendation, in Proceedings of the 32nd ACM International Conference on Information and Knowledge Management (CIKM '23).
18 | !['Img_HAMUR'](framework.jpg)
19 | 
20 | ## Environment Setting
21 | * torch >=1.7.0
22 | * numpy >=1.23.5
23 | * pandas >=1.5.3
24 | * scikit-learn >=0.23.2
25 | 
26 | ## Dataset Download
27 | In this paper, we use two datasets, **Ali-CCP** and **MovieLens**. Dataset samples are provided in `examples/data`.
28 | 
29 | Full dataset download:
30 | * Ali-CCP: Download from https://tianchi.aliyun.com/dataset/408.
31 | * MovieLens: The raw data file can be found in [Torch-Rechub-ml-1m](https://github.com/morningsky/Torch-RecHub/tree/main/examples/matching/data/ml-1m), and you can directly download the processed file from https://cowtransfer.com/s/5a3ab69ebd314e.
32 | 
33 | ## Models
34 | In this repo, we offer the following models; their structures are shown in the figure below.
35 | !['Img_DifferentBackbone'](DifferentBackbone.jpg)
36 | * Pure MLP as the multi-domain backbone model.
37 | * MLP + HAMUR
38 | * Pure Wide & Deep as the multi-domain backbone model.
39 | * Wide & Deep + HAMUR
40 | * Pure DCN as the multi-domain backbone model.
41 | * DCN + HAMUR
42 | 
43 | ## Usage
44 | 
45 | ### Step 1: Clone the repository
46 | ```Shell
47 | git clone https://github.com/Applied-Machine-Learning-Lab/HAMUR.git
48 | ```
49 | 
50 | ### Step 2: Run the model
51 | ```Shell
52 | cd examples
53 | # For Ali-CCP
54 | python run_ali_ccp_ctr_ranking_multi_domain.py --model_name mlp_adp --epoch 200 --device cpu --seed 2022
55 | # For MovieLens
56 | python run_movielens_rank_multi_domain.py --model_name mlp_adp --epoch 200 --device cpu --seed 2022
57 | 
58 | ```
59 | 
60 | ## Credits
61 | Our code is developed based on [Torch-RecHub](https://github.com/datawhalechina/torch-rechub). Thanks for their contribution.
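## Data Utilities

The helpers in `HAMUR/utils/data.py` cover the preprocessing steps shared by the example scripts: `reduce_mem_usage` downcasts numeric columns, `df_to_dict` converts a DataFrame into the dict-of-NumPy-arrays format the trainers consume, and `pad_sequences` pads variable-length histories to one length. Below is a minimal sketch of how they fit together on the bundled MovieLens sample; it assumes you run it from the `examples` directory with the repository root on `PYTHONPATH`, and the grouping by `user_id` is only an illustration.

```Python
import pandas as pd

from HAMUR.utils.data import df_to_dict, pad_sequences, reduce_mem_usage

# Load the bundled MovieLens sample and shrink its memory footprint.
df = pd.read_csv("data/ml-1m/ml-1m-sample.csv")
df = reduce_mem_usage(df)

# Convert the DataFrame into the dict-of-arrays input format.
x_dict = df_to_dict(df)
print(x_dict["movie_id"][:5])

# Pad per-user movie histories to a fixed length of 10
# (keep the most recent items, pad short histories with 0 at the end).
hist = df.groupby("user_id")["movie_id"].apply(list).tolist()
padded = pad_sequences(hist, maxlen=10, padding="post", truncating="pre", value=0)
print(padded.shape)  # (num_users, 10)
```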
62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /examples/data/ali-ccp/ali_ccp_test_sample.csv: -------------------------------------------------------------------------------- 1 | click,purchase,101,121,122,124,125,126,127,128,129,205,206,207,210,216,508,509,702,853,301,109_14,110_14,127_14,150_14,D109_14,D110_14,D127_14,D150_14,D508,D509,D702,D853 2 | 0,0,1,1,1,1,1,0,1,1,1,65,36,84,88,52,23,0,17,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1754,0.0,0.3555,0.0 3 | 0,0,1,1,1,1,1,0,1,1,1,66,20,85,89,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 4 | 0,0,1,1,1,1,1,0,1,1,1,67,29,86,90,0,19,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 5 | 0,0,1,1,1,1,1,0,1,1,1,68,6,87,91,0,5,0,0,21,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.11914000000000001 6 | 0,0,1,1,1,1,1,0,1,1,1,0,27,88,92,53,18,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.0,0.0,0.0 7 | 0,0,1,1,1,1,1,0,1,1,1,69,37,89,93,54,24,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 8 | 0,0,1,1,1,1,1,0,1,1,1,70,6,90,91,55,5,0,0,11,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.0752 9 | 0,0,1,1,1,1,1,0,1,1,1,71,18,51,43,56,14,0,0,13,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.0,0.0,0.11914000000000001 10 | 0,0,1,1,1,1,1,0,1,1,1,0,8,71,10,57,6,16,18,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.2764,0.0797,0.0 11 | 0,0,1,1,1,1,1,0,1,1,1,72,10,91,28,18,8,0,7,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1511,0.0,0.0797,0.0 12 | 0,0,1,1,1,1,1,0,1,1,1,73,6,44,91,28,5,0,0,22,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.1064 13 | 0,0,1,1,1,1,1,0,1,1,1,0,24,92,94,0,16,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3633,0.0,0.0,0.0 14 | 0,0,1,1,1,1,1,0,1,1,1,74,19,93,95,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 15 | 0,0,1,1,1,1,1,0,1,1,1,75,38,94,96,0,25,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1511,0.0,0.0,0.0 16 | 0,0,1,1,1,1,1,0,1,1,1,76,36,95,97,0,23,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1754,0.0,0.0,0.0 17 | 0,0,1,1,1,1,1,0,1,1,1,0,26,47,98,0,17,11,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.2444,0.0,0.0 18 | 0,0,1,1,1,1,1,0,1,1,1,0,27,71,99,0,18,16,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.2764,0.0,0.0 19 | 0,0,1,1,1,1,1,0,1,1,1,77,39,96,100,58,26,0,0,23,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2268,0.0,0.0,0.11914000000000001 20 | 0,0,1,1,1,1,1,0,1,1,1,0,27,0,101,59,18,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.0771,0.0797,0.0 21 | 0,0,1,1,1,1,1,0,1,1,1,78,12,97,102,60,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 22 | 0,0,1,1,1,1,1,0,1,1,1,79,4,98,103,61,3,0,0,24,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2615,0.0,0.0,0.08795 23 | 0,0,1,1,1,1,1,0,1,1,1,80,8,99,104,62,6,0,19,3,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.1263,0.2656 24 | 0,0,1,1,1,1,1,0,1,1,1,81,28,100,105,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 25 | 1,0,1,1,1,1,1,0,1,1,1,0,5,71,106,0,4,16,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3591,0.2764,0.0797,0.0 26 | 0,0,1,1,1,1,1,0,1,1,1,82,9,101,13,63,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 27 | 0,0,1,1,1,1,1,0,1,1,1,83,40,102,107,0,27,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2395,0.0,0.0,0.0 28 | 1,1,1,1,1,1,1,0,1,1,1,0,27,71,108,64,18,16,20,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.2764,0.1594,0.0 29 | 0,0,1,1,1,1,1,0,1,1,1,84,29,103,109,0,19,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 30 | 
0,0,1,1,1,1,1,0,1,1,1,0,27,71,110,37,18,16,12,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.2764,0.2391,0.0 31 | 0,0,1,1,1,1,1,0,1,1,1,0,24,0,50,33,16,0,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3633,0.1993,0.3948,0.0 32 | 0,0,1,1,1,1,1,0,1,1,1,85,41,104,111,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 33 | 0,0,1,1,1,1,1,0,1,1,1,0,27,105,99,37,18,17,12,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.0771,0.2391,0.0 34 | 0,0,1,1,1,1,1,0,1,1,1,86,42,106,112,65,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 35 | 0,0,1,1,1,1,1,0,1,1,1,87,43,29,113,19,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 36 | 0,0,1,1,1,1,1,0,1,1,1,0,28,107,114,66,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 37 | 0,0,1,1,1,1,1,0,1,1,1,88,9,108,38,0,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 38 | 0,0,1,1,1,1,1,0,1,1,1,89,44,109,115,67,28,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.07556,0.0,0.0,0.0 39 | 0,0,1,1,1,1,1,0,1,1,1,0,34,110,116,0,21,0,0,25,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.07556,0.0,0.0,0.2322 40 | 0,0,1,1,1,1,1,0,1,1,1,90,28,111,117,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 41 | 0,0,1,1,1,1,1,0,1,1,1,91,11,112,118,33,9,18,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.3386,0.3948,0.0 42 | 0,0,1,1,1,1,1,0,1,1,1,92,11,112,119,33,9,18,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.3386,0.3948,0.0 43 | 0,0,1,1,1,1,1,0,1,1,1,93,11,112,118,33,9,18,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.3386,0.3948,0.0 44 | 0,0,1,1,1,1,1,0,1,1,1,94,16,113,120,68,12,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3743,0.0,0.0,0.0 45 | 0,0,1,1,1,1,1,0,1,1,1,39,11,54,121,0,9,12,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.0771,0.0,0.0 46 | 0,0,1,1,1,1,1,0,1,1,1,0,18,56,43,11,14,13,4,13,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.179,0.265,0.11914000000000001 47 | 0,0,1,1,1,1,1,0,1,1,1,0,5,114,122,11,4,0,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3591,0.0,0.265,0.0 48 | 0,0,1,1,1,1,1,0,1,1,1,0,45,18,123,11,29,3,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0771,0.265,0.0 49 | 0,0,1,1,1,1,1,0,1,1,1,0,6,56,11,11,5,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.179,0.265,0.0 50 | 0,0,1,1,1,1,1,0,1,1,1,95,16,115,124,69,12,0,0,6,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3743,0.0,0.0,0.203 51 | 0,0,1,1,1,1,1,0,1,1,1,0,46,56,125,11,30,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2122,0.179,0.265,0.0 -------------------------------------------------------------------------------- /examples/data/ali-ccp/ali_ccp_train_sample.csv: -------------------------------------------------------------------------------- 1 | click,purchase,101,121,122,124,125,126,127,128,129,205,206,207,210,216,508,509,702,853,301,109_14,110_14,127_14,150_14,D109_14,D110_14,D127_14,D150_14,D508,D509,D702,D853 2 | 0,0,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.07556,0.0,0.0,0.0 3 | 0,0,1,1,1,1,1,0,1,1,1,1,2,2,2,1,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 4 | 1,1,1,1,1,1,1,0,1,1,1,2,3,3,3,2,2,0,1,1,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.5605,0.256,0.4626,0.344 5 | 0,0,1,1,1,1,1,0,1,1,1,3,4,4,4,3,3,0,0,2,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2615,0.0,0.0,0.12212999999999999 6 | 0,0,1,1,1,1,1,0,1,1,1,0,5,5,5,4,4,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3591,0.0,0.0,0.0 7 | 0,0,1,1,1,1,1,0,1,1,1,4,3,6,6,5,2,0,0,1,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.5605,0.0,0.0,0.344 8 | 0,0,1,1,1,1,1,0,1,1,1,5,3,7,7,5,2,0,0,1,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.5605,0.0,0.0,0.344 9 | 
0,0,1,1,1,1,1,0,1,1,1,0,6,8,8,6,5,1,2,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.1222,0.185,0.0 10 | 0,0,1,1,1,1,1,0,1,1,1,0,7,9,9,7,0,2,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.1222,0.0,0.0 11 | 0,0,1,1,1,1,1,0,1,1,1,0,8,10,10,6,6,0,2,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.185,0.0 12 | 0,0,1,1,1,1,1,0,1,1,1,0,6,0,11,2,5,0,1,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.1993,0.4626,0.0 13 | 0,0,1,1,1,1,1,0,1,1,1,6,4,11,12,0,3,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2615,0.0,0.0,0.0 14 | 0,0,1,1,1,1,1,0,1,1,1,7,9,12,13,0,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 15 | 0,0,1,1,1,1,1,0,1,1,1,0,9,13,14,8,7,0,3,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0797,0.0 16 | 0,0,1,1,1,1,1,0,1,1,1,8,10,14,15,9,8,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1511,0.0,0.0,0.0 17 | 0,0,1,1,1,1,1,0,1,1,1,9,11,15,16,0,9,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.0,0.0,0.0 18 | 1,0,1,1,1,1,1,0,1,1,1,10,8,16,17,0,6,0,0,3,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.0,0.2656 19 | 0,0,1,1,1,1,1,0,1,1,1,11,3,17,18,10,2,0,0,1,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.5605,0.0,0.0,0.344 20 | 0,0,1,1,1,1,1,0,1,1,1,0,9,18,19,11,7,3,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0771,0.265,0.0 21 | 0,0,1,1,1,1,1,0,1,1,1,12,3,19,20,12,2,4,5,1,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.5605,0.0771,0.0797,0.344 22 | 0,0,1,1,1,1,1,0,1,1,1,13,12,20,21,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 23 | 0,0,1,1,1,1,1,0,1,1,1,14,3,21,22,13,2,0,0,4,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.5605,0.0,0.0,0.3254 24 | 0,0,1,1,1,1,1,0,1,1,1,15,11,22,23,14,9,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.0,0.0,0.0 25 | 0,0,1,1,1,1,1,0,1,1,1,16,13,23,24,15,10,5,6,5,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.3782,0.391,0.369 26 | 0,0,1,1,1,1,1,0,1,1,1,17,6,24,11,16,5,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.0 27 | 0,0,1,1,1,1,1,0,1,1,1,18,8,25,25,11,6,0,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.265,0.0 28 | 0,0,1,1,1,1,1,0,1,1,1,19,14,26,26,0,11,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2795,0.0,0.0,0.0 29 | 1,0,1,1,1,1,1,0,1,1,1,20,8,27,27,17,6,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.0,0.0 30 | 0,0,1,1,1,1,1,0,1,1,1,21,10,28,28,18,8,0,7,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1511,0.0,0.0797,0.0 31 | 0,0,1,1,1,1,1,0,1,1,1,22,15,29,29,19,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 32 | 1,0,1,1,1,1,1,0,1,1,1,0,16,30,30,0,12,6,0,6,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3743,0.3818,0.0,0.203 33 | 0,0,1,1,1,1,1,0,1,1,1,0,17,31,31,20,13,0,0,7,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2876,0.0,0.0,0.1504 34 | 1,0,1,1,1,1,1,0,1,1,1,23,11,32,32,0,9,7,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.0771,0.0,0.0 35 | 0,0,1,1,1,1,1,0,1,1,1,0,3,33,22,21,2,8,8,8,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.5605,0.1222,0.5093,0.3254 36 | 0,0,1,1,1,1,1,0,1,1,1,0,18,34,33,22,14,0,9,9,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.0,0.2238,0.11914000000000001 37 | 0,0,1,1,1,1,1,0,1,1,1,0,6,35,34,2,5,9,1,10,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.2313,0.4626,0.09186 38 | 0,0,1,1,1,1,1,0,1,1,1,0,3,36,35,21,2,0,8,8,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.5605,0.0,0.5093,0.3254 39 | 0,0,1,1,1,1,1,0,1,1,1,24,17,37,36,23,13,0,0,7,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2876,0.0,0.0,0.1504 40 | 0,0,1,1,1,1,1,0,1,1,1,0,19,0,37,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 41 | 0,0,1,1,1,1,1,0,1,1,1,25,9,38,38,17,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 42 | 
0,0,1,1,1,1,1,0,1,1,1,26,6,39,39,24,5,0,0,11,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.0752 43 | 0,0,1,1,1,1,1,0,1,1,1,27,6,40,40,0,5,0,0,12,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.0752 44 | 0,0,1,1,1,1,1,0,1,1,1,28,20,41,41,25,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 45 | 0,0,1,1,1,1,1,0,1,1,1,29,21,42,42,26,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 46 | 0,0,1,1,1,1,1,0,1,1,1,30,18,43,43,27,14,0,10,13,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.0,0.0797,0.11914000000000001 47 | 1,0,1,1,1,1,1,0,1,1,1,31,6,44,44,28,5,0,0,14,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.05545 48 | 0,0,1,1,1,1,1,0,1,1,1,0,22,0,45,0,15,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.3083,0.0,0.0 49 | 0,0,1,1,1,1,1,0,1,1,1,32,20,45,46,29,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 50 | 0,0,1,1,1,1,1,0,1,1,1,33,18,46,33,30,14,10,0,15,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.4045,0.0,0.11914000000000001 51 | 0,0,1,1,1,1,1,0,1,1,1,0,18,47,47,2,14,11,1,15,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.2444,0.4626,0.11914000000000001 52 | 0,0,1,1,1,1,1,0,1,1,1,0,6,48,48,31,5,0,0,16,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.2451 53 | 0,0,1,1,1,1,1,0,1,1,1,34,23,49,49,32,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 54 | 0,0,1,1,1,1,1,0,1,1,1,0,24,0,50,33,16,0,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3633,0.1993,0.3948,0.0 55 | 0,0,1,1,1,1,1,0,1,1,1,35,6,50,51,34,5,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.0 56 | 0,0,1,1,1,1,1,0,1,1,1,36,8,51,52,6,6,0,2,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.185,0.0 57 | 0,0,1,1,1,1,1,0,1,1,1,0,6,0,11,2,5,0,1,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.1993,0.4626,0.0 58 | 0,0,1,1,1,1,1,0,1,1,1,37,25,52,53,35,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 59 | 0,0,1,1,1,1,1,0,1,1,1,38,23,53,54,36,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 60 | 0,0,1,1,1,1,1,0,1,1,1,39,11,54,55,0,9,12,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.0771,0.0,0.0 61 | 0,0,1,1,1,1,1,0,1,1,1,40,8,10,25,37,6,0,12,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.2391,0.0 62 | 0,0,1,1,1,1,1,0,1,1,1,41,26,55,56,38,17,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 63 | 0,0,1,1,1,1,1,0,1,1,1,0,6,56,11,11,5,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.179,0.265,0.0 64 | 0,0,1,1,1,1,1,0,1,1,1,0,27,56,57,11,18,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.179,0.265,0.0 65 | 0,0,1,1,1,1,1,0,1,1,1,42,18,57,58,0,14,14,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.1993,0.0,0.0 66 | 0,0,1,1,1,1,1,0,1,1,1,43,24,58,59,39,16,0,0,17,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3633,0.0,0.0,0.1943 67 | 0,0,1,1,1,1,1,0,1,1,1,44,16,59,60,40,12,0,13,18,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3743,0.0,0.0797,0.366 68 | 0,0,1,1,1,1,1,0,1,1,1,45,9,60,61,41,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 69 | 0,0,1,1,1,1,1,0,1,1,1,0,6,47,62,2,5,11,1,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.2444,0.4626,0.0 70 | 0,0,1,1,1,1,1,0,1,1,1,46,24,61,63,42,16,15,14,17,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3633,0.179,0.185,0.1943 71 | 0,0,1,1,1,1,1,0,1,1,1,47,14,62,64,43,11,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2795,0.0,0.0,0.0 72 | 0,0,1,1,1,1,1,0,1,1,1,48,28,63,65,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 73 | 0,0,1,1,1,1,1,0,1,1,1,0,29,64,66,0,19,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 74 | 
0,0,1,1,1,1,1,0,1,1,1,49,8,65,67,0,6,0,0,19,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.0,0.2278 75 | 0,0,1,1,1,1,1,0,1,1,1,50,30,66,68,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 76 | 0,0,1,1,1,1,1,0,1,1,1,51,9,67,19,44,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 77 | 0,0,1,1,1,1,1,0,1,1,1,52,29,68,69,0,19,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 78 | 0,0,1,1,1,1,1,0,1,1,1,53,9,69,70,0,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 79 | 0,0,1,1,1,1,1,0,1,1,1,0,24,0,71,33,16,0,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3633,0.1993,0.3948,0.0 80 | 0,0,1,1,1,1,1,0,1,1,1,54,31,29,72,19,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 81 | 0,0,1,1,1,1,1,0,1,1,1,55,29,70,73,45,19,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 82 | 0,0,1,1,1,1,1,0,1,1,1,0,6,0,8,2,5,0,1,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.1993,0.4626,0.0 83 | 0,0,1,1,1,1,1,0,1,1,1,0,5,71,74,37,4,16,12,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3591,0.2764,0.2391,0.0 84 | 0,0,1,1,1,1,1,0,1,1,1,56,32,72,75,46,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 85 | 0,0,1,1,1,1,1,0,1,1,1,57,28,73,65,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 86 | 1,0,1,1,1,1,1,0,1,1,1,58,9,74,19,0,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 87 | 0,0,1,1,1,1,1,0,1,1,1,0,33,0,76,0,20,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 88 | 0,0,1,1,1,1,1,0,1,1,1,0,28,75,77,47,0,0,15,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0797,0.0 89 | 0,0,1,1,1,1,1,0,1,1,1,59,34,76,78,33,21,0,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.07556,0.0,0.3948,0.0 90 | 0,0,1,1,1,1,1,0,1,1,1,60,9,77,79,48,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 91 | 0,0,1,1,1,1,1,0,1,1,1,61,17,78,80,49,13,0,0,7,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2876,0.0,0.0,0.1504 92 | 0,0,1,1,1,1,1,0,1,1,1,0,28,79,81,50,0,0,16,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.4868,0.0 93 | 0,0,1,1,1,1,1,0,1,1,1,0,8,5,82,37,6,0,12,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.2391,0.0 94 | 0,0,1,1,1,1,1,0,1,1,1,0,28,75,77,33,0,0,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.3948,0.0 95 | 0,0,1,1,1,1,1,0,1,1,1,62,11,80,83,0,9,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.0,0.0,0.0 96 | 0,0,1,1,1,1,1,0,1,1,1,0,6,56,8,11,5,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.179,0.265,0.0 97 | 0,0,1,1,1,1,1,0,1,1,1,0,35,81,84,0,22,0,0,20,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.07556,0.0,0.0,0.1504 98 | 0,0,1,1,1,1,1,0,1,1,1,63,34,82,85,51,21,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.07556,0.0,0.0,0.0 99 | 0,0,1,1,1,1,1,0,1,1,1,64,29,83,86,33,19,0,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.3948,0.0 100 | 0,0,1,1,1,1,1,0,1,1,1,0,27,56,87,11,18,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.179,0.265,0.0 101 | 0,0,1,1,1,1,1,0,1,1,1,42,18,57,58,0,14,14,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.1993,0.0,0.0 -------------------------------------------------------------------------------- /examples/data/ali-ccp/ali_ccp_val_sample.csv: -------------------------------------------------------------------------------- 1 | click,purchase,101,121,122,124,125,126,127,128,129,205,206,207,210,216,508,509,702,853,301,109_14,110_14,127_14,150_14,D109_14,D110_14,D127_14,D150_14,D508,D509,D702,D853 2 | 0,0,1,1,1,1,1,0,1,1,1,0,27,56,57,11,18,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.179,0.265,0.0 3 | 
0,0,1,1,1,1,1,0,1,1,1,0,18,116,43,22,14,0,9,13,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3508,0.0771,0.2238,0.11914000000000001 4 | 0,0,1,1,1,1,1,0,1,1,1,96,47,117,126,70,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 5 | 0,0,1,1,1,1,1,0,1,1,1,97,6,118,127,0,5,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.0 6 | 0,0,1,1,1,1,1,0,1,1,1,98,8,119,128,0,6,0,0,3,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.0,0.2656 7 | 0,0,1,1,1,1,1,0,1,1,1,0,27,114,129,11,18,0,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.0,0.265,0.0 8 | 0,0,1,1,1,1,1,0,1,1,1,99,10,120,15,71,8,19,21,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1511,0.1222,0.1263,0.0 9 | 0,0,1,1,1,1,1,0,1,1,1,100,48,121,130,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 10 | 0,0,1,1,1,1,1,0,1,1,1,0,6,44,51,28,5,0,0,22,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.1064 11 | 0,0,1,1,1,1,1,0,1,1,1,101,27,122,99,33,18,0,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.0,0.3948,0.0 12 | 0,0,1,1,1,1,1,0,1,1,1,102,49,123,131,72,0,0,22,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.1594,0.0 13 | 0,0,1,1,1,1,1,0,1,1,1,0,8,124,132,0,6,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.0,0.0 14 | 0,0,1,1,1,1,1,0,1,1,1,103,50,125,133,73,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 15 | 0,0,1,1,1,1,1,0,1,1,1,0,27,71,129,6,18,16,2,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.2764,0.185,0.0 16 | 0,0,1,1,1,1,1,0,1,1,1,0,27,71,134,37,18,16,12,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.2764,0.2391,0.0 17 | 0,0,1,1,1,1,1,0,1,1,1,0,24,0,71,33,16,0,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3633,0.1993,0.3948,0.0 18 | 0,0,1,1,1,1,1,0,1,1,1,0,51,126,135,74,31,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1953,0.0,0.0,0.0 19 | 0,0,1,1,1,1,1,0,1,1,1,0,27,105,99,37,18,17,12,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.0771,0.2391,0.0 20 | 0,0,1,1,1,1,1,0,1,1,1,0,10,127,136,9,8,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1511,0.0,0.0,0.0 21 | 0,0,1,1,1,1,1,0,1,1,1,0,6,126,11,75,5,0,23,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0797,0.0 22 | 0,0,1,1,1,1,1,0,1,1,1,0,6,0,62,2,5,0,1,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.1993,0.4626,0.0 23 | 0,0,1,1,1,1,1,0,1,1,1,104,10,128,137,76,8,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1511,0.0,0.0,0.0 24 | 0,0,1,1,1,1,1,0,1,1,1,105,11,112,118,33,9,18,11,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3267,0.3386,0.3948,0.0 25 | 0,0,1,1,1,1,1,0,1,1,1,106,8,129,25,0,6,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.0,0.0 26 | 0,0,1,1,1,1,1,0,1,1,1,107,9,126,138,0,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 27 | 1,1,1,1,1,1,1,0,1,1,1,108,27,130,139,52,18,0,17,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.0,0.3555,0.0 28 | 0,0,1,1,1,1,1,0,1,1,1,0,6,56,11,11,5,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.179,0.265,0.0 29 | 0,0,1,1,1,1,1,0,1,1,1,0,5,126,74,0,4,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3591,0.0,0.0,0.0 30 | 0,0,1,1,1,1,1,0,1,1,1,0,27,56,140,11,18,13,4,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.179,0.265,0.0 31 | 0,0,1,1,1,1,1,0,1,1,1,109,9,131,38,77,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 32 | 0,0,1,1,1,1,1,0,1,1,1,0,6,126,40,0,5,0,0,11,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.0752 33 | 0,0,1,1,1,1,1,0,1,1,1,0,8,126,10,0,6,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.0,0.0 34 | 0,0,1,1,1,1,1,0,1,1,1,0,28,126,141,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 35 | 
0,0,1,1,1,1,1,0,1,1,1,0,26,126,142,78,17,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 36 | 0,0,1,1,1,1,1,0,1,1,1,110,24,126,63,0,16,0,0,26,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3633,0.0,0.0,0.1589 37 | 0,0,1,1,1,1,1,0,1,1,1,0,36,126,143,0,23,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1754,0.0,0.0,0.0 38 | 0,0,1,1,1,1,1,0,1,1,1,0,29,126,144,0,19,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 39 | 0,0,1,1,1,1,1,0,1,1,1,111,38,132,145,79,25,20,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.1511,0.0771,0.0,0.0 40 | 0,0,1,1,1,1,1,0,1,1,1,112,27,133,146,52,18,21,17,20,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.0771,0.3555,0.1504 41 | 0,0,1,1,1,1,1,0,1,1,1,0,46,126,147,0,30,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2122,0.0,0.0,0.0 42 | 0,0,1,1,1,1,1,0,1,1,1,0,52,126,148,0,32,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 43 | 1,0,1,1,1,1,1,0,1,1,1,0,52,126,149,0,32,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.11975,0.0,0.0,0.0 44 | 1,0,1,1,1,1,1,0,1,1,1,113,6,126,150,0,5,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4802,0.0,0.0,0.0 45 | 0,0,1,1,1,1,1,0,1,1,1,0,27,134,151,37,18,22,12,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.4243,0.1222,0.2391,0.0 46 | 0,0,1,1,1,1,1,0,1,1,1,114,8,126,152,0,6,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3936,0.0,0.0,0.0 47 | 0,0,1,1,1,1,1,0,1,1,1,115,53,135,153,80,33,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.2268,0.0,0.0,0.0 48 | 0,0,1,1,1,1,1,0,1,1,1,116,32,136,154,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 49 | 0,0,1,1,1,1,1,0,1,1,1,117,54,137,155,0,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 50 | 0,0,1,1,1,1,1,0,1,1,1,118,32,138,156,81,0,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.0,0.0,0.0,0.0 51 | 0,0,1,1,1,1,1,0,1,1,1,119,9,139,157,0,7,0,0,0,1,1,1,1,1,0.4734,0.562,0.0856,0.1902,0.3706,0.0,0.0,0.0 -------------------------------------------------------------------------------- /examples/data/ml-1m/ml-1m-sample.csv: -------------------------------------------------------------------------------- 1 | user_id,movie_id,rating,timestamp,title,genres,gender,age,occupation,zip 2 | 1,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama,F,1,10,48067 3 | 1,661,3,978302109,James and the Giant Peach (1996),Animation|Children's|Musical,F,1,10,48067 4 | 1,914,3,978301968,My Fair Lady (1964),Musical|Romance,F,1,10,48067 5 | 1,3408,4,978300275,Erin Brockovich (2000),Drama,F,1,10,48067 6 | 1,2355,5,978824291,"Bug's Life, A (1998)",Animation|Children's|Comedy,F,1,10,48067 7 | 1,1197,3,978302268,"Princess Bride, The (1987)",Action|Adventure|Comedy|Romance,F,1,10,48067 8 | 1,1287,5,978302039,Ben-Hur (1959),Action|Adventure|Drama,F,1,10,48067 9 | 1,2804,5,978300719,"Christmas Story, A (1983)",Comedy|Drama,F,1,10,48067 10 | 1,594,4,978302268,Snow White and the Seven Dwarfs (1937),Animation|Children's|Musical,F,1,10,48067 11 | 1,919,4,978301368,"Wizard of Oz, The (1939)",Adventure|Children's|Drama|Musical,F,1,10,48067 12 | 1,595,5,978824268,Beauty and the Beast (1991),Animation|Children's|Musical,F,1,10,48067 13 | 1,938,4,978301752,Gigi (1958),Musical,F,1,10,48067 14 | 1,2398,4,978302281,Miracle on 34th Street (1947),Drama,F,1,10,48067 15 | 1,2918,4,978302124,Ferris Bueller's Day Off (1986),Comedy,F,1,10,48067 16 | 1,1035,5,978301753,"Sound of Music, The (1965)",Musical,F,1,10,48067 17 | 1,2791,4,978302188,Airplane! 
(1980),Comedy,F,1,10,48067 18 | 1,2687,3,978824268,Tarzan (1999),Animation|Children's,F,1,10,48067 19 | 1,2018,4,978301777,Bambi (1942),Animation|Children's,F,1,10,48067 20 | 1,3105,5,978301713,Awakenings (1990),Drama,F,1,10,48067 21 | 1,2797,4,978302039,Big (1988),Comedy|Fantasy,F,1,10,48067 22 | 1,2321,3,978302205,Pleasantville (1998),Comedy,F,1,10,48067 23 | 1,720,3,978300760,Wallace & Gromit: The Best of Aardman Animation (1996),Animation,F,1,10,48067 24 | 1,1270,5,978300055,Back to the Future (1985),Comedy|Sci-Fi,F,1,10,48067 25 | 1,527,5,978824195,Schindler's List (1993),Drama|War,F,1,10,48067 26 | 1,2340,3,978300103,Meet Joe Black (1998),Romance,F,1,10,48067 27 | 1,48,5,978824351,Pocahontas (1995),Animation|Children's|Musical|Romance,F,1,10,48067 28 | 1,1097,4,978301953,E.T. the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi,F,1,10,48067 29 | 1,1721,4,978300055,Titanic (1997),Drama|Romance,F,1,10,48067 30 | 1,1545,4,978824139,Ponette (1996),Drama,F,1,10,48067 31 | 1,745,3,978824268,"Close Shave, A (1995)",Animation|Comedy|Thriller,F,1,10,48067 32 | 1,2294,4,978824291,Antz (1998),Animation|Children's,F,1,10,48067 33 | 1,3186,4,978300019,"Girl, Interrupted (1999)",Drama,F,1,10,48067 34 | 1,1566,4,978824330,Hercules (1997),Adventure|Animation|Children's|Comedy|Musical,F,1,10,48067 35 | 1,588,4,978824268,Aladdin (1992),Animation|Children's|Comedy|Musical,F,1,10,48067 36 | 1,1907,4,978824330,Mulan (1998),Animation|Children's,F,1,10,48067 37 | 1,783,4,978824291,"Hunchback of Notre Dame, The (1996)",Animation|Children's|Musical,F,1,10,48067 38 | 1,1836,5,978300172,"Last Days of Disco, The (1998)",Drama,F,1,10,48067 39 | 1,1022,5,978300055,Cinderella (1950),Animation|Children's|Musical,F,1,10,48067 40 | 1,2762,4,978302091,"Sixth Sense, The (1999)",Thriller,F,1,10,48067 41 | 1,150,5,978301777,Apollo 13 (1995),Drama,F,1,10,48067 42 | 1,1,5,978824268,Toy Story (1995),Animation|Children's|Comedy,F,1,10,48067 43 | 1,1961,5,978301590,Rain Man (1988),Drama,F,1,10,48067 44 | 1,1962,4,978301753,Driving Miss Daisy (1989),Drama,F,1,10,48067 45 | 1,2692,4,978301570,Run Lola Run (Lola rennt) (1998),Action|Crime|Romance,F,1,10,48067 46 | 1,260,4,978300760,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi,F,1,10,48067 47 | 1,1028,5,978301777,Mary Poppins (1964),Children's|Comedy|Musical,F,1,10,48067 48 | 1,1029,5,978302205,Dumbo (1941),Animation|Children's|Musical,F,1,10,48067 49 | 1,1207,4,978300719,To Kill a Mockingbird (1962),Drama,F,1,10,48067 50 | 1,2028,5,978301619,Saving Private Ryan (1998),Action|Drama|War,F,1,10,48067 51 | 1,531,4,978302149,"Secret Garden, The (1993)",Children's|Drama,F,1,10,48067 52 | 1,3114,4,978302174,Toy Story 2 (1999),Animation|Children's|Comedy,F,1,10,48067 53 | 1,608,4,978301398,Fargo (1996),Crime|Drama|Thriller,F,1,10,48067 54 | 1,1246,4,978302091,Dead Poets Society (1989),Drama,F,1,10,48067 55 | 2,1193,5,978298413,One Flew Over the Cuckoo's Nest (1975),Drama,M,56,16,70072 56 | 2,3105,4,978298673,Awakenings (1990),Drama,M,56,16,70072 57 | 2,2321,3,978299666,Pleasantville (1998),Comedy,M,56,16,70072 58 | 2,1962,5,978298813,Driving Miss Daisy (1989),Drama,M,56,16,70072 59 | 2,1207,4,978298478,To Kill a Mockingbird (1962),Drama,M,56,16,70072 60 | 2,2028,4,978299773,Saving Private Ryan (1998),Action|Drama|War,M,56,16,70072 61 | 2,1246,5,978299418,Dead Poets Society (1989),Drama,M,56,16,70072 62 | 2,1357,5,978298709,Shine (1996),Drama|Romance,M,56,16,70072 63 | 2,3068,4,978299000,"Verdict, The (1982)",Drama,M,56,16,70072 64 | 
2,1537,4,978299620,Shall We Dance? (Shall We Dansu?) (1996),Comedy,M,56,16,70072 65 | 2,647,3,978299351,Courage Under Fire (1996),Drama|War,M,56,16,70072 66 | 2,2194,4,978299297,"Untouchables, The (1987)",Action|Crime|Drama,M,56,16,70072 67 | 2,648,4,978299913,Mission: Impossible (1996),Action|Adventure|Mystery,M,56,16,70072 68 | 2,2268,5,978299297,"Few Good Men, A (1992)",Crime|Drama,M,56,16,70072 69 | 2,2628,3,978300051,Star Wars: Episode I - The Phantom Menace (1999),Action|Adventure|Fantasy|Sci-Fi,M,56,16,70072 70 | 2,1103,3,978298905,Rebel Without a Cause (1955),Drama,M,56,16,70072 71 | 2,2916,3,978299809,Total Recall (1990),Action|Adventure|Sci-Fi|Thriller,M,56,16,70072 72 | 2,3468,5,978298542,"Hustler, The (1961)",Drama,M,56,16,70072 73 | 2,1210,4,978298151,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Romance|Sci-Fi|War,M,56,16,70072 74 | 2,1792,3,978299941,U.S. Marshalls (1998),Action|Thriller,M,56,16,70072 75 | 2,1687,3,978300174,"Jackal, The (1997)",Action|Thriller,M,56,16,70072 76 | 2,1213,2,978298458,GoodFellas (1990),Crime|Drama,M,56,16,70072 77 | 2,3578,5,978298958,Gladiator (2000),Action|Drama,M,56,16,70072 78 | 2,2881,3,978300002,Double Jeopardy (1999),Action|Thriller,M,56,16,70072 79 | 2,3030,4,978298434,Yojimbo (1961),Comedy|Drama|Western,M,56,16,70072 80 | 2,1217,3,978298151,Ran (1985),Drama|War,M,56,16,70072 81 | 2,434,2,978300174,Cliffhanger (1993),Action|Adventure|Crime,M,56,16,70072 82 | 2,2126,3,978300123,Snake Eyes (1998),Action|Crime|Mystery|Thriller,M,56,16,70072 83 | 2,3107,2,978300002,Backdraft (1991),Action|Drama,M,56,16,70072 84 | 2,3108,3,978299712,"Fisher King, The (1991)",Comedy|Drama|Romance,M,56,16,70072 85 | 2,3035,4,978298625,Mister Roberts (1955),Comedy|Drama|War,M,56,16,70072 86 | 2,1253,3,978299120,"Day the Earth Stood Still, The (1951)",Drama|Sci-Fi,M,56,16,70072 87 | 2,1610,5,978299809,"Hunt for Red October, The (1990)",Action|Thriller,M,56,16,70072 88 | 2,292,3,978300123,Outbreak (1995),Action|Drama|Thriller,M,56,16,70072 89 | 2,2236,5,978299220,Simon Birch (1998),Drama,M,56,16,70072 90 | 2,3071,4,978299120,Stand and Deliver (1987),Drama,M,56,16,70072 91 | 2,902,2,978298905,Breakfast at Tiffany's (1961),Drama|Romance,M,56,16,70072 92 | 2,368,4,978300002,Maverick (1994),Action|Comedy|Western,M,56,16,70072 93 | 2,1259,5,978298841,Stand by Me (1986),Adventure|Comedy|Drama,M,56,16,70072 94 | 2,3147,5,978298652,"Green Mile, The (1999)",Drama|Thriller,M,56,16,70072 95 | 2,1544,4,978300174,"Lost World: Jurassic Park, The (1997)",Action|Adventure|Sci-Fi|Thriller,M,56,16,70072 96 | 2,1293,5,978298261,Gandhi (1982),Drama,M,56,16,70072 97 | 2,1188,4,978299620,Strictly Ballroom (1992),Comedy|Romance,M,56,16,70072 98 | 2,3255,4,978299321,"League of Their Own, A (1992)",Comedy|Drama,M,56,16,70072 99 | 2,3256,2,978299839,Patriot Games (1992),Action|Thriller,M,56,16,70072 100 | 2,3257,3,978300073,"Bodyguard, The (1992)",Action|Drama|Romance|Thriller,M,56,16,70072 101 | 2,110,5,978298625,Braveheart (1995),Action|Drama|War,M,56,16,70072 102 | 2,2278,3,978299889,Ronin (1998),Action|Crime|Thriller,M,56,16,70072 103 | 2,2490,3,978299966,Payback (1999),Action|Thriller,M,56,16,70072 104 | 2,1834,4,978298813,"Spanish Prisoner, The (1997)",Drama|Thriller,M,56,16,70072 105 | 2,3471,5,978298814,Close Encounters of the Third Kind (1977),Drama|Sci-Fi,M,56,16,70072 106 | 2,589,4,978299773,Terminator 2: Judgment Day (1991),Action|Sci-Fi|Thriller,M,56,16,70072 107 | 2,1690,3,978300051,Alien: Resurrection (1997),Action|Horror|Sci-Fi,M,56,16,70072 108 | 
2,3654,3,978298814,"Guns of Navarone, The (1961)",Action|Drama|War,M,56,16,70072 109 | 2,2852,3,978298958,"Soldier's Story, A (1984)",Drama,M,56,16,70072 110 | 2,1945,5,978298458,On the Waterfront (1954),Crime|Drama,M,56,16,70072 111 | 2,982,4,978299269,Picnic (1955),Drama,M,56,16,70072 112 | 2,1873,4,978298542,"Misérables, Les (1998)",Drama,M,56,16,70072 113 | 2,2858,4,978298434,American Beauty (1999),Comedy|Drama,M,56,16,70072 114 | 2,1225,5,978298391,Amadeus (1984),Drama,M,56,16,70072 115 | 2,515,5,978298542,"Remains of the Day, The (1993)",Drama,M,56,16,70072 116 | 2,442,3,978300025,Demolition Man (1993),Action|Sci-Fi,M,56,16,70072 117 | 2,2312,3,978299046,Children of a Lesser God (1986),Drama,M,56,16,70072 118 | 2,265,4,978299026,Like Water for Chocolate (Como agua para chocolate) (1992),Drama|Romance,M,56,16,70072 119 | 2,1408,3,978299839,"Last of the Mohicans, The (1992)",Action|Romance|War,M,56,16,70072 120 | 2,1084,3,978298813,Bonnie and Clyde (1967),Crime|Drama,M,56,16,70072 121 | 2,3699,2,978299173,Starman (1984),Adventure|Drama|Romance|Sci-Fi,M,56,16,70072 122 | 2,480,5,978299809,Jurassic Park (1993),Action|Adventure|Sci-Fi,M,56,16,70072 123 | 2,1442,4,978299297,Prefontaine (1997),Drama,M,56,16,70072 124 | 2,2067,5,978298625,Doctor Zhivago (1965),Drama|Romance|War,M,56,16,70072 125 | 2,1265,3,978299712,Groundhog Day (1993),Comedy|Romance,M,56,16,70072 126 | 2,1370,5,978299889,Die Hard 2 (1990),Action|Thriller,M,56,16,70072 127 | 2,1801,3,978300002,"Man in the Iron Mask, The (1998)",Action|Drama|Romance,M,56,16,70072 128 | 2,1372,3,978299941,Star Trek VI: The Undiscovered Country (1991),Action|Adventure|Sci-Fi,M,56,16,70072 129 | 2,2353,4,978299861,Enemy of the State (1998),Action|Thriller,M,56,16,70072 130 | 2,3334,4,978298958,Key Largo (1948),Crime|Drama|Film-Noir|Thriller,M,56,16,70072 131 | 2,2427,2,978299913,"Thin Red Line, The (1998)",Action|Drama|War,M,56,16,70072 132 | 2,590,5,978299083,Dances with Wolves (1990),Adventure|Drama|Western,M,56,16,70072 133 | 2,1196,5,978298730,Star Wars: Episode V - The Empire Strikes Back (1980),Action|Adventure|Drama|Sci-Fi|War,M,56,16,70072 134 | 2,1552,3,978299941,Con Air (1997),Action|Adventure|Thriller,M,56,16,70072 135 | 2,736,4,978300100,Twister (1996),Action|Adventure|Romance|Thriller,M,56,16,70072 136 | 2,1198,4,978298124,Raiders of the Lost Ark (1981),Action|Adventure,M,56,16,70072 137 | 2,593,5,978298517,"Silence of the Lambs, The (1991)",Drama|Thriller,M,56,16,70072 138 | 2,2359,3,978299666,Waking Ned Devine (1998),Comedy,M,56,16,70072 139 | 2,95,2,978300143,Broken Arrow (1996),Action|Thriller,M,56,16,70072 140 | 2,2717,3,978298196,Ghostbusters II (1989),Comedy|Horror,M,56,16,70072 141 | 2,2571,4,978299773,"Matrix, The (1999)",Action|Sci-Fi|Thriller,M,56,16,70072 142 | 2,1917,3,978300174,Armageddon (1998),Action|Adventure|Sci-Fi|Thriller,M,56,16,70072 143 | 2,2396,4,978299641,Shakespeare in Love (1998),Comedy|Romance,M,56,16,70072 144 | 2,3735,3,978298814,Serpico (1973),Crime|Drama,M,56,16,70072 145 | 2,1953,4,978298775,"French Connection, The (1971)",Action|Crime|Drama|Thriller,M,56,16,70072 146 | 2,1597,3,978300025,Conspiracy Theory (1997),Action|Mystery|Romance|Thriller,M,56,16,70072 147 | 2,3809,3,978299712,What About Bob? (1991),Comedy,M,56,16,70072 148 | 2,1954,5,978298841,Rocky (1976),Action|Drama,M,56,16,70072 149 | 2,1955,4,978299200,Kramer Vs. 
Kramer (1979),Drama,M,56,16,70072 150 | 2,235,3,978299351,Ed Wood (1994),Comedy|Drama,M,56,16,70072 151 | 2,1124,5,978299418,On Golden Pond (1981),Drama,M,56,16,70072 152 | 2,1957,5,978298750,Chariots of Fire (1981),Drama,M,56,16,70072 153 | 2,163,4,978299809,Desperado (1995),Action|Romance|Thriller,M,56,16,70072 154 | 2,21,1,978299839,Get Shorty (1995),Action|Comedy|Drama,M,56,16,70072 155 | 2,165,3,978300002,Die Hard: With a Vengeance (1995),Action|Thriller,M,56,16,70072 156 | 2,1090,2,978298580,Platoon (1986),Drama|War,M,56,16,70072 157 | 2,380,5,978299809,True Lies (1994),Action|Adventure|Comedy|Romance,M,56,16,70072 158 | 2,2501,5,978298600,October Sky (1999),Drama,M,56,16,70072 159 | 2,349,4,978299839,Clear and Present Danger (1994),Action|Adventure|Thriller,M,56,16,70072 160 | 2,457,4,978299773,"Fugitive, The (1993)",Action|Thriller,M,56,16,70072 161 | 2,1096,4,978299386,Sophie's Choice (1982),Drama,M,56,16,70072 162 | 2,920,5,978298775,Gone with the Wind (1939),Drama|Romance|War,M,56,16,70072 163 | 2,459,3,978300002,"Getaway, The (1994)",Action,M,56,16,70072 164 | 2,1527,4,978299839,"Fifth Element, The (1997)",Action|Sci-Fi,M,56,16,70072 165 | 2,3418,4,978299809,Thelma & Louise (1991),Action|Drama,M,56,16,70072 166 | 2,1385,3,978299966,Under Siege (1992),Action,M,56,16,70072 167 | 2,3451,4,978298924,Guess Who's Coming to Dinner (1967),Comedy|Drama,M,56,16,70072 168 | 2,3095,4,978298517,"Grapes of Wrath, The (1940)",Drama,M,56,16,70072 169 | 2,780,3,978299966,Independence Day (ID4) (1996),Action|Sci-Fi|War,M,56,16,70072 170 | 2,498,3,978299418,Mr. Jones (1993),Drama|Romance,M,56,16,70072 171 | 2,2728,3,978298881,Spartacus (1960),Drama,M,56,16,70072 172 | 2,2002,5,978300100,Lethal Weapon 3 (1992),Action|Comedy|Crime|Drama,M,56,16,70072 173 | 2,1784,5,978298841,As Good As It Gets (1997),Comedy|Drama,M,56,16,70072 174 | 2,2943,4,978298372,Indochine (1992),Drama|Romance,M,56,16,70072 175 | 2,2006,3,978299861,"Mask of Zorro, The (1998)",Action|Adventure|Romance,M,56,16,70072 176 | 2,318,5,978298413,"Shawshank Redemption, The (1994)",Drama,M,56,16,70072 177 | 2,1968,2,978298881,"Breakfast Club, The (1985)",Comedy|Drama,M,56,16,70072 178 | 2,3678,3,978299250,"Man with the Golden Arm, The (1955)",Drama,M,56,16,70072 179 | 2,1244,3,978299143,Manhattan (1979),Comedy|Drama|Romance,M,56,16,70072 180 | 2,356,5,978299686,Forrest Gump (1994),Comedy|Romance|War,M,56,16,70072 181 | 2,1245,2,978299200,Miller's Crossing (1990),Drama,M,56,16,70072 182 | 2,3893,1,978299535,Nurse Betty (2000),Comedy|Thriller,M,56,16,70072 183 | 2,1247,5,978298652,"Graduate, The (1967)",Drama|Romance,M,56,16,70072 184 | 12,1193,4,978220179,One Flew Over the Cuckoo's Nest (1975),Drama,M,25,12,32793 185 | 12,2804,5,978220237,"Christmas Story, A (1983)",Comedy|Drama,M,25,12,32793 186 | 12,919,5,978220120,"Wizard of Oz, The (1939)",Adventure|Children's|Drama|Musical,M,25,12,32793 187 | 12,1198,5,978218949,Raiders of the Lost Ark (1981),Action|Adventure,M,25,12,32793 188 | 12,593,5,978220193,"Silence of the Lambs, The (1991)",Drama|Thriller,M,25,12,32793 189 | 12,1247,3,978220216,"Graduate, The (1967)",Drama|Romance,M,25,12,32793 190 | 12,1641,3,978218568,"Full Monty, The (1997)",Comedy,M,25,12,32793 191 | 12,1221,5,978218949,"Godfather: Part II, The (1974)",Action|Crime|Drama,M,25,12,32793 192 | 12,111,5,978220179,Taxi Driver (1976),Drama|Thriller,M,25,12,32793 193 | 12,3265,4,978218916,Hard-Boiled (Lashou shentan) (1992),Action|Crime,M,25,12,32793 194 | 12,1233,3,978220120,"Boat, The (Das Boot) 
(1981)",Action|Drama|War,M,25,12,32793 195 | 12,1252,3,978220237,Chinatown (1974),Film-Noir|Mystery|Thriller,M,25,12,32793 196 | 12,923,5,978220237,Citizen Kane (1941),Drama,M,25,12,32793 197 | 12,858,5,978218949,"Godfather, The (1972)",Action|Crime|Drama,M,25,12,32793 198 | 12,3362,3,978220273,Dog Day Afternoon (1975),Comedy|Crime|Drama,M,25,12,32793 199 | 12,813,3,978218949,Larger Than Life (1996),Comedy,M,25,12,32793 200 | 12,3897,4,978218949,Almost Famous (2000),Comedy|Drama,M,25,12,32793 201 | 12,934,2,978218568,Father of the Bride (1950),Comedy,M,25,12,32793 202 | 12,3658,4,978220216,Quatermass and the Pit (1967),Sci-Fi,M,25,12,32793 203 | 12,1303,4,978218916,"Man Who Would Be King, The (1975)",Adventure,M,25,12,32793 204 | 12,999,4,978218568,2 Days in the Valley (1996),Crime,M,25,12,32793 205 | 12,2616,1,978218568,Dick Tracy (1990),Action|Crime,M,25,12,32793 206 | 12,3785,3,978218568,Scary Movie (2000),Comedy|Horror,M,25,12,32793 207 | 15,1193,4,978199279,One Flew Over the Cuckoo's Nest (1975),Drama,M,25,7,22903 208 | 15,3408,4,978196499,Erin Brockovich (2000),Drama,M,25,7,22903 209 | 15,3105,3,978198125,Awakenings (1990),Drama,M,25,7,22903 210 | 15,2321,2,978198301,Pleasantville (1998),Comedy,M,25,7,22903 211 | 15,527,4,978197830,Schindler's List (1993),Drama|War,M,25,7,22903 212 | 15,2762,4,978196817,"Sixth Sense, The (1999)",Thriller,M,25,7,22903 213 | 15,260,4,978212645,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi,M,25,7,22903 214 | 15,2028,4,978197856,Saving Private Ryan (1998),Action|Drama|War,M,25,7,22903 215 | 15,648,4,978212463,Mission: Impossible (1996),Action|Adventure|Mystery,M,25,7,22903 216 | 15,2268,4,978196975,"Few Good Men, A (1992)",Crime|Drama,M,25,7,22903 217 | 15,2916,4,978197453,Total Recall (1990),Action|Adventure|Sci-Fi|Thriller,M,25,7,22903 218 | 15,1210,4,978212663,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Romance|Sci-Fi|War,M,25,7,22903 219 | 15,1213,4,978212141,GoodFellas (1990),Crime|Drama,M,25,7,22903 220 | 15,3578,5,978196546,Gladiator (2000),Action|Drama,M,25,7,22903 221 | 15,2881,3,978196472,Double Jeopardy (1999),Action|Thriller,M,25,7,22903 222 | 15,434,3,978212329,Cliffhanger (1993),Action|Adventure|Crime,M,25,7,22903 223 | 15,2126,3,978212274,Snake Eyes (1998),Action|Crime|Mystery|Thriller,M,25,7,22903 224 | 15,3108,4,978198616,"Fisher King, The (1991)",Comedy|Drama|Romance,M,25,7,22903 225 | 15,1610,4,978198169,"Hunt for Red October, The (1990)",Action|Thriller,M,25,7,22903 226 | 15,368,3,978198493,Maverick (1994),Action|Comedy|Western,M,25,7,22903 227 | 15,3147,4,978197971,"Green Mile, The (1999)",Drama|Thriller,M,25,7,22903 228 | 15,3256,4,978198236,Patriot Games (1992),Action|Thriller,M,25,7,22903 229 | 15,110,5,978196933,Braveheart (1995),Action|Drama|War,M,25,7,22903 230 | 15,2278,2,978212210,Ronin (1998),Action|Crime|Thriller,M,25,7,22903 231 | 15,2490,4,978196741,Payback (1999),Action|Thriller,M,25,7,22903 232 | 15,2858,4,978196348,American Beauty (1999),Comedy|Drama,M,25,7,22903 233 | 15,442,3,978198790,Demolition Man (1993),Action|Sci-Fi,M,25,7,22903 234 | 15,480,4,978197998,Jurassic Park (1993),Action|Adventure|Sci-Fi,M,25,7,22903 235 | 15,1265,3,978198201,Groundhog Day (1993),Comedy|Romance,M,25,7,22903 236 | 15,1370,4,978198772,Die Hard 2 (1990),Action|Thriller,M,25,7,22903 237 | 15,2427,2,978212046,"Thin Red Line, The (1998)",Action|Drama|War,M,25,7,22903 238 | 15,1196,4,978212628,Star Wars: Episode V - The Empire Strikes Back (1980),Action|Adventure|Drama|Sci-Fi|War,M,25,7,22903 
239 | 15,1198,4,978196126,Raiders of the Lost Ark (1981),Action|Adventure,M,25,7,22903 240 | 15,593,4,978199279,"Silence of the Lambs, The (1991)",Drama|Thriller,M,25,7,22903 241 | 15,2571,4,978197870,"Matrix, The (1999)",Action|Sci-Fi|Thriller,M,25,7,22903 242 | 15,2396,4,978196817,Shakespeare in Love (1998),Comedy|Romance,M,25,7,22903 243 | 15,1597,4,978212463,Conspiracy Theory (1997),Action|Mystery|Romance|Thriller,M,25,7,22903 244 | 15,380,3,978197453,True Lies (1994),Action|Adventure|Comedy|Romance,M,25,7,22903 245 | 15,2501,4,978198125,October Sky (1999),Drama,M,25,7,22903 246 | 15,457,4,978197971,"Fugitive, The (1993)",Action|Thriller,M,25,7,22903 247 | 15,1527,4,978198581,"Fifth Element, The (1997)",Action|Sci-Fi,M,25,7,22903 248 | 15,3418,3,978198062,Thelma & Louise (1991),Action|Drama,M,25,7,22903 249 | 15,780,2,978198735,Independence Day (ID4) (1996),Action|Sci-Fi|War,M,25,7,22903 250 | 15,2002,3,978197004,Lethal Weapon 3 (1992),Action|Comedy|Crime|Drama,M,25,7,22903 251 | 15,1784,3,978198125,As Good As It Gets (1997),Comedy|Drama,M,25,7,22903 252 | 15,2006,4,978197453,"Mask of Zorro, The (1998)",Action|Adventure|Romance,M,25,7,22903 253 | 15,318,4,978197830,"Shawshank Redemption, The (1994)",Drama,M,25,7,22903 254 | 15,1968,4,978196188,"Breakfast Club, The (1985)",Comedy|Drama,M,25,7,22903 255 | 15,356,4,978198169,Forrest Gump (1994),Comedy|Romance|War,M,25,7,22903 256 | 15,3421,4,978196170,Animal House (1978),Comedy,M,25,7,22903 257 | 15,3534,3,978196348,28 Days (2000),Comedy,M,25,7,22903 258 | 15,104,4,978198235,Happy Gilmore (1996),Comedy,M,25,7,22903 259 | 15,2997,2,978196418,Being John Malkovich (1999),Comedy,M,25,7,22903 260 | 15,1615,3,978197476,"Edge, The (1997)",Adventure|Thriller,M,25,7,22903 261 | 15,1291,2,978212645,Indiana Jones and the Last Crusade (1989),Action|Adventure,M,25,7,22903 262 | 15,653,2,978212570,Dragonheart (1996),Action|Adventure|Fantasy,M,25,7,22903 263 | 15,1580,2,978197476,Men in Black (1997),Action|Adventure|Comedy|Sci-Fi,M,25,7,22903 264 | 15,733,3,978212698,"Rock, The (1996)",Action|Adventure|Thriller,M,25,7,22903 265 | 15,2617,1,978197517,"Mummy, The (1999)",Action|Adventure|Horror|Thriller,M,25,7,22903 266 | 15,2115,4,978212720,Indiana Jones and the Temple of Doom (1984),Action|Adventure,M,25,7,22903 267 | 15,1466,4,978212166,Donnie Brasco (1997),Crime|Drama,M,25,7,22903 268 | 15,2770,1,978196442,Bowfinger (1999),Comedy,M,25,7,22903 269 | 15,2058,3,978198516,"Negotiator, The (1998)",Action|Thriller,M,25,7,22903 270 | 15,47,4,978198104,Seven (Se7en) (1995),Crime|Thriller,M,25,7,22903 271 | 15,296,3,978197928,Pulp Fiction (1994),Crime|Drama,M,25,7,22903 272 | 15,1617,3,978212113,L.A. Confidential (1997),Crime|Film-Noir|Mystery|Thriller,M,25,7,22903 273 | 15,1909,4,978198907,"X-Files: Fight the Future, The (1998)",Mystery|Sci-Fi|Thriller,M,25,7,22903 274 | 15,6,4,978198250,Heat (1995),Action|Crime|Thriller,M,25,7,22903 275 | 15,50,4,978197842,"Usual Suspects, The (1995)",Crime|Thriller,M,25,7,22903 276 | 15,3081,3,978198735,Sleepy Hollow (1999),Horror|Romance,M,25,7,22903 277 | 15,1485,3,978198548,Liar Liar (1997),Comedy,M,25,7,22903 278 | 15,1704,4,978197887,Good Will Hunting (1997),Drama,M,25,7,22903 279 | 15,1923,4,978198104,There's Something About Mary (1998),Comedy,M,25,7,22903 280 | 15,3386,2,978212416,JFK (1991),Drama|Mystery,M,25,7,22903 281 | 15,3176,4,978196848,"Talented Mr. 
Ripley, The (1999)",Drama|Mystery|Thriller,M,25,7,22903 282 | 15,1500,3,978212166,Grosse Pointe Blank (1997),Comedy|Crime,M,25,7,22903 283 | 15,2959,3,978196521,Fight Club (1999),Drama,M,25,7,22903 284 | 15,1729,2,978212210,Jackie Brown (1997),Crime|Drama,M,25,7,22903 285 | 15,3113,4,978196499,End of Days (1999),Action|Thriller,M,25,7,22903 286 | 15,1732,3,978198601,"Big Lebowski, The (1998)",Comedy|Crime|Mystery|Thriller,M,25,7,22903 287 | 15,3624,4,978197054,Shanghai Noon (2000),Action,M,25,7,22903 288 | 15,2683,4,978196389,Austin Powers: The Spy Who Shagged Me (1999),Comedy,M,25,7,22903 289 | 15,1885,2,978198150,"Opposite of Sex, The (1998)",Comedy|Drama,M,25,7,22903 290 | 15,2188,3,978196348,54 (1998),Drama,M,25,7,22903 291 | 15,2908,3,978198266,Boys Don't Cry (1999),Drama,M,25,7,22903 292 | 15,3499,4,978198323,Misery (1990),Horror,M,25,7,22903 293 | 15,32,3,978198235,Twelve Monkeys (1995),Drama|Sci-Fi,M,25,7,22903 294 | 15,357,1,978198125,Four Weddings and a Funeral (1994),Comedy|Romance,M,25,7,22903 295 | 15,3717,3,978196546,Gone in 60 Seconds (2000),Action|Crime,M,25,7,22903 296 | 15,364,4,978198062,"Lion King, The (1994)",Animation|Children's|Musical,M,25,7,22903 297 | 15,2506,2,978198277,"Other Sister, The (1999)",Comedy|Drama|Romance,M,25,7,22903 298 | 15,3753,5,978196741,"Patriot, The (2000)",Action|Drama|War,M,25,7,22903 299 | 15,266,4,978198907,Legends of the Fall (1994),Drama|Romance|War|Western,M,25,7,22903 300 | 15,3148,4,978197985,"Cider House Rules, The (1999)",Drama,M,25,7,22903 301 | 15,2702,1,978196848,Summer of Sam (1999),Drama,M,25,7,22903 302 | 15,3155,4,978196389,Anna and the King (1999),Drama|Romance,M,25,7,22903 303 | 15,2712,3,978196521,Eyes Wide Shut (1999),Drama,M,25,7,22903 304 | 15,1840,2,978197493,He Got Game (1998),Drama,M,25,7,22903 305 | 15,161,4,978212013,Crimson Tide (1995),Drama|Thriller|War,M,25,7,22903 306 | 15,1411,3,978211800,Hamlet (1996),Drama,M,25,7,22903 307 | 15,524,4,978198150,Rudy (1993),Drama,M,25,7,22903 308 | 15,1639,2,978198201,Chasing Amy (1997),Drama|Romance,M,25,7,22903 309 | 15,73,4,978197903,"Misérables, Les (1995)",Drama|Musical,M,25,7,22903 310 | 15,2688,3,978196546,"General's Daughter, The (1999)",Drama|Thriller,M,25,7,22903 311 | 15,1810,4,978198834,Primary Colors (1998),Drama,M,25,7,22903 312 | 15,1673,3,978198818,Boogie Nights (1997),Drama,M,25,7,22903 313 | 15,367,4,978212166,"Mask, The (1994)",Comedy|Crime|Fantasy,M,25,7,22903 314 | 15,3298,4,978196418,Boiler Room (2000),Drama,M,25,7,22903 315 | 15,3948,3,978197734,Meet the Parents (2000),Comedy,M,25,7,22903 316 | 15,805,4,978198420,"Time to Kill, A (1996)",Drama,M,25,7,22903 317 | 15,3510,5,978361393,Frequency (2000),Drama|Thriller,M,25,7,22903 318 | 15,300,4,978197928,Quiz Show (1994),Drama,M,25,7,22903 319 | 15,1777,3,978198394,"Wedding Singer, The (1998)",Comedy|Romance,M,25,7,22903 320 | 15,3301,2,978212228,"Whole Nine Yards, The (2000)",Comedy|Crime,M,25,7,22903 321 | 15,1343,3,978198323,Cape Fear (1991),Thriller,M,25,7,22903 322 | 15,3160,2,978196657,Magnolia (1999),Drama,M,25,7,22903 323 | 15,3178,4,978196210,"Hurricane, The (1999)",Drama,M,25,7,22903 324 | 15,3755,3,978212752,"Perfect Storm, The (2000)",Action|Adventure|Thriller,M,25,7,22903 325 | 15,2302,3,978198379,My Cousin Vinny (1992),Comedy,M,25,7,22903 326 | 15,3623,3,978196692,Mission: Impossible 2 (2000),Action|Thriller,M,25,7,22903 327 | 15,587,3,978198535,Ghost (1990),Comedy|Romance|Thriller,M,25,7,22903 328 | 15,2496,3,978198493,Blast from the Past (1999),Comedy|Romance,M,25,7,22903 329 | 
15,2640,3,978212720,Superman (1978),Action|Adventure|Sci-Fi,M,25,7,22903 330 | 15,1271,3,978198104,Fried Green Tomatoes (1991),Drama,M,25,7,22903 331 | 15,2161,2,978212698,"NeverEnding Story, The (1984)",Adventure|Children's|Fantasy,M,25,7,22903 332 | 15,2826,4,978196348,"13th Warrior, The (1999)",Action|Horror|Thriller,M,25,7,22903 333 | 15,3489,4,978212591,Hook (1991),Adventure|Fantasy,M,25,7,22903 334 | 15,344,4,978198640,Ace Ventura: Pet Detective (1994),Comedy,M,25,7,22903 335 | 15,3723,4,978198394,Hamlet (1990),Drama,M,25,7,22903 336 | 15,2424,3,978197100,You've Got Mail (1998),Comedy|Romance,M,25,7,22903 337 | 15,2000,4,978197004,Lethal Weapon (1987),Action|Comedy|Crime|Drama,M,25,7,22903 338 | 15,2001,4,978197004,Lethal Weapon 2 (1989),Action|Comedy|Crime|Drama,M,25,7,22903 339 | 15,539,4,978198469,Sleepless in Seattle (1993),Comedy|Romance,M,25,7,22903 340 | 15,3247,2,978212185,Sister Act (1992),Comedy|Crime,M,25,7,22903 341 | 15,3174,4,978196657,Man on the Moon (1999),Comedy|Drama,M,25,7,22903 342 | 15,1586,3,978212065,G.I. Jane (1997),Action|Drama|War,M,25,7,22903 343 | 15,2706,4,978196348,American Pie (1999),Comedy,M,25,7,22903 344 | 15,231,2,978198790,Dumb & Dumber (1994),Comedy,M,25,7,22903 345 | 15,2598,3,978196775,Pushing Tin (1999),Comedy,M,25,7,22903 346 | 15,788,3,978198874,"Nutty Professor, The (1996)",Comedy|Fantasy|Romance|Sci-Fi,M,25,7,22903 347 | 15,2539,2,978199023,Analyze This (1999),Comedy,M,25,7,22903 348 | 15,517,3,978212463,Rising Sun (1993),Action|Drama|Mystery,M,25,7,22903 349 | 15,3354,2,978196692,Mission to Mars (2000),Sci-Fi,M,25,7,22903 350 | 15,2694,4,978196418,Big Daddy (1999),Comedy,M,25,7,22903 351 | 15,2485,3,978196817,She's All That (1999),Comedy|Romance,M,25,7,22903 352 | 15,141,4,978198350,"Birdcage, The (1996)",Comedy,M,25,7,22903 353 | 15,3798,4,978196866,What Lies Beneath (2000),Thriller,M,25,7,22903 354 | 15,2707,4,978196389,Arlington Road (1999),Thriller,M,25,7,22903 355 | 15,2567,4,978198847,EDtv (1999),Comedy,M,25,7,22903 356 | 15,3004,2,978196389,"Bachelor, The (1999)",Comedy|Romance,M,25,7,22903 357 | 15,3005,4,978196442,"Bone Collector, The (1999)",Thriller,M,25,7,22903 358 | 15,2719,3,978196579,"Haunting, The (1999)",Horror|Thriller,M,25,7,22903 359 | 15,160,2,978212507,Congo (1995),Action|Adventure|Mystery|Sci-Fi,M,25,7,22903 360 | 15,748,3,978196103,"Arrival, The (1996)",Action|Sci-Fi|Thriller,M,25,7,22903 361 | 15,2724,3,978196817,Runaway Bride (1999),Comedy|Romance,M,25,7,22903 362 | 15,2581,3,978196692,Never Been Kissed (1999),Comedy|Romance,M,25,7,22903 363 | 15,1783,2,978212494,Palmetto (1998),Film-Noir|Mystery|Thriller,M,25,7,22903 364 | 15,1422,4,978212463,Murder at 1600 (1997),Mystery|Thriller,M,25,7,22903 365 | 15,3461,4,978212698,Lord of the Flies (1963),Adventure|Drama|Thriller,M,25,7,22903 366 | 15,3535,2,978197348,American Psycho (2000),Comedy|Horror|Thriller,M,25,7,22903 367 | 15,257,3,978212463,Just Cause (1995),Mystery|Thriller,M,25,7,22903 368 | 15,832,4,978198581,Ransom (1996),Drama|Thriller,M,25,7,22903 369 | 15,762,3,978212352,Striptease (1996),Comedy|Crime,M,25,7,22903 370 | 15,2676,3,978196625,Instinct (1999),Drama|Thriller,M,25,7,22903 371 | 15,1883,3,978198442,Bulworth (1998),Comedy,M,25,7,22903 372 | 15,2763,3,978196866,"Thomas Crown Affair, The (1999)",Action|Thriller,M,25,7,22903 373 | 15,3566,2,978197314,"Big Kahuna, The (2000)",Comedy|Drama,M,25,7,22903 374 | 15,2764,3,978212254,"Thomas Crown Affair, The (1968)",Crime|Drama|Thriller,M,25,7,22903 375 | 15,1892,4,978212431,"Perfect Murder, A 
(1998)",Mystery|Thriller,M,25,7,22903 376 | 15,1608,3,978198379,Air Force One (1997),Action|Thriller,M,25,7,22903 377 | 15,1396,4,978198442,Sneakers (1992),Crime|Drama|Sci-Fi,M,25,7,22903 378 | 15,2840,4,978196848,Stigmata (1999),Thriller,M,25,7,22903 379 | 15,2841,4,978196848,Stir of Echoes (1999),Thriller,M,25,7,22903 380 | 15,3646,3,978199149,Big Momma's House (2000),Comedy,M,25,7,22903 381 | 15,500,4,978198493,Mrs. Doubtfire (1993),Comedy,M,25,7,22903 382 | 15,3145,3,978198535,"Cradle Will Rock, The (1999)",Drama,M,25,7,22903 383 | 15,2272,3,978198907,One True Thing (1998),Drama,M,25,7,22903 384 | 15,2273,3,978198834,Rush Hour (1998),Action|Thriller,M,25,7,22903 385 | 15,1619,4,978211867,Seven Years in Tibet (1997),Drama|War,M,25,7,22903 386 | 15,1625,4,978198456,"Game, The (1997)",Mystery|Thriller,M,25,7,22903 387 | 15,1487,4,978198011,Selena (1997),Drama|Musical,M,25,7,22903 388 | 15,1702,3,978212590,Flubber (1997),Children's|Comedy|Fantasy,M,25,7,22903 389 | 15,2433,4,978198735,"Civil Action, A (1998)",Drama,M,25,7,22903 390 | 15,70,2,978212287,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller,M,25,7,22903 391 | 15,1644,3,978212494,I Know What You Did Last Summer (1997),Horror|Mystery|Thriller,M,25,7,22903 392 | 15,1645,3,978212228,"Devil's Advocate, The (1997)",Crime|Horror|Mystery|Thriller,M,25,7,22903 393 | 15,2882,3,978196625,Jakob the Liar (1999),Drama,M,25,7,22903 394 | 15,2387,2,978212312,Very Bad Things (1998),Comedy|Crime,M,25,7,22903 395 | 15,2389,4,978212352,Psycho (1998),Crime|Horror|Thriller,M,25,7,22903 396 | 15,2961,3,978196848,"Story of Us, The (1999)",Comedy|Drama,M,25,7,22903 397 | 15,553,4,978198649,Tombstone (1993),Western,M,25,7,22903 398 | 15,628,4,978198040,Primal Fear (1996),Drama|Thriller,M,25,7,22903 399 | 15,3261,2,978198493,Singles (1992),Comedy|Drama|Romance,M,25,7,22903 400 | 15,2605,3,978196499,Entrapment (1999),Crime|Thriller,M,25,7,22903 401 | 15,1804,4,978212312,"Newton Boys, The (1998)",Crime|Drama,M,25,7,22903 402 | 15,1805,4,978212185,Wild Things (1998),Crime|Drama|Mystery|Thriller,M,25,7,22903 403 | 15,2391,3,978198150,"Simple Plan, A (1998)",Crime|Thriller,M,25,7,22903 404 | 15,2394,4,978196775,"Prince of Egypt, The (1998)",Animation|Musical,M,25,7,22903 405 | 15,3773,2,978199167,House Party (1990),Comedy,M,25,7,22903 406 | 15,1092,4,978198581,Basic Instinct (1992),Mystery|Thriller,M,25,7,22903 407 | 15,1672,4,978198442,"Rainmaker, The (1997)",Drama,M,25,7,22903 408 | 17,1193,5,978158471,One Flew Over the Cuckoo's Nest (1975),Drama,M,50,1,95350 409 | 17,2355,4,978159683,"Bug's Life, A (1998)",Animation|Children's|Comedy,M,50,1,95350 410 | 17,595,5,978159762,Beauty and the Beast (1991),Animation|Children's|Musical,M,50,1,95350 411 | 17,2321,5,978160304,Pleasantville (1998),Comedy,M,50,1,95350 412 | 17,720,5,978159210,Wallace & Gromit: The Best of Aardman Animation (1996),Animation,M,50,1,95350 413 | 17,1270,5,978158536,Back to the Future (1985),Comedy|Sci-Fi,M,50,1,95350 414 | 17,527,4,978159210,Schindler's List (1993),Drama|War,M,50,1,95350 415 | 17,1097,3,978160616,E.T. 
the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi,M,50,1,95350 416 | 17,2762,5,978159467,"Sixth Sense, The (1999)",Thriller,M,50,1,95350 417 | 17,260,5,978160436,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi,M,50,1,95350 418 | 17,2028,5,978159289,Saving Private Ryan (1998),Action|Drama|War,M,50,1,95350 419 | 17,3114,5,978159386,Toy Story 2 (1999),Animation|Children's|Comedy,M,50,1,95350 420 | 17,2268,4,978159683,"Few Good Men, A (1992)",Crime|Drama,M,50,1,95350 421 | 17,2916,5,978160546,Total Recall (1990),Action|Adventure|Sci-Fi|Thriller,M,50,1,95350 422 | 17,1210,4,978158536,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Romance|Sci-Fi|War,M,50,1,95350 423 | 17,1213,4,978159386,GoodFellas (1990),Crime|Drama,M,50,1,95350 424 | 17,3578,5,978158656,Gladiator (2000),Action|Drama,M,50,1,95350 425 | 17,3108,3,978160157,"Fisher King, The (1991)",Comedy|Drama|Romance,M,50,1,95350 426 | 17,1253,5,978160616,"Day the Earth Stood Still, The (1951)",Drama|Sci-Fi,M,50,1,95350 427 | 17,1610,4,978159762,"Hunt for Red October, The (1990)",Action|Thriller,M,50,1,95350 428 | 17,3147,4,978159989,"Green Mile, The (1999)",Drama|Thriller,M,50,1,95350 429 | 17,3256,4,978160337,Patriot Games (1992),Action|Thriller,M,50,1,95350 430 | 17,110,4,978159386,Braveheart (1995),Action|Drama|War,M,50,1,95350 431 | 17,3471,4,978160490,Close Encounters of the Third Kind (1977),Drama|Sci-Fi,M,50,1,95350 432 | 17,589,5,978159435,Terminator 2: Judgment Day (1991),Action|Sci-Fi|Thriller,M,50,1,95350 433 | 17,2858,5,978159467,American Beauty (1999),Comedy|Drama,M,50,1,95350 434 | 17,1408,5,978159642,"Last of the Mohicans, The (1992)",Action|Romance|War,M,50,1,95350 435 | 17,3699,4,978160739,Starman (1984),Adventure|Drama|Romance|Sci-Fi,M,50,1,95350 436 | 17,480,4,978160075,Jurassic Park (1993),Action|Adventure|Sci-Fi,M,50,1,95350 437 | 17,1265,5,978159784,Groundhog Day (1993),Comedy|Romance,M,50,1,95350 438 | 17,1370,3,978160075,Die Hard 2 (1990),Action|Thriller,M,50,1,95350 439 | 17,3334,3,978159013,Key Largo (1948),Crime|Drama|Film-Noir|Thriller,M,50,1,95350 440 | 17,590,4,978159683,Dances with Wolves (1990),Adventure|Drama|Western,M,50,1,95350 441 | 17,1196,5,978160436,Star Wars: Episode V - The Empire Strikes Back (1980),Action|Adventure|Drama|Sci-Fi|War,M,50,1,95350 442 | 17,593,5,978159289,"Silence of the Lambs, The (1991)",Drama|Thriller,M,50,1,95350 443 | 17,2571,4,978159435,"Matrix, The (1999)",Action|Sci-Fi|Thriller,M,50,1,95350 444 | 17,2396,4,978159467,Shakespeare in Love (1998),Comedy|Romance,M,50,1,95350 445 | 17,235,5,978159815,Ed Wood (1994),Comedy|Drama,M,50,1,95350 446 | 17,163,4,978160304,Desperado (1995),Action|Romance|Thriller,M,50,1,95350 447 | 17,2501,4,978159815,October Sky (1999),Drama,M,50,1,95350 448 | 17,457,5,978159859,"Fugitive, The (1993)",Action|Thriller,M,50,1,95350 449 | 17,1527,5,978160260,"Fifth Element, The (1997)",Action|Sci-Fi,M,50,1,95350 450 | 17,3418,4,978159568,Thelma & Louise (1991),Action|Drama,M,50,1,95350 451 | 17,780,4,978160993,Independence Day (ID4) (1996),Action|Sci-Fi|War,M,50,1,95350 452 | 17,318,5,978159234,"Shawshank Redemption, The (1994)",Drama,M,50,1,95350 453 | 17,356,5,978159896,Forrest Gump (1994),Comedy|Romance|War,M,50,1,95350 454 | 17,1580,4,978160933,Men in Black (1997),Action|Adventure|Comedy|Sci-Fi,M,50,1,95350 455 | 17,1214,5,978160436,Alien (1979),Action|Horror|Sci-Fi|Thriller,M,50,1,95350 456 | 17,3702,3,978160616,Mad Max (1979),Action|Sci-Fi,M,50,1,95350 457 | 17,3527,4,978160546,Predator 
(1987),Action|Sci-Fi|Thriller,M,50,1,95350 458 | 17,1240,5,978160490,"Terminator, The (1984)",Action|Sci-Fi|Thriller,M,50,1,95350 459 | 17,866,5,978159315,Bound (1996),Crime|Drama|Romance|Thriller,M,50,1,95350 460 | 17,3793,4,978158689,X-Men (2000),Action|Sci-Fi,M,50,1,95350 461 | 17,2058,3,978160129,"Negotiator, The (1998)",Action|Thriller,M,50,1,95350 462 | 17,296,5,978159386,Pulp Fiction (1994),Crime|Drama,M,50,1,95350 463 | 17,1617,5,978161166,L.A. Confidential (1997),Crime|Film-Noir|Mystery|Thriller,M,50,1,95350 464 | 17,1909,4,978160848,"X-Files: Fight the Future, The (1998)",Mystery|Sci-Fi|Thriller,M,50,1,95350 465 | 17,3006,4,978159859,"Insider, The (1999)",Drama,M,50,1,95350 466 | 17,50,5,978159289,"Usual Suspects, The (1995)",Crime|Thriller,M,50,1,95350 467 | 17,2282,4,978160337,Pecker (1998),Comedy|Drama,M,50,1,95350 468 | 17,2289,4,978159713,"Player, The (1992)",Comedy|Drama,M,50,1,95350 469 | 17,162,5,978159435,Crumb (1994),Documentary,M,50,1,95350 470 | 17,1127,4,978160616,"Abyss, The (1989)",Action|Adventure|Sci-Fi|Thriller,M,50,1,95350 471 | 17,1704,4,978159386,Good Will Hunting (1997),Drama,M,50,1,95350 472 | 17,1635,4,978160304,"Ice Storm, The (1997)",Drama,M,50,1,95350 473 | 17,1921,4,978160616,Pi (1998),Sci-Fi|Thriller,M,50,1,95350 474 | 17,1923,4,978159859,There's Something About Mary (1998),Comedy,M,50,1,95350 475 | 17,3744,3,978158733,Shaft (2000),Action|Crime,M,50,1,95350 476 | 17,968,4,978160739,Night of the Living Dead (1968),Horror|Sci-Fi,M,50,1,95350 477 | 17,2599,3,978159762,Election (1999),Comedy,M,50,1,95350 478 | 17,1653,5,978160680,Gattaca (1997),Drama|Sci-Fi|Thriller,M,50,1,95350 479 | 17,1729,4,978160337,Jackie Brown (1997),Crime|Drama,M,50,1,95350 480 | 17,913,5,978159013,"Maltese Falcon, The (1941)",Film-Noir|Mystery,M,50,1,95350 481 | 17,3267,4,978160304,"Mariachi, El (1992)",Action|Thriller,M,50,1,95350 482 | 17,3624,4,978158656,Shanghai Noon (2000),Action,M,50,1,95350 483 | 17,1594,4,978160129,In the Company of Men (1997),Drama,M,50,1,95350 484 | 17,24,3,978160933,Powder (1995),Drama|Sci-Fi,M,50,1,95350 485 | 17,2908,3,978159713,Boys Don't Cry (1999),Drama,M,50,1,95350 486 | 17,32,5,978160490,Twelve Monkeys (1995),Drama|Sci-Fi,M,50,1,95350 487 | 17,34,5,978159683,Babe (1995),Children's|Comedy|Drama,M,50,1,95350 488 | 17,3717,4,978158779,Gone in 60 Seconds (2000),Action|Crime,M,50,1,95350 489 | 17,1997,4,978158656,"Exorcist, The (1973)",Horror,M,50,1,95350 490 | 17,1573,2,978160848,Face/Off (1997),Action|Sci-Fi|Thriller,M,50,1,95350 491 | 17,3148,5,978159989,"Cider House Rules, The (1999)",Drama,M,50,1,95350 492 | 17,454,4,978160260,"Firm, The (1993)",Drama|Thriller,M,50,1,95350 493 | 17,1274,5,978160490,Akira (1988),Adventure|Animation|Sci-Fi|Thriller,M,50,1,95350 494 | 17,741,5,978159351,Ghost in the Shell (Kokaku kidotai) (1995),Animation|Sci-Fi,M,50,1,95350 495 | 17,3246,4,978160129,Malcolm X (1992),Drama,M,50,1,95350 496 | 17,2600,3,978160887,eXistenZ (1999),Action|Sci-Fi|Thriller,M,50,1,95350 497 | 17,223,4,978159762,Clerks (1994),Comedy,M,50,1,95350 498 | 17,3510,4,978158656,Frequency (2000),Drama|Thriller,M,50,1,95350 499 | 17,300,5,978159815,Quiz Show (1994),Drama,M,50,1,95350 500 | 17,1343,4,978158536,Cape Fear (1991),Thriller,M,50,1,95350 501 | 17,529,4,978160304,Searching for Bobby Fischer (1993),Drama,M,50,1,95350 502 | -------------------------------------------------------------------------------- /examples/run_ali_ccp_ctr_ranking_multi_domain.py: -------------------------------------------------------------------------------- 1 | 
import sys
2 | 
3 | sys.path.append("../")
4 | 
5 | import pandas as pd
6 | import torch
7 | from HAMUR.trainers import CTRTrainer
8 | from HAMUR.basic.features import DenseFeature, SparseFeature
9 | from HAMUR.utils.data import DataGenerator
10 | from HAMUR.models.multi_domain import Mlp_7_Layer, MLP_adap_7_layer_2_adp, DCN_MD, DCN_MD_adp, WideDeep_MD, WideDeep_MD_adp
11 | 
12 | 
13 | def get_ali_ccp_data_dict(data_path='./data/ali-ccp'):
14 |     df_train = pd.read_csv(data_path + '/ali_ccp_train_sample.csv')
15 |     df_val = pd.read_csv(data_path + '/ali_ccp_val_sample.csv')
16 |     df_test = pd.read_csv(data_path + '/ali_ccp_test_sample.csv')
17 |     print("train : val : test = %d %d %d" % (len(df_train), len(df_val), len(df_test)))
18 |     train_idx, val_idx = df_train.shape[0], df_train.shape[0] + df_val.shape[0]  # split points in the concatenated frame
19 |     data = pd.concat([df_train, df_val, df_test], axis=0)
20 |     domain_map = {1: 0, 2: 1, 3: 2}  # field "301" holds the scenario id; map it to a 0-based domain index
21 |     domain_num = 3
22 |     data["domain_indicator"] = data["301"].apply(lambda x: domain_map[x])
23 | 
24 |     col_names = data.columns.values.tolist()
25 |     dense_cols = ['D109_14', 'D110_14', 'D127_14', 'D150_14', 'D508', 'D509', 'D702', 'D853']
26 |     sparse_cols = [col for col in col_names if col not in dense_cols and col not in ['click', 'purchase']]
27 |     print("sparse cols:%d dense cols:%d" % (len(sparse_cols), len(dense_cols)))
28 | 
29 |     dense_feas = [DenseFeature(col) for col in dense_cols]
30 |     sparse_feas = [SparseFeature(col, vocab_size=data[col].max() + 1, embed_dim=16) for col in sparse_cols]
31 | 
32 |     y = data["click"]
33 |     del data["click"]
34 |     x = data
35 |     x_train, y_train = x[:train_idx], y[:train_idx]
36 |     x_val, y_val = x[train_idx:val_idx], y[train_idx:val_idx]
37 |     x_test, y_test = x[val_idx:], y[val_idx:]
38 |     return dense_feas, sparse_feas, x_train, y_train, x_val, y_val, x_test, y_test, domain_num
39 | 
40 | def main(dataset_path, model_name, epoch, learning_rate, batch_size, weight_decay, device, save_dir, seed):
41 |     torch.manual_seed(seed)
42 |     dense_feas, sparse_feas, x_train, y_train, x_val, y_val, x_test, y_test, domain_num = get_ali_ccp_data_dict(dataset_path)
43 |     dg = DataGenerator(x_train, y_train)
44 |     train_dataloader, val_dataloader, test_dataloader = dg.generate_dataloader(x_val=x_val, y_val=y_val, x_test=x_test, y_test=y_test, batch_size=batch_size)
45 | 
46 |     if model_name == "mlp":
47 |         model = Mlp_7_Layer(dense_feas + sparse_feas, domain_num=domain_num, fcn_dims=[1024, 512, 512, 256, 256, 64, 64])
48 |     elif model_name == "mlp_adp":
49 |         model = MLP_adap_7_layer_2_adp(dense_feas + sparse_feas, domain_num=domain_num, fcn_dims=[1024, 512, 512, 256, 256, 64, 64],
50 |                                        hyper_dims=[64], k=65)
51 |     elif model_name == "dcn_md":
52 |         model = DCN_MD(features=dense_feas + sparse_feas, num_domains=domain_num, n_cross_layers=7, mlp_params={"dims": [512, 512, 256, 256, 64, 64]})
53 |     elif model_name == "dcn_md_adp":
54 |         model = DCN_MD_adp(features=dense_feas + sparse_feas, num_domains=domain_num, n_cross_layers=7, k=25, mlp_params={"dims": [512, 512, 256, 256, 64, 64]}, hyper_dims=[64])
55 |     elif model_name == "wd_md":
56 |         model = WideDeep_MD(wide_features=dense_feas, deep_features=sparse_feas, num_domains=domain_num, mlp_params={"dims": [512, 512, 256, 256, 64, 64], "dropout": 0.2, "activation": "relu"})
57 |     elif model_name == "wd_md_adp":
58 |         model = WideDeep_MD_adp(wide_features=dense_feas, deep_features=sparse_feas, num_domains=domain_num, k=25, mlp_params={"dims": [512, 512, 256, 256, 64, 64], "dropout": 0.2, "activation": "relu"}, hyper_dims=[64])
59 | 
60 |     ctr_trainer = CTRTrainer(model, optimizer_params={"lr": learning_rate, "weight_decay": weight_decay}, n_epoch=epoch, earlystop_patience=5, scheduler_params={"step_size": 5, "gamma": 0.85}, device=device, model_path=save_dir)
61 |     # scheduler_fn=torch.optim.lr_scheduler.StepLR, scheduler_params={"step_size": 2, "gamma": 0.8},
62 |     ctr_trainer.fit(train_dataloader, val_dataloader)
63 |     auc1, auc2, auc3, auc = ctr_trainer.evaluate_multi_domain_auc(ctr_trainer.model, test_dataloader)
64 |     log1, log2, log3, log = ctr_trainer.evaluate_multi_domain_logloss(ctr_trainer.model, test_dataloader)
65 |     print(f'test auc: {auc} | test logloss: {log}')
66 |     print(f'domain 1 test auc: {auc1} | test logloss: {log1}')
67 |     print(f'domain 2 test auc: {auc2} | test logloss: {log2}')
68 |     print(f'domain 3 test auc: {auc3} | test logloss: {log3}')
69 | 
70 |     # save csv file
71 |     # import csv
72 |     # with open(model_name+"_"+str(seed)+'.csv', "w", newline='') as f:
73 |     #     writer = csv.writer(f)
74 |     #     writer.writerow(['model', 'seed', 'auc', 'log', 'auc1', 'log1', 'auc2', 'log2', 'auc3', 'log3'])
75 |     #     writer.writerow([model_name, str(seed), auc, log, auc1, log1, auc2, log2, auc3, log3])
76 | 
77 | 
78 | if __name__ == '__main__':
79 |     import argparse
80 |     import warnings
81 |     warnings.filterwarnings('ignore')
82 |     parser = argparse.ArgumentParser()
83 |     parser.add_argument('--dataset_path', default="data/ali-ccp")  # /home/xiaopli2/dataset/ali_ccp
84 |     parser.add_argument('--model_name', default='dcn_md')
85 |     parser.add_argument('--epoch', type=int, default=1)  # 200
86 |     parser.add_argument('--learning_rate', type=float, default=1e-3)
87 |     parser.add_argument('--batch_size', type=int, default=100)  # 4096*10
88 |     parser.add_argument('--weight_decay', type=float, default=1e-5)
89 |     parser.add_argument('--device', default='cpu')  # cuda:0
90 |     parser.add_argument('--save_dir', default='./')
91 |     parser.add_argument('--seed', type=int, default=2022)
92 | 
93 |     args = parser.parse_args()
94 |     main(args.dataset_path, args.model_name, args.epoch, args.learning_rate, args.batch_size, args.weight_decay, args.device, args.save_dir, args.seed)
95 | """
96 | python run_ali_ccp_ctr_ranking_multi_domain.py --model_name mlp_adp
97 | python run_ali_ccp_ctr_ranking_multi_domain.py --model_name dcn_md_adp
98 | python run_ali_ccp_ctr_ranking_multi_domain.py --model_name wd_md_adp
99 | """
100 | 
--------------------------------------------------------------------------------
/examples/run_movielens_rank_multi_domain.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("../")
3 | 
4 | import torch
5 | import pandas as pd
6 | from tqdm import tqdm
7 | from HAMUR.basic.features import DenseFeature, SparseFeature
8 | from sklearn.preprocessing import MinMaxScaler, LabelEncoder
9 | from HAMUR.trainers import CTRTrainer
10 | from HAMUR.utils.data import DataGenerator
11 | from HAMUR.models.multi_domain import Mlp_7_Layer, Mlp_2_Layer, MLP_adap_2_layer_1_adp, DCN_MD, DCN_MD_adp, WideDeep_MD, WideDeep_MD_adp
12 | 
13 | def get_movielens_data_rank_multidomain(data_path="./data/ml-1m"):
14 |     data = pd.read_csv(data_path + "/ml-1m-sample.csv")
15 |     data["cate_id"] = data["genres"].apply(lambda x: x.split("|")[0])
16 |     del data["genres"]
17 | 
18 |     group1 = {1, 18}
19 |     group2 = {25}
20 |     group3 = {35, 45, 50, 56}
21 | 
22 |     domain_num = 3
23 | 
24 |     data["domain_indicator"] = data["age"].apply(lambda x: map_group_indicator(x, [group1, group2, group3]))
25 | 
26 |     useless_features = ['title', 'timestamp']
27 | 
28 |     dense_features = ['age']
29 |     domain_split_feature = ['age']
30 |     sparse_features = ['user_id', 'movie_id', 'gender', 'occupation', 'zip', "cate_id", "domain_indicator"]
31 |     target = "rating"
32 | 
33 |     for feature in dense_features:
34 |         data[feature] = data[feature].apply(lambda x: convert_numeric(x))
35 |     if dense_features:
36 |         sca = MinMaxScaler()  # scale dense features to [0, 1]
37 |         data[dense_features] = sca.fit_transform(data[dense_features])
38 | 
39 |     for feature in useless_features:
40 |         del data[feature]
41 |     # sparse features (including domain_indicator) are label-encoded exactly
42 |     # once, 0-based, in the tqdm loop below: nunique() then equals max(id) + 1,
43 |     # and the domain indicator stays in 0..domain_num-1 as the models expect
44 | 
45 |     data[target] = data[target].apply(lambda x: convert_target(x))
46 | 
47 |     for feat in tqdm(sparse_features):  # encode sparse features
48 |         lbe = LabelEncoder()
49 |         data[feat] = lbe.fit_transform(data[feat])
50 | 
51 |     dense_feas = [DenseFeature(feature_name) for feature_name in dense_features]
52 |     sparse_feas = [SparseFeature(feature_name, vocab_size=data[feature_name].nunique(), embed_dim=16) for feature_name
53 |                    in sparse_features]
54 | 
55 |     y = data[target]
56 |     del data[target]
57 | 
58 |     return dense_feas, sparse_feas, data, y, domain_num
59 | 
60 | 
61 | def map_group_indicator(age, list_group):
62 |     l = len(list_group)
63 |     for i in range(l):
64 |         if age in list_group[i]:
65 |             return i
66 | 
67 | 
68 | def convert_target(val):
69 |     v = int(val)  # binarize ratings: > 3 counts as a positive label
70 |     if v > 3:
71 |         return 1
72 |     else:
73 |         return 0
74 | 
75 | 
76 | def convert_numeric(val):
77 |     """
78 |     Force conversion to int
79 |     """
80 |     return int(val)
81 | 
82 | 
83 | def df_to_dict_multi_domain(data, columns):
84 |     """
85 |     Convert an array to the dict-type input the network accepts
86 |     Args:
87 |         data (array): 3D array (length * domain_num * feature_num)
88 |         columns (list): feature name list
89 |     Returns:
90 |         The converted dict, which can be fed directly to the network
91 |     """
92 | 
93 |     data_dict = {}
94 |     for i in range(len(columns)):
95 |         data_dict[columns[i]] = data[:, :, i]
96 |     return data_dict
97 | 
98 | def main(dataset_path, model_name, epoch, learning_rate, batch_size, weight_decay, device, save_dir, seed):
99 |     torch.manual_seed(seed)
100 |     dense_feas, sparse_feas, x, y, domain_num = get_movielens_data_rank_multidomain(dataset_path)
101 |     dg = DataGenerator(x, y)
102 |     train_dataloader, val_dataloader, test_dataloader = dg.generate_dataloader(split_ratio=[0.8, 0.1], batch_size=batch_size)
103 |     if model_name == "mlp":
104 |         model = Mlp_2_Layer(dense_feas + sparse_feas, domain_num=domain_num, fcn_dims=[256, 128])
105 |     elif model_name == "mlp_adp":
106 |         model = MLP_adap_2_layer_1_adp(dense_feas + sparse_feas, domain_num=domain_num, fcn_dims=[256, 128],
107 |                                        hyper_dims=[64], k=35)
108 |     elif model_name == "dcn_md":
109 |         model = DCN_MD(features=dense_feas + sparse_feas, num_domains=domain_num, n_cross_layers=2, mlp_params={"dims": [256, 128]})
110 |     elif model_name == "dcn_md_adp":
111 |         model = DCN_MD_adp(features=dense_feas + sparse_feas, num_domains=domain_num, n_cross_layers=2, k=30, mlp_params={"dims": [256, 128]}, hyper_dims=[128])
112 |     elif model_name == "wd_md":
113 |         model = WideDeep_MD(wide_features=dense_feas, num_domains=domain_num, deep_features=sparse_feas, mlp_params={"dims": [256, 128], "dropout": 0.2, "activation": "relu"})
114 |     elif model_name == "wd_md_adp":
115 |         model = WideDeep_MD_adp(wide_features=dense_feas, num_domains=domain_num, deep_features=sparse_feas, k=45, mlp_params={"dims": [256, 128], "dropout": 0.2, "activation": "relu"}, hyper_dims=[128])
"activation": "relu"}, hyper_dims=[128]) 116 | ctr_trainer = CTRTrainer(model, optimizer_params={"lr": learning_rate, "weight_decay": weight_decay}, n_epoch=epoch, earlystop_patience=10, device=device, model_path=save_dir,scheduler_params={"step_size": 4,"gamma": 0.85}) 117 | #scheduler_fn=torch.optim.lr_scheduler.StepLR,scheduler_params={"step_size": 2,"gamma": 0.8}, 118 | ctr_trainer.fit(train_dataloader, val_dataloader) 119 | auc1,auc2,auc3,auc = ctr_trainer.evaluate_multi_domain_auc(ctr_trainer.model, test_dataloader) 120 | log1,log2,log3,log = ctr_trainer.evaluate_multi_domain_logloss(ctr_trainer.model, test_dataloader) 121 | print(f'test auc: {auc} | test logloss: {log}') 122 | print(f'domain 1 test auc: {auc1} | test logloss: {log1}') 123 | print(f'domain 2 test auc: {auc2} | test logloss: {log2}') 124 | print(f'domain 3 test auc: {auc3} | test logloss: {log3}') 125 | 126 | # save csv file 127 | # import csv 128 | # with open(model_name+"_"+str(seed)+'.csv', "w", newline='') as f: 129 | # writer = csv.writer(f) 130 | # writer.writerow(['model', 'seed', 'auc', 'log', 'auc1', 'log1', 'auc2', 'log2', 'auc3', 'log3']) 131 | # writer.writerow([model_name, str(seed), auc, log, auc1,log1,auc2,log2,auc3,log3]) 132 | 133 | if __name__ == '__main__': 134 | import argparse 135 | parser = argparse.ArgumentParser() 136 | parser.add_argument('--dataset_path', default="./data/ml-1m") 137 | parser.add_argument('--model_name', default='mlp_adp') 138 | parser.add_argument('--epoch', type=int, default=1) #100 139 | parser.add_argument('--learning_rate', type=float, default=1e-3) 140 | parser.add_argument('--batch_size', type=int, default=4096*10) #4096 141 | parser.add_argument('--weight_decay', type=float, default=1e-5) 142 | parser.add_argument('--device', default='cpu') #cuda:0 143 | parser.add_argument('--save_dir', default='./') 144 | parser.add_argument('--seed', type=int, default=2022) 145 | 146 | args = parser.parse_args() 147 | main(args.dataset_path, args.model_name, args.epoch, args.learning_rate, args.batch_size, args.weight_decay, args.device, args.save_dir, args.seed) 148 | -------------------------------------------------------------------------------- /framework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/HAMUR/53d8dd588282bc288f2621b8fa85e2df9b910e10/framework.jpg --------------------------------------------------------------------------------