├── din
│   ├── utils.py
│   ├── __init__.py
│   ├── embedding.py
│   ├── dice.py
│   ├── fc.py
│   ├── attention.py
│   └── model.py
└── README.md

/din/utils.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/din/__init__.py:
--------------------------------------------------------------------------------
from .model import DeepInterestNetwork
--------------------------------------------------------------------------------
/din/embedding.py:
--------------------------------------------------------------------------------
import torch.nn as nn


class EmbeddingLayer(nn.Module):
    def __init__(self, feature_dim, embedding_dim):
        super().__init__()

        # index 0 is reserved for padding
        self.embed = nn.Embedding(feature_dim, embedding_dim, padding_idx=0)

        # normal weight initialization
        self.embed.weight.data.normal_(0., 0.0001)
        # TODO: regularization

    def forward(self, x):
        return self.embed(x)


if __name__ == "__main__":
    import torch

    a = EmbeddingLayer(10, 12)
    b = torch.ones((2048,)).long()
    print(a(b).size())  # expected: 2048 x 12
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# PyTorch Implementation of Deep Interest Network for Click-Through Rate Prediction

### Overview

This repository contains an unofficial PyTorch reimplementation of the KDD 2018 paper ***Deep Interest Network for Click-Through Rate Prediction***.
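
### Usage

A minimal sketch of constructing the model (not a tested training script). `DeepInterestNetwork` takes a config dict with an `embedding_size` key, and its `forward` expects a dict of feature tensors keyed by the names in `dim_config` in `din/model.py`. Note that the layers call `.cuda()` in `__init__`, so a CUDA device is required.

```python
import torch
from din import DeepInterestNetwork

model = DeepInterestNetwork({'embedding_size': 8})

# user_features maps the names in din/model.py's dim_config (e.g. 'query_article_id',
# 'history_image_feature', 'history_len', ...) to tensors of the corresponding shapes
# output = model(user_features)   # batch_size * 1 click-through probabilities
```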

### To-Dos

- [x] Simple Architecture Reimplementation
- [x] Dice Activation / PReLU
- [ ] Data Evaluation (not trained yet)
- [x] Dropout
- [ ] Regularization in DiFacto
- [ ] Mini-Batch Aware (MBA) regularization

### Acknowledgements

Parts of the code are adapted from the following projects:

* [Deep Interest Network](https://github.com/zhougr1993/DeepInterestNetwork): the official repository for DIN

* [DeepCTR](https://github.com/shenweichen/DeepCTR): a collection of Click-Through Rate (CTR) prediction models
--------------------------------------------------------------------------------
/din/dice.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


class Dice(nn.Module):
    """Data-adaptive activation function (Dice) from the DIN paper."""

    def __init__(self, num_features, dim=2):
        super(Dice, self).__init__()
        assert dim == 2 or dim == 3
        self.bn = nn.BatchNorm1d(num_features, eps=1e-9)
        self.sigmoid = nn.Sigmoid()
        self.dim = dim

        # alpha is learnable; registering it as a Parameter also lets it move with .cuda()/.to()
        if self.dim == 3:
            self.alpha = nn.Parameter(torch.zeros((num_features, 1)))
        elif self.dim == 2:
            self.alpha = nn.Parameter(torch.zeros((num_features,)))

    def forward(self, x):
        if self.dim == 3:
            # x: batch_size * time_seq_len * num_features
            x = torch.transpose(x, 1, 2)
            x_p = self.sigmoid(self.bn(x))
            out = self.alpha * (1 - x_p) * x + x_p * x
            out = torch.transpose(out, 1, 2)

        elif self.dim == 2:
            # x: batch_size * num_features
            x_p = self.sigmoid(self.bn(x))
            out = self.alpha * (1 - x_p) * x + x_p * x

        return out


if __name__ == "__main__":
    a = Dice(32)
    b = torch.zeros((10, 32))
    c = a(b)
    print(c.size())  # expected: 10 x 32
--------------------------------------------------------------------------------
/din/fc.py:
--------------------------------------------------------------------------------
import torch.nn as nn

from .dice import Dice
#from dice import Dice  # use this import when running the file as a standalone script


class FullyConnectedLayer(nn.Module):
    def __init__(self, input_size, hidden_size, bias, batch_norm=True, dropout_rate=0.5, activation='relu', sigmoid=False, dice_dim=2):
        super(FullyConnectedLayer, self).__init__()
        assert len(hidden_size) >= 1 and len(bias) >= 1
        assert len(bias) == len(hidden_size)
        self.sigmoid = sigmoid

        layers = [nn.Linear(input_size, hidden_size[0], bias=bias[0])]

        # batch norm, activation and dropout are inserted between consecutive linear layers
        for i, h in enumerate(hidden_size[:-1]):
            if batch_norm:
                layers.append(nn.BatchNorm1d(hidden_size[i]))

            if activation.lower() == 'relu':
                layers.append(nn.ReLU(inplace=True))
            elif activation.lower() == 'dice':
                assert dice_dim
                layers.append(Dice(hidden_size[i], dim=dice_dim))
            elif activation.lower() == 'prelu':
                layers.append(nn.PReLU())
            else:
                raise NotImplementedError

            layers.append(nn.Dropout(p=dropout_rate))
            layers.append(nn.Linear(hidden_size[i], hidden_size[i + 1], bias=bias[i + 1]))

        self.fc = nn.Sequential(*layers)
        if self.sigmoid:
            self.output_layer = nn.Sigmoid()

        # weight initialization: xavier_normal (glorot_normal in keras / tf)
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight.data, gain=1.0)
                if m.bias is not None:
                    nn.init.zeros_(m.bias.data)

    def forward(self, x):
        return self.output_layer(self.fc(x)) if self.sigmoid else self.fc(x)


if __name__ == "__main__":
    import torch
    from torchsummary import summary

    a = FullyConnectedLayer(2, [200, 80, 1], bias=[True, True, False])
    summary(a, input_size=(2,))
    b = torch.zeros((3, 2))
    print(a(b).size())  # expected: 3 x 1
--------------------------------------------------------------------------------
/din/attention.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn

from .fc import FullyConnectedLayer


class AttentionSequencePoolingLayer(nn.Module):
    def __init__(self, embedding_dim=4):
        super(AttentionSequencePoolingLayer, self).__init__()

        # TODO: DICE activation function
        # TODO: attention weight normalization
        self.local_att = LocalActivationUnit(hidden_size=[64, 16], bias=[True, True], embedding_dim=embedding_dim, batch_norm=False)

    def forward(self, query_ad, user_behavior, user_behavior_length):
        # query ad            : batch_size * 1 * embedding_size
        # user behavior       : batch_size * time_seq_len * embedding_size
        # user behavior length: batch_size * 1
        # output              : batch_size * 1 * embedding_size

        attention_score = self.local_att(query_ad, user_behavior)
        attention_score = torch.transpose(attention_score, 1, 2)  # batch_size * 1 * time_seq_len

        # define the padding mask from the sequence lengths, kept on the same device as the inputs
        mask = torch.arange(user_behavior.size(1), device=user_behavior.device)[None, :] < user_behavior_length.to(user_behavior.device)[:, None]
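        # For illustration: with time_seq_len = 4 and user_behavior_length = [[2], [4]], this gives
        # mask = [[[True, True, False, False]], [[True, True, True, True]]] (batch_size * 1 * time_seq_len),
        # so attention weights on padded positions are zeroed out before the weighted sum below.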

        # mask out padded positions: batch_size * 1 * time_seq_len
        output = torch.mul(attention_score, mask.float())

        # weighted sum over the behavior sequence
        output = torch.matmul(output, user_behavior)

        return output


class LocalActivationUnit(nn.Module):
    def __init__(self, hidden_size=[80, 40], bias=[True, True], embedding_dim=4, batch_norm=False):
        super(LocalActivationUnit, self).__init__()
        self.fc1 = FullyConnectedLayer(input_size=4 * embedding_dim,
                                       hidden_size=hidden_size,
                                       bias=bias,
                                       batch_norm=batch_norm,
                                       activation='dice',
                                       dice_dim=3)

        self.fc2 = FullyConnectedLayer(input_size=hidden_size[-1],
                                       hidden_size=[1],
                                       bias=[True],
                                       batch_norm=batch_norm,
                                       activation='dice',
                                       dice_dim=3)
        # TODO: fc_2 initialization

    def forward(self, query, user_behavior):
        # query ad      : batch_size * 1 * embedding_size
        # user behavior : batch_size * time_seq_len * embedding_size

        user_behavior_len = user_behavior.size(1)
        queries = torch.cat([query for _ in range(user_behavior_len)], dim=1)

        # as in the paper: query, key, their difference and their element-wise product
        attention_input = torch.cat([queries, user_behavior, queries - user_behavior, queries * user_behavior], dim=-1)
        attention_output = self.fc1(attention_input)
        attention_output = self.fc2(attention_output)

        return attention_output


if __name__ == "__main__":
    a = AttentionSequencePoolingLayer()

    b = torch.zeros((3, 1, 4))
    c = torch.zeros((3, 20, 4))
    d = torch.ones((3, 1))
    print(a(b, c, d).size())  # expected: 3 x 1 x 4
--------------------------------------------------------------------------------
/din/model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn

from .embedding import EmbeddingLayer
from .fc import FullyConnectedLayer
from .attention import AttentionSequencePoolingLayer


dim_config = {
    'user_exposed_time': 24,
    'user_gender': 2,
    'user_age': 9,
    'history_article_id': 53932,   # multi-hot
    'history_image_feature': 2048,
    'history_categories': 23,
    'query_article_id': 1856,      # one-hot
    'query_image_feature': 2048,
    'query_categories': 23
}

que_embed_features = ['query_article_id']
que_image_features = ['query_image_feature']
que_category = ['query_categories']

his_embed_features = ['history_article_id']
his_image_features = ['history_image_feature']
his_category = ['history_categories']

image_hidden_dim = 64
category_dim = 23

embed_features = [k for k, _ in dim_config.items() if 'user' in k]
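
# Dimension bookkeeping (for reference): each query / history item is represented by
# embedding_size (article id) + image_hidden_dim (64, projected image feature) + category_dim (23)
# values; the final MLP consumes the query vector, the attention-pooled history vector and the raw
# user features (24 + 2 + 9 = 35 dims) concatenated together -- see fc_layer's input_size below.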


class DeepInterestNetwork(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        embedding_size = config['embedding_size']

        # nn.ModuleDict (rather than a plain dict) so the embedding parameters are registered with the module
        self.query_feature_embedding_dict = nn.ModuleDict()
        for feature in que_embed_features:
            self.query_feature_embedding_dict[feature] = EmbeddingLayer(feature_dim=dim_config[feature],
                                                                        embedding_dim=embedding_size).cuda()
        self.query_image_fc = FullyConnectedLayer(input_size=2048,
                                                  hidden_size=[image_hidden_dim],
                                                  bias=[True],
                                                  activation='relu').cuda()

        self.history_feature_embedding_dict = nn.ModuleDict()
        for feature in his_embed_features:
            self.history_feature_embedding_dict[feature] = EmbeddingLayer(feature_dim=dim_config[feature],
                                                                          embedding_dim=embedding_size).cuda()
        self.history_image_fc = FullyConnectedLayer(input_size=2048,
                                                    hidden_size=[image_hidden_dim],
                                                    bias=[True],
                                                    activation='relu').cuda()

        self.attn = AttentionSequencePoolingLayer(embedding_dim=image_hidden_dim + embedding_size + category_dim).cuda()
        self.fc_layer = FullyConnectedLayer(input_size=2 * (image_hidden_dim + embedding_size + category_dim) + sum([dim_config[k] for k in embed_features]),
                                            hidden_size=[200, 80, 1],
                                            bias=[True, True, False],
                                            activation='relu',
                                            sigmoid=True).cuda()

    def forward(self, user_features):
        # user_features: dict (key: feature name, value: feature tensor)

        # raw user features are used as-is (already one-/multi-hot encoded)
        feature_embedded = []
        for feature in embed_features:
            feature_embedded.append(user_features[feature])
        feature_embedded = torch.cat(feature_embedded, dim=1)
        # batch_size * (sum of user feature dims)

        # query ad: id embedding + projected image feature + categories
        query_feature_embedded = []
        for feature in que_embed_features:
            query_feature_embedded.append(self.query_feature_embedding_dict[feature](user_features[feature].squeeze()))
        for feature in que_image_features:
            query_feature_embedded.append(self.query_image_fc(user_features[feature]))
        for feature in que_category:
            query_feature_embedded.append(user_features[feature])
        query_feature_embedded = torch.cat(query_feature_embedded, dim=1)
        # batch_size * (embedding_size + image_hidden_dim + category_dim)

        # user behavior history: the same layout per time step
        history_feature_embedded = []
        for feature in his_embed_features:
            history_feature_embedded.append(self.history_feature_embedding_dict[feature](user_features[feature]))
        for feature in his_image_features:
            history_feature_embedded.append(self.history_image_fc(user_features[feature]))
        for feature in his_category:
            history_feature_embedded.append(user_features[feature])
        history_feature_embedded = torch.cat(history_feature_embedded, dim=2)
        # batch_size * time_seq_len * (embedding_size + image_hidden_dim + category_dim)

        # attention pooling of the behavior history with the query ad
        history = self.attn(query_feature_embedded.unsqueeze(1),
                            history_feature_embedded,
                            user_features['history_len'])

        concat_feature = torch.cat([feature_embedded, query_feature_embedded, history.squeeze()], dim=1)

        # fully-connected layers
        output = self.fc_layer(concat_feature)
        return output


if __name__ == "__main__":
    # smoke test; requires a CUDA device because the layers are moved to the GPU in __init__
    batch_size, seq_len = 2, 5
    model = DeepInterestNetwork({'embedding_size': 8})

    user_feature = {
        'user_exposed_time': torch.zeros(batch_size, 24).cuda(),
        'user_gender': torch.zeros(batch_size, 2).cuda(),
        'user_age': torch.zeros(batch_size, 9).cuda(),
        'query_article_id': torch.ones(batch_size, 1, dtype=torch.long).cuda(),
        'query_image_feature': torch.zeros(batch_size, 2048).cuda(),
        'query_categories': torch.zeros(batch_size, 23).cuda(),
        'history_article_id': torch.ones(batch_size, seq_len, dtype=torch.long).cuda(),
        'history_image_feature': torch.zeros(batch_size, seq_len, 2048).cuda(),
        'history_categories': torch.zeros(batch_size, seq_len, 23).cuda(),
        'history_len': torch.full((batch_size, 1), seq_len, dtype=torch.long).cuda(),
    }
    print(model(user_feature).size())  # expected: batch_size x 1
--------------------------------------------------------------------------------