├── din
│   ├── utils.py
│   ├── __init__.py
│   ├── embedding.py
│   ├── dice.py
│   ├── fc.py
│   ├── attention.py
│   └── model.py
└── README.md

/din/utils.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/din/__init__.py:
--------------------------------------------------------------------------------
from .model import DeepInterestNetwork
--------------------------------------------------------------------------------
/din/embedding.py:
--------------------------------------------------------------------------------
import torch.nn as nn


class EmbeddingLayer(nn.Module):
    def __init__(self, feature_dim, embedding_dim):
        super().__init__()

        # index 0 is reserved for padding
        self.embed = nn.Embedding(feature_dim, embedding_dim, padding_idx=0)

        # normal weight initialization
        self.embed.weight.data.normal_(0., 0.0001)
        # TODO: regularization

    def forward(self, x):
        return self.embed(x)


if __name__ == "__main__":
    import torch

    a = EmbeddingLayer(10, 12)
    b = torch.ones((2048,)).long()
    print(a(b).size())  # expected: 2048 x 12
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# PyTorch Implementation of Deep Interest Network for Click-Through Rate Prediction

### Overview

This repository contains an unofficial PyTorch reimplementation of the KDD 2018 paper ***Deep Interest Network for Click-Through Rate Prediction***.
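
### Usage

A minimal sketch of constructing the model (not a tested training script). `DeepInterestNetwork` takes a config dict with an `embedding_size` key, and its `forward` expects a dict of feature tensors keyed by the names in `dim_config` in `din/model.py`. Note that the layers call `.cuda()` in `__init__`, so a CUDA device is required.

```python
import torch
from din import DeepInterestNetwork

model = DeepInterestNetwork({'embedding_size': 8})

# user_features maps the names in din/model.py's dim_config (e.g. 'query_article_id',
# 'history_image_feature', 'history_len', ...) to tensors of the corresponding shapes
# output = model(user_features)   # batch_size * 1 click-through probabilities
```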

### To-Dos

- [x] Simple Architecture Reimplementation
- [x] Dice Activation / PReLU
- [ ] Data Evaluation (not trained yet)
- [x] Dropout
- [ ] Regularization in DiFacto
- [ ] Mini-Batch Aware (MBA) regularization

### Acknowledgements

Parts of the code are adapted from the following projects:

* [Deep Interest Network](https://github.com/zhougr1993/DeepInterestNetwork): the official repository for DIN

* [DeepCTR](https://github.com/shenweichen/DeepCTR): a collection of Click-Through Rate (CTR) prediction models
--------------------------------------------------------------------------------
/din/dice.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


class Dice(nn.Module):
    """Data-adaptive activation function (Dice) from the DIN paper."""

    def __init__(self, num_features, dim=2):
        super(Dice, self).__init__()
        assert dim == 2 or dim == 3
        self.bn = nn.BatchNorm1d(num_features, eps=1e-9)
        self.sigmoid = nn.Sigmoid()
        self.dim = dim

        # alpha is learnable; registering it as a Parameter also lets it move with .cuda()/.to()
        if self.dim == 3:
            self.alpha = nn.Parameter(torch.zeros((num_features, 1)))
        elif self.dim == 2:
            self.alpha = nn.Parameter(torch.zeros((num_features,)))

    def forward(self, x):
        if self.dim == 3:
            # x: batch_size * time_seq_len * num_features
            x = torch.transpose(x, 1, 2)
            x_p = self.sigmoid(self.bn(x))
            out = self.alpha * (1 - x_p) * x + x_p * x
            out = torch.transpose(out, 1, 2)

        elif self.dim == 2:
            # x: batch_size * num_features
            x_p = self.sigmoid(self.bn(x))
            out = self.alpha * (1 - x_p) * x + x_p * x

        return out


if __name__ == "__main__":
    a = Dice(32)
    b = torch.zeros((10, 32))
    c = a(b)
    print(c.size())  # expected: 10 x 32
--------------------------------------------------------------------------------
/din/fc.py:
--------------------------------------------------------------------------------
import torch.nn as nn

from .dice import Dice
#from dice import Dice  # use this import when running the file as a standalone script


class FullyConnectedLayer(nn.Module):
    def __init__(self, input_size, hidden_size, bias, batch_norm=True, dropout_rate=0.5, activation='relu', sigmoid=False, dice_dim=2):
        super(FullyConnectedLayer, self).__init__()
        assert len(hidden_size) >= 1 and len(bias) >= 1
        assert len(bias) == len(hidden_size)
        self.sigmoid = sigmoid

        layers = [nn.Linear(input_size, hidden_size[0], bias=bias[0])]

        # batch norm, activation and dropout are inserted between consecutive linear layers
        for i, h in enumerate(hidden_size[:-1]):
            if batch_norm:
                layers.append(nn.BatchNorm1d(hidden_size[i]))

            if activation.lower() == 'relu':
                layers.append(nn.ReLU(inplace=True))
            elif activation.lower() == 'dice':
                assert dice_dim
                layers.append(Dice(hidden_size[i], dim=dice_dim))
            elif activation.lower() == 'prelu':
                layers.append(nn.PReLU())
            else:
                raise NotImplementedError

            layers.append(nn.Dropout(p=dropout_rate))
            layers.append(nn.Linear(hidden_size[i], hidden_size[i + 1], bias=bias[i + 1]))

        self.fc = nn.Sequential(*layers)
        if self.sigmoid:
            self.output_layer = nn.Sigmoid()

        # weight initialization: xavier_normal (glorot_normal in keras / tf)
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight.data, gain=1.0)
                if m.bias is not None:
                    nn.init.zeros_(m.bias.data)

    def forward(self, x):
        return self.output_layer(self.fc(x)) if self.sigmoid else self.fc(x)


if __name__ == "__main__":
    import torch
    from torchsummary import summary

    a = FullyConnectedLayer(2, [200, 80, 1], bias=[True, True, False])
    summary(a, input_size=(2,))
    b = torch.zeros((3, 2))
    print(a(b).size())  # expected: 3 x 1
--------------------------------------------------------------------------------
/din/attention.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn

from .fc import FullyConnectedLayer


class AttentionSequencePoolingLayer(nn.Module):
    def __init__(self, embedding_dim=4):
        super(AttentionSequencePoolingLayer, self).__init__()

        # TODO: DICE activation function
        # TODO: attention weight normalization
        self.local_att = LocalActivationUnit(hidden_size=[64, 16], bias=[True, True], embedding_dim=embedding_dim, batch_norm=False)

    def forward(self, query_ad, user_behavior, user_behavior_length):
        # query ad            : batch_size * 1 * embedding_size
        # user behavior       : batch_size * time_seq_len * embedding_size
        # user behavior length: batch_size * 1
        # output              : batch_size * 1 * embedding_size

        attention_score = self.local_att(query_ad, user_behavior)
        attention_score = torch.transpose(attention_score, 1, 2)  # batch_size * 1 * time_seq_len

        # define the padding mask from the sequence lengths, kept on the same device as the inputs
        mask = torch.arange(user_behavior.size(1), device=user_behavior.device)[None, :] < user_behavior_length.to(user_behavior.device)[:, None]
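        # For illustration: with time_seq_len = 4 and user_behavior_length = [[2], [4]], this gives
        # mask = [[[True, True, False, False]], [[True, True, True, True]]] (batch_size * 1 * time_seq_len),
        # so attention weights on padded positions are zeroed out before the weighted sum below.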

        # mask out padded positions: batch_size * 1 * time_seq_len
        output = torch.mul(attention_score, mask.float())

        # weighted sum over the behavior sequence
        output = torch.matmul(output, user_behavior)

        return output


class LocalActivationUnit(nn.Module):
    def __init__(self, hidden_size=[80, 40], bias=[True, True], embedding_dim=4, batch_norm=False):
        super(LocalActivationUnit, self).__init__()
        self.fc1 = FullyConnectedLayer(input_size=4 * embedding_dim,
                                       hidden_size=hidden_size,
                                       bias=bias,
                                       batch_norm=batch_norm,
                                       activation='dice',
                                       dice_dim=3)

        self.fc2 = FullyConnectedLayer(input_size=hidden_size[-1],
                                       hidden_size=[1],
                                       bias=[True],
                                       batch_norm=batch_norm,
                                       activation='dice',
                                       dice_dim=3)
        # TODO: fc_2 initialization

    def forward(self, query, user_behavior):
        # query ad      : batch_size * 1 * embedding_size
        # user behavior : batch_size * time_seq_len * embedding_size

        user_behavior_len = user_behavior.size(1)
        queries = torch.cat([query for _ in range(user_behavior_len)], dim=1)

        # as in the paper: query, key, their difference and their element-wise product
        attention_input = torch.cat([queries, user_behavior, queries - user_behavior, queries * user_behavior], dim=-1)
        attention_output = self.fc1(attention_input)
        attention_output = self.fc2(attention_output)

        return attention_output


if __name__ == "__main__":
    a = AttentionSequencePoolingLayer()

    b = torch.zeros((3, 1, 4))
    c = torch.zeros((3, 20, 4))
    d = torch.ones((3, 1))
    print(a(b, c, d).size())  # expected: 3 x 1 x 4
--------------------------------------------------------------------------------
/din/model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn

from .embedding import EmbeddingLayer
from .fc import FullyConnectedLayer
from .attention import AttentionSequencePoolingLayer


dim_config = {
    'user_exposed_time': 24,
    'user_gender': 2,
    'user_age': 9,
    'history_article_id': 53932,   # multi-hot
    'history_image_feature': 2048,
    'history_categories': 23,
    'query_article_id': 1856,      # one-hot
    'query_image_feature': 2048,
    'query_categories': 23
}

que_embed_features = ['query_article_id']
que_image_features = ['query_image_feature']
que_category = ['query_categories']

his_embed_features = ['history_article_id']
his_image_features = ['history_image_feature']
his_category = ['history_categories']

image_hidden_dim = 64
category_dim = 23

embed_features = [k for k, _ in dim_config.items() if 'user' in k]
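
# Dimension bookkeeping (for reference): each query / history item is represented by
# embedding_size (article id) + image_hidden_dim (64, projected image feature) + category_dim (23)
# values; the final MLP consumes the query vector, the attention-pooled history vector and the raw
# user features (24 + 2 + 9 = 35 dims) concatenated together -- see fc_layer's input_size below.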


class DeepInterestNetwork(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        embedding_size = config['embedding_size']

        # nn.ModuleDict (rather than a plain dict) so the embedding parameters are registered with the module
        self.query_feature_embedding_dict = nn.ModuleDict()
        for feature in que_embed_features:
            self.query_feature_embedding_dict[feature] = EmbeddingLayer(feature_dim=dim_config[feature],
                                                                        embedding_dim=embedding_size).cuda()
        self.query_image_fc = FullyConnectedLayer(input_size=2048,
                                                  hidden_size=[image_hidden_dim],
                                                  bias=[True],
                                                  activation='relu').cuda()

        self.history_feature_embedding_dict = nn.ModuleDict()
        for feature in his_embed_features:
            self.history_feature_embedding_dict[feature] = EmbeddingLayer(feature_dim=dim_config[feature],
                                                                          embedding_dim=embedding_size).cuda()
        self.history_image_fc = FullyConnectedLayer(input_size=2048,
                                                    hidden_size=[image_hidden_dim],
                                                    bias=[True],
                                                    activation='relu').cuda()

        self.attn = AttentionSequencePoolingLayer(embedding_dim=image_hidden_dim + embedding_size + category_dim).cuda()
        self.fc_layer = FullyConnectedLayer(input_size=2 * (image_hidden_dim + embedding_size + category_dim) + sum([dim_config[k] for k in embed_features]),
                                            hidden_size=[200, 80, 1],
                                            bias=[True, True, False],
                                            activation='relu',
                                            sigmoid=True).cuda()

    def forward(self, user_features):
        # user_features: dict (key: feature name, value: feature tensor)

        # raw user features are used as-is (already one-/multi-hot encoded)
        feature_embedded = []
        for feature in embed_features:
            feature_embedded.append(user_features[feature])
        feature_embedded = torch.cat(feature_embedded, dim=1)
        # batch_size * (sum of user feature dims)

        # query ad: id embedding + projected image feature + categories
        query_feature_embedded = []
        for feature in que_embed_features:
            query_feature_embedded.append(self.query_feature_embedding_dict[feature](user_features[feature].squeeze()))
        for feature in que_image_features:
            query_feature_embedded.append(self.query_image_fc(user_features[feature]))
        for feature in que_category:
            query_feature_embedded.append(user_features[feature])
        query_feature_embedded = torch.cat(query_feature_embedded, dim=1)
        # batch_size * (embedding_size + image_hidden_dim + category_dim)

        # user behavior history: the same layout per time step
        history_feature_embedded = []
        for feature in his_embed_features:
            history_feature_embedded.append(self.history_feature_embedding_dict[feature](user_features[feature]))
        for feature in his_image_features:
            history_feature_embedded.append(self.history_image_fc(user_features[feature]))
        for feature in his_category:
            history_feature_embedded.append(user_features[feature])
        history_feature_embedded = torch.cat(history_feature_embedded, dim=2)
        # batch_size * time_seq_len * (embedding_size + image_hidden_dim + category_dim)

        # attention pooling of the behavior history with the query ad
        history = self.attn(query_feature_embedded.unsqueeze(1),
                            history_feature_embedded,
                            user_features['history_len'])

        concat_feature = torch.cat([feature_embedded, query_feature_embedded, history.squeeze()], dim=1)

        # fully-connected layers
        output = self.fc_layer(concat_feature)
        return output


if __name__ == "__main__":
    # smoke test; requires a CUDA device because the layers are moved to the GPU in __init__
    batch_size, seq_len = 2, 5
    model = DeepInterestNetwork({'embedding_size': 8})

    user_feature = {
        'user_exposed_time': torch.zeros(batch_size, 24).cuda(),
        'user_gender': torch.zeros(batch_size, 2).cuda(),
        'user_age': torch.zeros(batch_size, 9).cuda(),
        'query_article_id': torch.ones(batch_size, 1, dtype=torch.long).cuda(),
        'query_image_feature': torch.zeros(batch_size, 2048).cuda(),
        'query_categories': torch.zeros(batch_size, 23).cuda(),
        'history_article_id': torch.ones(batch_size, seq_len, dtype=torch.long).cuda(),
        'history_image_feature': torch.zeros(batch_size, seq_len, 2048).cuda(),
        'history_categories': torch.zeros(batch_size, seq_len, 23).cuda(),
        'history_len': torch.full((batch_size, 1), seq_len, dtype=torch.long).cuda(),
    }
    print(model(user_feature).size())  # expected: batch_size x 1
--------------------------------------------------------------------------------