├── README.md
├── layers.py
└── models.py

/README.md:
--------------------------------------------------------------------------------
# hcan-pytorch
Hierarchical convolutional attention networks for text classification.

```
from models import Proto_CNN as HCAN

model = HCAN(input_dim, hidden_dim, kernel_dim,
             sent_maxlen, dropout_rate, num_emb, pretrained_weight)
```
If you don't have `pretrained_weight`, modify the class `Proto` to make `pretrained_weight` optional.

```
logits = model(x, None)
```
The second argument is the length of `x` (`l`); since it is no longer used, pass `None`.
`x` is a tensor of word indices.
--------------------------------------------------------------------------------
/layers.py:
--------------------------------------------------------------------------------
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.module import Module


class ConvolutionalMultiheadAttention(Module):
    def __init__(self, input_dim, kernel_dim, multihead_cnt, conv_cnt):
        super(ConvolutionalMultiheadAttention, self).__init__()
        self.input_dim = input_dim
        self.multihead_cnt = multihead_cnt

        self.convs = nn.ModuleList([nn.Conv1d(input_dim, input_dim, kernel_dim)
                                    for _ in range(conv_cnt)])
        for w in self.convs:
            nn.init.xavier_normal_(w.weight)

    def attention(self, q, k, v):
        # Scaled dot-product attention; q, k, v are (batch, head_dim, seq_len).
        return torch.softmax(torch.div(torch.bmm(q.permute(0, 2, 1), k),
                                       np.sqrt(self.input_dim)), 2).bmm(v.permute(0, 2, 1)).permute(0, 2, 1)

    def multihead(self, hiddens):
        # Split q, k, v into multihead_cnt chunks along the channel dimension,
        # attend per head, then concatenate the heads back together.
        hiddens = [torch.chunk(hidden, self.multihead_cnt, 1) for hidden in hiddens]
        hiddens = torch.cat([self.attention(hiddens[0][i], hiddens[1][i], hiddens[2][i])
                             for i in range(self.multihead_cnt)], 1)

        return hiddens


class ConvolutionalMultiheadSelfAttention(ConvolutionalMultiheadAttention):
    def __init__(self, input_dim, kernel_dim, multihead_cnt=10, conv_cnt=6):
        super(ConvolutionalMultiheadSelfAttention, self).__init__(
            input_dim, kernel_dim, multihead_cnt, conv_cnt)

    def forward(self, input):
        # Six convolutions: the first three (ELU) feed one attention block,
        # the last three (two ELU, one tanh) feed a second, gating block.
        hiddens = [F.elu(conv(input)) for conv in self.convs[:-1]]
        hiddens.append(torch.tanh(self.convs[-1](input)))

        elu_hid = self.multihead(hiddens[:3])
        tanh_hid = self.multihead(hiddens[3:])
        output = F.layer_norm(torch.mul(elu_hid, tanh_hid), elu_hid.size()[1:])

        return output


class ConvolutionalMultiheadTargetAttention(ConvolutionalMultiheadAttention):
    def __init__(self, input_dim, kernel_dim, multihead_cnt=10, conv_cnt=2):
        super(ConvolutionalMultiheadTargetAttention, self).__init__(
            input_dim, kernel_dim, multihead_cnt, conv_cnt)
        # Learned target (query) vector that attends over the convolved sequence.
        self.target = nn.Parameter(torch.randn(input_dim, 1))
        stdv = 1. / math.sqrt(self.target.size(1))
        self.target.data.uniform_(-stdv, stdv)

    def forward(self, input):
        batch_size = input.size(0)
        hiddens = [F.elu(conv(input)) for conv in self.convs]
        output = self.multihead([self.target.expand(batch_size, self.input_dim, 1)] + hiddens)

        return output
--------------------------------------------------------------------------------
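
A minimal shape-check sketch for the two layers above, under assumed sizes (batch 4, 100-dim embeddings divisible by the 10 default heads, sentence length 60, kernel widths 3 and 5; none of these values come from the repository). The self-attention layer preserves the channel dimension and shrinks the sequence by `kernel_dim - 1`, while the target-attention layer collapses the sequence to a single attended vector per example:

```
import torch

from layers import ConvolutionalMultiheadSelfAttention as CMSA
from layers import ConvolutionalMultiheadTargetAttention as CMTA

batch, input_dim, sent_maxlen = 4, 100, 60   # illustrative sizes; input_dim divisible by 10 heads
cmsa = CMSA(input_dim, kernel_dim=3)         # 6 convs -> gated multihead self-attention
cmta = CMTA(input_dim, kernel_dim=5)         # 2 convs -> attention against a learned target

x = torch.randn(batch, input_dim, sent_maxlen)   # (batch, channels, length)
h = cmsa(x)        # (4, 100, 58): length shrinks by kernel_dim - 1
out = cmta(h)      # (4, 100, 1): one attended vector per example
print(h.shape, out.shape)
```
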
/models.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils as utils
import torch.nn.init as init

from layers import ConvolutionalMultiheadSelfAttention as CMSA
from layers import ConvolutionalMultiheadTargetAttention as CMTA


class Proto(nn.Module):
    def __init__(self, num_emb, input_dim, pretrained_weight):
        super(Proto, self).__init__()
        self.id2vec = nn.Embedding(num_emb, input_dim, padding_idx=1)
        # rows 0-2 are reserved (unk, pad, ...); the rest hold the pretrained keyword vectors
        self.id2vec.weight.data[3:].copy_(torch.from_numpy(pretrained_weight))
        self.id2vec.weight.requires_grad = True

        # BCEWithLogitsLoss combines a Sigmoid and BCE
        # self.loss = nn.BCEWithLogitsLoss()
        self.loss = nn.CrossEntropyLoss()

    def binary_accuracy(self, y_pred, y):
        ge = torch.ge(y_pred.type(y.type()), 0.5).float()
        correct = torch.eq(ge, y).view(-1)

        return torch.sum(correct).item() / correct.shape[0]

    def accuracy(self, y_pred, y):
        _, pred = y_pred.max(1)
        return torch.sum(pred == y).item() / y.size(0)

    def predict(self, x, l):
        # Baseline: average the word embeddings over the sentence length.
        # Subclasses such as Proto_CNN override this method.
        input = self.id2vec(x)
        input = torch.div(torch.sum(input, 1), l)
        return self.model(input)

    def forward(self, data, sent_maxlen):
        # Each row of data packs [word indices (sent_maxlen) | length (1) | label (1)].
        x, l, y = torch.split(data, [sent_maxlen, 1, 1], 1)
        logits = self.predict(x, l.float())
        loss = self.loss(logits, y.squeeze())
        accuracy = self.accuracy(logits, y.squeeze())

        return loss, accuracy


class Proto_CNN(Proto):
    def __init__(self, input_dim, hidden_dim, kernel_dim,
                 sent_maxlen, dropout_rate, num_emb, pretrained_weight):
        super(Proto_CNN, self).__init__(num_emb, input_dim, pretrained_weight)

        # Learned position embeddings added to the word embeddings.
        self.positions = nn.Parameter(torch.randn(sent_maxlen, input_dim))
        stdv = 1. / self.positions.size(1) ** 0.5
        self.positions.data.uniform_(-stdv, stdv)
        self.cmsa = CMSA(input_dim, kernel_dim[0])
        self.cmta = CMTA(input_dim, kernel_dim[1])
        self.dropout = nn.Dropout(dropout_rate)
        self.cls = nn.Linear(input_dim, hidden_dim[-1])
        nn.init.xavier_normal_(self.cls.weight)

    def predict(self, x, l):
        # l is unused here; it is kept for interface compatibility with Proto.predict.
        input = self.id2vec(x)
        input = self.dropout(input + self.positions)

        hidden = self.cmsa(input.permute(0, 2, 1))
        hidden = self.cmta(hidden)

        logits = self.cls(hidden.squeeze(-1))

        return logits
--------------------------------------------------------------------------------
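
Putting the pieces together, a sketch of one training step is below, under assumed sizes (1,000-word vocabulary, 100-dim embeddings, 60-token sentences, 5 classes) and a random stand-in for `pretrained_weight`; none of these values come from the repository. It shows the input layout that `Proto.forward` expects: each row packs the padded word indices, the (now unused) sentence length, and the class label into one tensor.

```
import numpy as np
import torch

from models import Proto_CNN as HCAN

num_emb, input_dim, sent_maxlen = 1000, 100, 60
# Rows 0-2 of the embedding are reserved, so the pretrained matrix has num_emb - 3 rows.
pretrained_weight = np.random.randn(num_emb - 3, input_dim).astype(np.float32)

model = HCAN(input_dim, hidden_dim=[5], kernel_dim=[3, 5],
             sent_maxlen=sent_maxlen, dropout_rate=0.1,
             num_emb=num_emb, pretrained_weight=pretrained_weight)

batch = 4
x = torch.randint(3, num_emb, (batch, sent_maxlen))  # word indices (1 is the padding index)
l = torch.randint(1, sent_maxlen, (batch, 1))        # sentence lengths (unused by Proto_CNN)
y = torch.randint(0, 5, (batch, 1))                  # class labels for the assumed 5-way classifier
data = torch.cat([x, l, y], dim=1)                   # each row: [indices | length | label]

loss, accuracy = model(data, sent_maxlen)
loss.backward()
print(loss.item(), accuracy)
```
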