├── README.md └── src ├── aff_lstm_model.py ├── aff_lstm_model.pyc ├── afflstm_model.pyc ├── att_lstm_model.py ├── att_lstm_model.pyc ├── avg_lstm_model.py ├── avg_lstm_model.pyc ├── batcher.py ├── batcher.pyc ├── biaff_lstm_model.py ├── biaff_lstm_model.pyc ├── biatt_avg_lstm_model.py ├── biatt_avg_lstm_model.pyc ├── biatt_lstm_model.py ├── biatt_lstm_model.pyc ├── bilstm_model.py ├── bilstm_model.pyc ├── distance_dmn_model.py ├── distance_dmn_model.pyc ├── distance_lstm_model.py ├── distance_lstm_model.pyc ├── evaluate.py ├── evaluate.pyc ├── feat_bilstm_model.py ├── feat_bilstm_model.pyc ├── feat_corr_lstm.py ├── feat_corr_lstm.pyc ├── feat_corr_ran.py ├── feat_recall_gate_lstm.py ├── feat_recall_gate_lstm.pyc ├── hook.py ├── hook.pyc ├── train.py ├── train_swn.py ├── utils.py └── utils.pyc /README.md: -------------------------------------------------------------------------------- 1 | # Sentic-Long-Short-Term-Memory 2 | Sentic Long Short Term Memory 3 | -------------------------------------------------------------------------------- /src/aff_lstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import * 6 | from att_lstm_model import AttLSTM_Model 7 | from utils import _multi_bilayer_attention 8 | from feat_bilstm_model import FeatBiLSTM_Model 9 | 10 | 11 | class AffLSTM_Model(AttLSTM_Model): 12 | 13 | def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,concept_vector=None,num_ways =3,lr=0.001,cell='recall'): 14 | 15 | super(AffLSTM_Model,self).__init__(num_classes,max_length,num_tokens,embd,emb_dim=emb_dim,hidden_dim=hidden_dim,num_ways =num_ways,lr=lr) 16 | 17 | self.concept_dim = concept_vector.size()[1] 18 | 19 | self.lstm = FeatBiLSTM_Model(max_length,num_tokens,embd,self.concept_dim,emb_dim,hidden_dim,concept_vector=concept_vector,cell=cell) 20 | 21 | self.concept_linear = nn.Linear(self.concept_dim,self.hidden_dim*2) 22 | 23 | self.linear = nn.Linear(self.hidden_dim*2, self.num_ways*self.num_classes) 24 | 25 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 26 | 27 | 28 | 29 | 30 | def train_(self,x,y,targets,lengths,concepts=None,concept_lengths=None): 31 | 32 | 33 | self.zero_grad() 34 | 35 | self.train() 36 | 37 | lstm_outputs = self.lstm.forward(x,lengths,concepts,concept_lengths) 38 | 39 | output,output_ = self.target_attention_forward(lstm_outputs,targets) 40 | 41 | y = y.view(-1) 42 | 43 | loss = self.loss_fn(output_,y) 44 | 45 | loss.backward() 46 | 47 | self.optimizer.step() 48 | 49 | return loss 50 | 51 | 52 | def test(self,x,targets,lengths,concepts=None,concept_lengths=None): 53 | 54 | self.eval() 55 | 56 | lstm_outputs = self.lstm.forward(x,lengths,concepts,concept_lengths) 57 | 58 | output,output_ = self.target_attention_forward(lstm_outputs,targets) 59 | 60 | 61 | return output.view(-1,self.num_classes,self.num_ways).data.numpy()#,c_att,global_att -------------------------------------------------------------------------------- /src/aff_lstm_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/aff_lstm_model.pyc -------------------------------------------------------------------------------- /src/afflstm_model.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/afflstm_model.pyc -------------------------------------------------------------------------------- /src/att_lstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import * 6 | from bilstm_model import BiLSTM_Model 7 | from utils import _lengths_to_masks,_multi_bilayer_attention 8 | 9 | 10 | 11 | class AttLSTM_Model(nn.Module): 12 | 13 | def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,num_ways=3,lr=0.001): 14 | 15 | super(AttLSTM_Model,self).__init__() 16 | 17 | self.max_length = max_length 18 | 19 | self.num_tokens = num_tokens 20 | 21 | self.hidden_dim = hidden_dim 22 | 23 | self.num_classes = num_classes 24 | 25 | self.att_dim = 50 26 | 27 | self.depth = 10 28 | 29 | self.num_att = num_classes 30 | 31 | self.num_ways = num_ways 32 | 33 | self.lstm = BiLSTM_Model(max_length,num_tokens,embd,emb_dim,hidden_dim) 34 | 35 | self.linear = nn.Linear(self.hidden_dim*2, self.num_ways*self.num_classes) 36 | 37 | self.target_linear = nn.Linear(self.hidden_dim * 2,self.num_att * self.num_ways) 38 | 39 | self.target_linear_att = nn.Linear(self.hidden_dim * 2, 1) 40 | 41 | self.loss_fn = nn.functional.cross_entropy 42 | 43 | self.softmax = nn.Softmax() 44 | 45 | self.sigmoid = nn.Sigmoid() 46 | 47 | self.tanh = nn.Tanh() 48 | 49 | self.dropout = nn.Dropout(0.1) 50 | 51 | self.err = 1e-24 52 | 53 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 54 | 55 | def target_attention(self,hidden_outputs,targets): 56 | 57 | batch_size = len(hidden_outputs) 58 | 59 | hidden_outputs = hidden_outputs.view(-1,self.hidden_dim*2) 60 | 61 | att_vecs = self.target_linear_att(hidden_outputs).squeeze().view(batch_size,-1).exp() 62 | 63 | att_vecs = att_vecs * targets 64 | 65 | att_vecs = att_vecs / att_vecs.sum(-1).expand_as(att_vecs) 66 | 67 | return att_vecs 68 | 69 | 70 | def target_attention_forward(self, lstm_outputs, targets): 71 | 72 | targets = self.target_attention(lstm_outputs,targets) 73 | 74 | target_outputs = (targets.unsqueeze(2).expand_as(lstm_outputs) * lstm_outputs).sum(1) 75 | 76 | target_outputs = target_outputs.squeeze(1) 77 | 78 | output_ = self.linear(target_outputs) 79 | 80 | output = self.dropout(output_.view(len(output_),-1,self.num_ways)) 81 | 82 | output = self.softmax(output.view(-1,self.num_ways)) 83 | 84 | return output,output_.view(-1,self.num_ways) 85 | 86 | def train_(self,x,y,targets,lengths): 87 | 88 | self.zero_grad() 89 | 90 | self.train() 91 | 92 | lstm_outputs = self.lstm.forward(x,lengths) 93 | 94 | output,output_ = self.target_attention_forward(lstm_outputs,targets) 95 | 96 | y = y.view(-1) 97 | 98 | loss = self.loss_fn(output_,y) 99 | 100 | loss.backward() 101 | 102 | self.optimizer.step() 103 | 104 | return loss 105 | 106 | 107 | def test(self,x,targets,lengths): 108 | 109 | self.eval() 110 | 111 | lstm_outputs = self.lstm.forward(x,lengths) 112 | 113 | output,output_ = self.target_attention_forward(lstm_outputs,targets) 114 | 115 | return output.view(-1,self.num_classes,self.num_ways).data.numpy() -------------------------------------------------------------------------------- /src/att_lstm_model.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/att_lstm_model.pyc -------------------------------------------------------------------------------- /src/avg_lstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import * 6 | from bilstm_model import BiLSTM_Model 7 | from utils import _lengths_to_masks,_multi_bilayer_attention 8 | 9 | 10 | 11 | class AvgLSTM_Model(nn.Module): 12 | 13 | def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,num_ways=3,lr=0.001): 14 | 15 | super(AvgLSTM_Model,self).__init__() 16 | 17 | self.max_length = max_length 18 | 19 | self.num_tokens = num_tokens 20 | 21 | self.hidden_dim = hidden_dim 22 | 23 | self.num_classes = num_classes 24 | 25 | self.att_dim = 50 26 | 27 | self.depth = 10 28 | 29 | self.num_att = num_classes 30 | 31 | self.num_ways = num_ways 32 | 33 | self.lstm = BiLSTM_Model(max_length,num_tokens,embd,emb_dim,hidden_dim) 34 | 35 | self.linear = nn.Linear(self.hidden_dim*2, self.num_ways*self.num_classes) 36 | 37 | self.target_linear = nn.Linear(self.hidden_dim * 2,self.num_att * self.num_ways) 38 | 39 | self.target_linear_att = nn.Linear(self.hidden_dim * 2, 1) 40 | 41 | self.loss_fn = nn.functional.cross_entropy 42 | 43 | self.softmax = nn.Softmax() 44 | 45 | self.sigmoid = nn.Sigmoid() 46 | 47 | self.tanh = nn.Tanh() 48 | 49 | self.dropout = nn.Dropout(0.1) 50 | 51 | self.err = 1e-24 52 | 53 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 54 | 55 | def target_avg(self,hidden_outputs,targets): 56 | 57 | target_outputs = (targets.unsqueeze(2).expand_as(hidden_outputs) * hidden_outputs).sum(1) 58 | 59 | target_outputs = target_outputs.squeeze(1) 60 | 61 | target_outputs = target_outputs/targets.sum(1).expand_as(target_outputs) 62 | 63 | return target_outputs 64 | 65 | 66 | def target_attention_forward(self, lstm_outputs, targets): 67 | 68 | targets = self.target_avg(lstm_outputs,targets) 69 | 70 | target_outputs = (targets.unsqueeze(2).expand_as(lstm_outputs) * lstm_outputs).sum(1) 71 | 72 | target_outputs = target_outputs.squeeze(1) 73 | 74 | output_ = self.linear(target_outputs) 75 | 76 | output = self.dropout(output_.view(len(output_),-1,self.num_ways)) 77 | 78 | output = self.softmax(output.view(-1,self.num_ways)) 79 | 80 | return output,output_.view(-1,self.num_ways) 81 | 82 | def train_(self,x,y,targets,lengths): 83 | 84 | self.zero_grad() 85 | 86 | self.train() 87 | 88 | lstm_outputs = self.lstm.forward(x,lengths) 89 | 90 | output,output_ = self.target_attention_forward(lstm_outputs,targets) 91 | 92 | y = y.view(-1) 93 | 94 | loss = self.loss_fn(output_,y) 95 | 96 | loss.backward() 97 | 98 | self.optimizer.step() 99 | 100 | return loss 101 | 102 | 103 | def test(self,x,targets,lengths): 104 | 105 | self.eval() 106 | 107 | lstm_outputs = self.lstm.forward(x,lengths) 108 | 109 | output,output_ = self.target_attention_forward(lstm_outputs,targets) 110 | 111 | return output.view(-1,self.num_classes,self.num_ways).data.numpy() -------------------------------------------------------------------------------- /src/avg_lstm_model.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/avg_lstm_model.pyc -------------------------------------------------------------------------------- /src/batcher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.externals import joblib 3 | import random 4 | 5 | 6 | class Batcher: 7 | def __init__(self,data,batch_size,concepts=None): 8 | 9 | self.data = data 10 | self.num_of_samples = len(data[0]) 11 | self.max_length = data[0].shape[1] 12 | self.dim = 300 #len(id2vec[0]) 13 | self.num_of_labels = data[3].shape[1] 14 | self.batch_size = batch_size 15 | self.batch_num = 0 16 | self.max_batch_num = int(self.num_of_samples / self.batch_size) 17 | self.indexes = np.arange(len(data[0])) 18 | 19 | if concepts is not None: 20 | self.use_concepts = True 21 | self.concepts = concepts[1] 22 | self.concept_lengths = concepts[0] 23 | self.max_concepts_length = np.max([l for ls in self.concept_lengths for l in ls]) 24 | print 'max concepts length', self.max_concepts_length 25 | else: 26 | self.use_concepts = False 27 | def next(self): 28 | X = np.zeros((self.batch_size,self.max_length),dtype=np.int32) 29 | Y = np.zeros((self.batch_size,self.num_of_labels),dtype=np.int32) 30 | targets = np.zeros((self.batch_size,self.max_length)) 31 | lengths = np.zeros((self.batch_size),dtype=np.int32) 32 | # tags = np.zeros((self.batch_size,self.max_length),dtype=np.int32) 33 | if self.use_concepts: 34 | cpts = np.zeros((self.batch_size,self.max_length,self.max_concepts_length),dtype=np.int32) 35 | cpt_lengths = np.zeros((self.batch_size,self.max_length),dtype=np.int32) 36 | for i in range(self.batch_size): 37 | index = self.indexes[self.batch_num * self.batch_size + i] 38 | X[i,:] = self.data[0][index,:] 39 | lengths[i] = self.data[1][index] 40 | targets[i,:] = self.data[2][index,:] 41 | # tags[i,:] = self.data[4][index,:] 42 | for k in range(self.num_of_labels): 43 | Y[i,k] = self.data[3][index,k].nonzero()[0][0] 44 | if self.use_concepts: 45 | # print len(self.concept_lengths),index 46 | cpt_lengths[i,:lengths[i]] = self.concept_lengths[index] 47 | for j in range(lengths[i]): 48 | cpts[i,j,:cpt_lengths[i,j]] = self.concepts[index][j] 49 | self.batch_num = (self.batch_num + 1) % self.max_batch_num 50 | 51 | res = [X,Y,targets,lengths] 52 | 53 | if self.use_concepts: 54 | res.extend([cpts,cpt_lengths]) 55 | return res 56 | def shuffle(self): 57 | np.random.shuffle(self.indexes) 58 | 59 | -------------------------------------------------------------------------------- /src/batcher.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/batcher.pyc -------------------------------------------------------------------------------- /src/biaff_lstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import * 6 | from biatt_lstm_model import BiAttLSTM_Model 7 | from utils import _lengths_to_masks,_multi_bilayer_attention 8 | from feat_bilstm_model import FeatBiLSTM_Model 9 | 10 | 11 | class BiAffLSTM_Model(BiAttLSTM_Model): 12 | 13 | def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,concept_vector=None,num_ways =3,lr=0.001,cell='recall'): 14 | 15 | 
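# initialise the parent bi-attention model first; its plain BiLSTM is
# then overridden with the concept-aware FeatBiLSTM_Model below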
super(BiAffLSTM_Model,self).__init__(num_classes,max_length,num_tokens,embd,emb_dim=emb_dim,hidden_dim=hidden_dim,num_ways =num_ways,lr=lr) 16 | 17 | self.concept_dim = concept_vector.size()[1] 18 | 19 | self.lstm = FeatBiLSTM_Model(max_length,num_tokens,embd,self.concept_dim,emb_dim,hidden_dim,concept_vector=concept_vector,cell=cell) 20 | 21 | self.concept_linear = nn.Linear(self.concept_dim,self.hidden_dim*2) 22 | 23 | self.linear = nn.Linear(self.hidden_dim*2, self.num_ways) 24 | 25 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 26 | 27 | 28 | 29 | 30 | def train_(self,x,y,targets,lengths,concepts=None,concept_lengths=None): 31 | 32 | 33 | self.zero_grad() 34 | 35 | self.train() 36 | 37 | lstm_outputs = self.lstm.forward(x,lengths,concepts,concept_lengths) 38 | 39 | output,output_ = self.global_attention_forward(lstm_outputs,targets,lengths) 40 | 41 | y = y.view(-1) 42 | 43 | loss = self.loss_fn(output_,y) 44 | 45 | loss.backward() 46 | 47 | self.optimizer.step() 48 | 49 | return loss 50 | 51 | 52 | def test(self,x,targets,lengths,concepts=None,concept_lengths=None): 53 | 54 | self.eval() 55 | 56 | lstm_outputs = self.lstm.forward(x,lengths,concepts,concept_lengths) 57 | 58 | output,output_ = self.global_attention_forward(lstm_outputs,targets,lengths) 59 | 60 | 61 | return output.view(-1,self.num_classes,self.num_ways).data.numpy()#,c_att,global_att -------------------------------------------------------------------------------- /src/biaff_lstm_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/biaff_lstm_model.pyc -------------------------------------------------------------------------------- /src/biatt_avg_lstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import * 6 | from avg_lstm_model import AvgLSTM_Model 7 | from utils import _lengths_to_masks,_multi_bilayer_attention 8 | 9 | 10 | 11 | class BiAvgLSTM_Model(AvgLSTM_Model): 12 | 13 | def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,num_ways =3,lr=0.001): 14 | 15 | super(BiAvgLSTM_Model,self).__init__(num_classes,max_length,num_tokens,embd,emb_dim=emb_dim,hidden_dim=hidden_dim,num_ways =num_ways,lr=lr) 16 | 17 | self.linear = nn.Linear(self.hidden_dim*2, self.num_ways) 18 | 19 | self.global_linear_att_l1 = nn.Linear(self.hidden_dim*4,self.att_dim) 20 | 21 | self.global_linear_att_l2 = nn.Linear(self.att_dim,1) 22 | 23 | self.global_multi_linear_att_l2 = nn.Linear(self.att_dim,self.num_att,bias=False) 24 | 25 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 26 | 27 | 28 | def global_attention_forward(self, lstm_outputs,targets,lengths): 29 | 30 | max_length = lstm_outputs.size()[1] 31 | 32 | batch_size = lstm_outputs.size()[0] 33 | 34 | mask = _lengths_to_masks(lengths,max_length) 35 | 36 | target_outputs = self.target_avg(lstm_outputs,targets) 37 | 38 | global_att = _multi_bilayer_attention(lstm_outputs,target_outputs,mask,\ 39 | self.global_linear_att_l1,self.global_multi_linear_att_l2,self.tanh,num_att=self.num_att) 40 | 41 | norm = global_att.sum(1) 42 | 43 | global_att = global_att / norm.expand_as(global_att) 44 | 45 | global_outputs = 
lstm_outputs.unsqueeze(2).expand(batch_size,lstm_outputs.size()[1],self.num_att,lstm_outputs.size()[2]) 46 | 47 | global_outputs = (global_att.unsqueeze(3).expand_as(global_outputs) * global_outputs).sum(1) 48 | 49 | global_outputs = global_outputs.squeeze(1) 50 | 51 | output_ = self.linear(global_outputs.view(batch_size * self.num_att,-1)).view(batch_size,-1,self.num_ways) 52 | 53 | output = self.dropout(output_) 54 | 55 | output = self.softmax(output.view(-1,self.num_ways)) 56 | 57 | return output,output_.view(-1,self.num_ways) 58 | 59 | 60 | 61 | 62 | 63 | def train_(self,x,y,targets,lengths,concepts=None,concept_lengths=None): 64 | 65 | self.zero_grad() 66 | 67 | self.train() 68 | 69 | lstm_outputs = self.lstm.forward(x,lengths) 70 | 71 | output,output_ = self.global_attention_forward(lstm_outputs,targets,lengths) 72 | 73 | y = y.view(-1) 74 | 75 | loss = self.loss_fn(output_,y) 76 | 77 | loss.backward() 78 | 79 | self.optimizer.step() 80 | 81 | return loss 82 | 83 | 84 | def test(self,x,targets,lengths): 85 | 86 | self.eval() 87 | 88 | lstm_outputs = self.lstm.forward(x,lengths) 89 | 90 | output,output_ = self.global_attention_forward(lstm_outputs,targets,lengths) 91 | 92 | return output.view(-1,self.num_classes,self.num_ways).data.numpy() -------------------------------------------------------------------------------- /src/biatt_avg_lstm_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/biatt_avg_lstm_model.pyc -------------------------------------------------------------------------------- /src/biatt_lstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import * 6 | from att_lstm_model import AttLSTM_Model 7 | from utils import _lengths_to_masks,_multi_bilayer_attention 8 | 9 | 10 | 11 | class BiAttLSTM_Model(AttLSTM_Model): 12 | 13 | def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,num_ways =3,lr=0.001): 14 | 15 | super(BiAttLSTM_Model,self).__init__(num_classes,max_length,num_tokens,embd,emb_dim=emb_dim,hidden_dim=hidden_dim,num_ways =num_ways,lr=lr) 16 | 17 | self.linear = nn.Linear(self.hidden_dim*2, self.num_ways) 18 | 19 | self.global_linear_att_l1 = nn.Linear(self.hidden_dim*4,self.att_dim) 20 | 21 | self.global_linear_att_l2 = nn.Linear(self.att_dim,1) 22 | 23 | self.global_multi_linear_att_l2 = nn.Linear(self.att_dim,self.num_att,bias=False) 24 | 25 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 26 | 27 | 28 | def global_attention_forward(self, lstm_outputs,targets,lengths): 29 | 30 | max_length = lstm_outputs.size()[1] 31 | 32 | batch_size = lstm_outputs.size()[0] 33 | 34 | mask = _lengths_to_masks(lengths,max_length) 35 | 36 | targets = self.target_attention(lstm_outputs,targets) 37 | 38 | target_outputs = (targets.unsqueeze(2).expand_as(lstm_outputs) * lstm_outputs).sum(1) 39 | 40 | target_outputs = target_outputs.squeeze(1) 41 | 42 | target_outputs = target_outputs/targets.sum(1).expand_as(target_outputs) 43 | 44 | global_att = _multi_bilayer_attention(lstm_outputs,target_outputs,mask,\ 45 | self.global_linear_att_l1,self.global_multi_linear_att_l2,self.tanh,num_att=self.num_att) 46 | 47 | norm = global_att.sum(1) 48 | 49 | global_att = global_att / norm.expand_as(global_att) 50 | 51 
| global_outputs = lstm_outputs.unsqueeze(2).expand(batch_size,lstm_outputs.size()[1],self.num_att,lstm_outputs.size()[2]) 52 | 53 | global_outputs = (global_att.unsqueeze(3).expand_as(global_outputs) * global_outputs).sum(1) 54 | 55 | global_outputs = global_outputs.squeeze(1) 56 | 57 | output_ = self.linear(global_outputs.view(batch_size * self.num_att,-1)).view(batch_size,-1,self.num_ways) 58 | 59 | output = self.dropout(output_) 60 | 61 | output = self.softmax(output.view(-1,self.num_ways)) 62 | 63 | return output,output_.view(-1,self.num_ways) 64 | 65 | 66 | 67 | 68 | 69 | def train_(self,x,y,targets,lengths,concepts=None,concept_lengths=None): 70 | 71 | self.zero_grad() 72 | 73 | self.train() 74 | 75 | lstm_outputs = self.lstm.forward(x,lengths) 76 | 77 | output,output_ = self.global_attention_forward(lstm_outputs,targets,lengths) 78 | 79 | y = y.view(-1) 80 | 81 | loss = self.loss_fn(output_,y) 82 | 83 | loss.backward() 84 | 85 | self.optimizer.step() 86 | 87 | return loss 88 | 89 | 90 | def test(self,x,targets,lengths): 91 | 92 | self.eval() 93 | 94 | lstm_outputs = self.lstm.forward(x,lengths) 95 | 96 | output,output_ = self.global_attention_forward(lstm_outputs,targets,lengths) 97 | 98 | return output.view(-1,self.num_classes,self.num_ways).data.numpy() -------------------------------------------------------------------------------- /src/biatt_lstm_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/biatt_lstm_model.pyc -------------------------------------------------------------------------------- /src/bilstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import _lengths_to_masks 6 | 7 | 8 | class BiLSTM_Model(nn.Module): 9 | 10 | def __init__(self,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100): 11 | 12 | super(BiLSTM_Model,self).__init__() 13 | 14 | self.emb_dim = emb_dim 15 | 16 | self.hidden_dim = hidden_dim 17 | 18 | self.max_length = max_length 19 | 20 | self.embedding = nn.Embedding(num_tokens,emb_dim) 21 | 22 | self.embedding.weight = nn.Parameter(torch.from_numpy(embd),requires_grad=True) 23 | 24 | self.lstm_fw = nn.LSTMCell(self.emb_dim ,self.hidden_dim ) 25 | 26 | self.lstm_bw = nn.LSTMCell(self.emb_dim ,self.hidden_dim) 27 | 28 | self.loss_fn = nn.functional.cross_entropy 29 | 30 | self.softmax = nn.Softmax() 31 | 32 | self.sigmoid = nn.Sigmoid() 33 | 34 | self.tanh = nn.Tanh() 35 | 36 | self.dropout = nn.Dropout(0.1) 37 | 38 | 39 | def init_hidden(self,batch_size): 40 | 41 | return (autograd.Variable(torch.zeros(batch_size, self.hidden_dim)),autograd.Variable(torch.zeros(batch_size, self.hidden_dim))) 42 | 43 | def forward(self,x,lengths): 44 | 45 | max_length = x.size()[1] 46 | 47 | mask = _lengths_to_masks(lengths,max_length) 48 | 49 | x_embd = self.dropout(self.embedding(x).transpose(0,1)) 50 | 51 | hidden_fw = self.init_hidden(len(x)) 52 | 53 | hidden_bw = self.init_hidden(len(x)) 54 | 55 | lstm_fw_outputs = [] 56 | 57 | lstm_bw_outputs = [] 58 | 59 | 60 | for i in range(self.max_length): 61 | 62 | hidden_fw = self.lstm_fw(x_embd[i],hidden_fw) 63 | 64 | hidden_fw = [fw * mask[:,i].unsqueeze(1).expand_as(fw) for fw in hidden_fw] 65 | 66 | 67 | hidden_bw = self.lstm_bw(x_embd[-i-1],hidden_bw) 68 | 69 | hidden_bw = [bw * 
mask[:,-i-1].unsqueeze(1).expand_as(bw) for bw in hidden_bw] 70 | 71 | lstm_fw_outputs.append(hidden_fw[0][:,:self.hidden_dim]) 72 | 73 | lstm_bw_outputs.append(hidden_bw[0][:,:self.hidden_dim]) 74 | 75 | lstm_bw_outputs = lstm_bw_outputs[::-1] 76 | 77 | lstm_outputs = torch.cat([torch.cat([fw,bw],1).unsqueeze(1) for fw,bw in zip(lstm_fw_outputs,lstm_bw_outputs)],1) 78 | 79 | return self.dropout(lstm_outputs) 80 | 81 | 82 | -------------------------------------------------------------------------------- /src/bilstm_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/bilstm_model.pyc -------------------------------------------------------------------------------- /src/distance_dmn_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import * 6 | from bilstm_model import BiLSTM_Model 7 | from utils import _lengths_to_masks,deep_bilayer_attention,target2distance 8 | 9 | 10 | 11 | class DistDMN_Model(nn.Module): 12 | 13 | def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,num_ways=3,lr=0.001): 14 | 15 | super(DistDMN_Model,self).__init__() 16 | 17 | self.max_length = max_length 18 | 19 | self.num_tokens = num_tokens 20 | 21 | self.hidden_dim = hidden_dim 22 | 23 | self.num_classes = num_classes 24 | 25 | self.att_dim = 50 26 | 27 | self.depth = 1 28 | 29 | self.num_att = num_classes 30 | 31 | self.num_ways = num_ways 32 | 33 | self.lstm = BiLSTM_Model(max_length,num_tokens,embd,emb_dim,hidden_dim) 34 | 35 | self.linear = nn.Linear(self.hidden_dim*2, self.num_ways) 36 | 37 | self.target_linear_att_l1s = [ nn.Linear(self.hidden_dim*2,self.att_dim)] 38 | 39 | self.target_linear_att_l2s = [[nn.Linear(self.att_dim,1) for _ in range(self.num_classes)]] 40 | 41 | for i in range(1,self.depth): 42 | 43 | self.target_linear_att_l1s.append(nn.Linear(self.hidden_dim * 4, self.att_dim)) 44 | 45 | self.target_linear_att_l2s.append([nn.Linear(self.att_dim,1) for _ in range(self.num_classes)]) 46 | 47 | self.loss_fn = nn.functional.cross_entropy 48 | 49 | self.softmax = nn.Softmax() 50 | 51 | self.sigmoid = nn.Sigmoid() 52 | 53 | self.tanh = nn.Tanh() 54 | 55 | self.dropout = nn.Dropout(0.1) 56 | 57 | self.err = 1e-24 58 | 59 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 60 | 61 | 62 | def target_attention_forward(self, lstm_outputs, targets,lengths): 63 | 64 | targets = target2distance(targets.data.numpy()) 65 | 66 | lstm_outputs = lstm_outputs * targets.unsqueeze(2).expand_as(lstm_outputs) 67 | 68 | max_length = lstm_outputs.size()[1] 69 | 70 | mask = _lengths_to_masks(lengths,max_length) 71 | 72 | targets,target_outputs = deep_bilayer_attention(lstm_outputs,mask,\ 73 | self.target_linear_att_l1s,self.target_linear_att_l2s,self.tanh,num_classes=self.num_classes, err=0.0) 74 | 75 | 76 | target_outputs = target_outputs.view(-1,self.hidden_dim*2) 77 | 78 | output_ = self.linear(target_outputs) 79 | 80 | output = self.dropout(output_.view(len(output_),-1,self.num_ways)) 81 | 82 | output = self.softmax(output.view(-1,self.num_ways)) 83 | 84 | return output,output_.view(-1,self.num_ways) 85 | 86 | def train_(self,x,y,targets,lengths): 87 | 88 | self.zero_grad() 89 | 90 | self.train() 91 | 92 | lstm_outputs = 
self.lstm.forward(x,lengths) 93 | 94 | output,output_ = self.target_attention_forward(lstm_outputs,targets,lengths) 95 | 96 | y = y.view(-1) 97 | 98 | loss = self.loss_fn(output_,y) 99 | 100 | loss.backward() 101 | 102 | self.optimizer.step() 103 | 104 | return loss 105 | 106 | 107 | def test(self,x,targets,lengths): 108 | 109 | self.eval() 110 | 111 | lstm_outputs = self.lstm.forward(x,lengths) 112 | 113 | output,output_ = self.target_attention_forward(lstm_outputs,targets,lengths) 114 | 115 | return output.view(-1,self.num_classes,self.num_ways).data.numpy() -------------------------------------------------------------------------------- /src/distance_dmn_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/distance_dmn_model.pyc -------------------------------------------------------------------------------- /src/distance_lstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import * 6 | from bilstm_model import BiLSTM_Model 7 | from utils import _lengths_to_masks,_multi_bilayer_attention,target2distance 8 | 9 | 10 | 11 | class DistAttLSTM_Model(nn.Module): 12 | 13 | def __init__(self,num_classes,max_length,num_tokens,embd,emb_dim = 300,hidden_dim=100,num_ways=3,lr=0.001): 14 | 15 | super(DistAttLSTM_Model,self).__init__() 16 | 17 | self.max_length = max_length 18 | 19 | self.num_tokens = num_tokens 20 | 21 | self.hidden_dim = hidden_dim 22 | 23 | self.num_classes = num_classes 24 | 25 | self.att_dim = 50 26 | 27 | self.depth = 10 28 | 29 | self.num_att = num_classes 30 | 31 | self.num_ways = num_ways 32 | 33 | self.lstm = BiLSTM_Model(max_length,num_tokens,embd,emb_dim,hidden_dim) 34 | 35 | self.linear = nn.Linear(self.hidden_dim*2, self.num_ways*self.num_classes) 36 | 37 | self.target_linear = nn.Linear(self.hidden_dim * 2,self.num_att * self.num_ways) 38 | 39 | self.target_linear_att = nn.Linear(self.hidden_dim * 2, 1) 40 | 41 | self.loss_fn = nn.functional.cross_entropy 42 | 43 | self.softmax = nn.Softmax() 44 | 45 | self.sigmoid = nn.Sigmoid() 46 | 47 | self.tanh = nn.Tanh() 48 | 49 | self.dropout = nn.Dropout(0.1) 50 | 51 | self.err = 1e-24 52 | 53 | self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 54 | 55 | 56 | def target_attention_forward(self, lstm_outputs, targets): 57 | 58 | targets = target2distance(targets.data.numpy()) 59 | 60 | target_outputs = (targets.unsqueeze(2).expand_as(lstm_outputs) * lstm_outputs).sum(1) 61 | 62 | target_outputs = target_outputs.squeeze(1) 63 | 64 | output_ = self.linear(target_outputs) 65 | 66 | output = self.dropout(output_.view(len(output_),-1,self.num_ways)) 67 | 68 | output = self.softmax(output.view(-1,self.num_ways)) 69 | 70 | return output,output_.view(-1,self.num_ways) 71 | 72 | def train_(self,x,y,targets,lengths): 73 | 74 | self.zero_grad() 75 | 76 | self.train() 77 | 78 | lstm_outputs = self.lstm.forward(x,lengths) 79 | 80 | output,output_ = self.target_attention_forward(lstm_outputs,targets) 81 | 82 | y = y.view(-1) 83 | 84 | loss = self.loss_fn(output_,y) 85 | 86 | loss.backward() 87 | 88 | self.optimizer.step() 89 | 90 | return loss 91 | 92 | 93 | def test(self,x,targets,lengths): 94 | 95 | self.eval() 96 | 97 | lstm_outputs = self.lstm.forward(x,lengths) 98 | 99 | output,output_ = 
self.target_attention_forward(lstm_outputs,targets) 100 | 101 | return output.view(-1,self.num_classes,self.num_ways).data.numpy() -------------------------------------------------------------------------------- /src/distance_lstm_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/distance_lstm_model.pyc -------------------------------------------------------------------------------- /src/evaluate.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | def f1(p,r): 5 | if r == 0.: 6 | return 0. 7 | return 2 * p * r / float( p + r ) 8 | 9 | def strict(true_and_prediction): 10 | num_entities = len(true_and_prediction) 11 | correct_num = 0. 12 | for true_labels, predicted_labels in true_and_prediction: 13 | correct_num += set(true_labels) == set(predicted_labels) 14 | precision = recall = correct_num / num_entities 15 | return precision, recall, f1( precision, recall) 16 | 17 | def loose_macro(true_and_prediction): 18 | num_entities = len(true_and_prediction) 19 | p = 0. 20 | r = 0. 21 | for true_labels, predicted_labels in true_and_prediction: 22 | if len(predicted_labels) > 0: 23 | p += len(set(predicted_labels).intersection(set(true_labels))) / float(len(predicted_labels)) 24 | if len(true_labels): 25 | r += len(set(predicted_labels).intersection(set(true_labels))) / float(len(true_labels)) 26 | precision = p / num_entities 27 | recall = r / num_entities 28 | return precision, recall, f1( precision, recall) 29 | 30 | def loose_micro(true_and_prediction): 31 | num_predicted_labels = 0. 32 | num_true_labels = 0. 33 | num_correct_labels = 0. 34 | for true_labels, predicted_labels in true_and_prediction: 35 | num_predicted_labels += len(predicted_labels) 36 | num_true_labels += len(true_labels) 37 | num_correct_labels += len(set(predicted_labels).intersection(set(true_labels))) 38 | precision = num_correct_labels / num_predicted_labels 39 | recall = num_correct_labels / num_true_labels 40 | return precision, recall, f1( precision, recall) 41 | 42 | 43 | if __name__ == "__main__": 44 | file = open(sys.argv[1]) 45 | true_and_prediction = [] 46 | for line in file: 47 | temp = line.split("\t") 48 | if len(temp) == 1: 49 | true_labels = temp[0].split() 50 | predicted_labels = [] 51 | else: 52 | true_labels, predicted_labels = temp 53 | true_labels = true_labels.split() 54 | predicted_labels = predicted_labels.split() 55 | true_and_prediction.append((true_labels,predicted_labels)) 56 | #for each in true_and_prediction: 57 | #print(each) 58 | print(" strict (p,r,f1):",strict(true_and_prediction)) 59 | print("loose macro (p,r,f1):",loose_macro(true_and_prediction)) 60 | print("loose micro (p,r,f1):",loose_micro(true_and_prediction)) 61 | file.close() 62 | -------------------------------------------------------------------------------- /src/evaluate.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/evaluate.pyc -------------------------------------------------------------------------------- /src/feat_bilstm_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as autograd 4 | import torch.optim as optim 5 | from utils import _lengths_to_masks 6 | from 
feat_recall_gate_lstm import LSTMCell as Recall_cell 7 | from feat_corr_lstm import LSTMCell as Sentic_cell 8 | 9 | 10 | class FeatBiLSTM_Model(nn.Module): 11 | 12 | def __init__(self,max_length,num_tokens,embd,concept_dim,emb_dim = 300,hidden_dim=100,concept_vector=None,cell='recall'): 13 | 14 | super(FeatBiLSTM_Model,self).__init__() 15 | 16 | self.emb_dim = emb_dim 17 | 18 | self.hidden_dim = hidden_dim 19 | 20 | self.max_length = max_length 21 | 22 | self.concept_dim = concept_dim 23 | 24 | self.embedding = nn.Embedding(num_tokens,emb_dim) 25 | 26 | self.embedding.weight = nn.Parameter(torch.from_numpy(embd),requires_grad=False) 27 | 28 | self.concept_embedding = nn.Embedding(concept_vector.size()[0],self.concept_dim) 29 | 30 | self.concept_embedding.weight = nn.Parameter(concept_vector,requires_grad=False) 31 | 32 | if cell == 'recall': 33 | 34 | print "using recall cell" 35 | 36 | self.lstm_fw = Recall_cell(self.emb_dim,self.concept_dim ,self.hidden_dim ) 37 | 38 | self.lstm_bw = Recall_cell(self.emb_dim,self.concept_dim ,self.hidden_dim ) 39 | else: 40 | 41 | print "using sentic cell" 42 | 43 | self.lstm_fw = Sentic_cell(self.emb_dim,self.concept_dim ,self.hidden_dim ) 44 | 45 | self.lstm_bw = Sentic_cell(self.emb_dim,self.concept_dim ,self.hidden_dim ) 46 | 47 | self.loss_fn = nn.functional.cross_entropy 48 | 49 | self.softmax = nn.Softmax() 50 | 51 | self.sigmoid = nn.Sigmoid() 52 | 53 | self.err = 1e-24 54 | 55 | self.tanh = nn.Tanh() 56 | 57 | self.dropout = nn.Dropout(0.1) 58 | 59 | def concept_avg(self,c,c_mask): 60 | 61 | max_length = c.size()[1] 62 | 63 | concept_outputs = [] 64 | 65 | batch_size = c.size()[0] 66 | 67 | for i in range(max_length): 68 | 69 | c_att = c_mask[:,i] 70 | 71 | norm = c_att.sum(1) + self.err 72 | 73 | c_att = c_att / norm.expand_as(c_att) 74 | 75 | c_vec = c[:,i] 76 | c_ = (c_att.unsqueeze(2).expand_as(c_vec) * c_vec).sum(1) 77 | 78 | concept_outputs.append(c_) 79 | 80 | res = torch.cat(concept_outputs,1) 81 | 82 | return res 83 | 84 | 85 | def init_hidden(self,batch_size): 86 | 87 | return (autograd.Variable(torch.zeros(batch_size, self.hidden_dim)),autograd.Variable(torch.zeros(batch_size, self.hidden_dim))) 88 | 89 | def forward(self,x,lengths,concepts,concept_lengths): 90 | 91 | max_length = x.size()[1] 92 | 93 | max_concept_length = concepts.size(2) 94 | batch_size = x.size()[0] 95 | 96 | mask = _lengths_to_masks(lengths,max_length) 97 | 98 | x_embd = self.dropout(self.embedding(x).transpose(0,1)) 99 | hidden_fw = self.init_hidden(len(x)) 100 | 101 | hidden_bw = self.init_hidden(len(x)) 102 | 103 | lstm_fw_outputs = [] 104 | 105 | lstm_bw_outputs = [] 106 | 107 | c_mask = _lengths_to_masks(concept_lengths.view(-1),max_concept_length).view(-1,max_length,max_concept_length) 108 | 109 | c = self.concept_embedding(concepts.view(-1)).view(batch_size,max_length,-1,self.concept_dim) 110 | 111 | concept_outputs = self.concept_avg(c,c_mask) 112 | 113 | # concept_outputs = (concept_outputs * mask.unsqueeze(2).expand_as(concept_outputs)).sum(1).expand_as(concept_outputs) 114 | 115 | for i in range(self.max_length): 116 | 117 | hidden_fw = self.lstm_fw(x_embd[i],concept_outputs[:,i],hidden_fw) 118 | 119 | hidden_fw = [fw * mask[:,i].unsqueeze(1).expand_as(fw) for fw in hidden_fw] 120 | 121 | 122 | hidden_bw = self.lstm_bw(x_embd[-i-1],concept_outputs[:,-i-1],hidden_bw) 123 | 124 | hidden_bw = [bw * mask[:,-i-1].unsqueeze(1).expand_as(bw) for bw in hidden_bw] 125 | 126 | 127 | lstm_fw_outputs.append(hidden_fw[0][:,:self.hidden_dim]) 128 | 129 | 
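# backward hidden states are collected in reverse time order and
# flipped ([::-1]) below so both directions line up per token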
lstm_bw_outputs.append(hidden_bw[0][:,:self.hidden_dim]) 130 | 131 | 132 | lstm_bw_outputs = lstm_bw_outputs[::-1] 133 | 134 | 135 | lstm_outputs = torch.cat([torch.cat([fw,bw],1).unsqueeze(1) for fw,bw in zip(lstm_fw_outputs,lstm_bw_outputs)],1) 136 | 137 | 138 | return self.dropout(lstm_outputs) 139 | 140 | 141 | -------------------------------------------------------------------------------- /src/feat_bilstm_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/feat_bilstm_model.pyc -------------------------------------------------------------------------------- /src/feat_corr_lstm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | def weight_variable(shape): 7 | 8 | initial = np.random.uniform(-0.01, 0.01,shape) 9 | 10 | initial = torch.from_numpy(initial) 11 | 12 | return initial.float() 13 | 14 | class LSTMCell(nn.Module): 15 | 16 | def __init__(self, num_inputs, feat_dim,num_hidden, forget_gate_bias=-1): 17 | super(LSTMCell, self).__init__() 18 | 19 | self.forget_gate_bias = forget_gate_bias 20 | self.num_hidden = num_hidden 21 | self.fc_i2h = nn.Linear(num_inputs, 5 * num_hidden) 22 | self.fc_h2h = nn.Linear(num_hidden, 5 * num_hidden) 23 | 24 | self.input_dim = num_inputs 25 | self.feat_dim = feat_dim 26 | self.fc_c2h = nn.Linear(self.feat_dim,num_hidden*5) 27 | 28 | def forward(self, inputs, feats, state): 29 | 30 | hx, cx = state 31 | # xc_gate = F.sigmoid(self.fc_x2c(torch.cat([inputs,hx,feats],-1))) 32 | # xc = self.x2c(torch.cat([inputs,cx],-1)) 33 | # feats = xc_gate*xc + (1 - xc_gate) * feats 34 | i2h = self.fc_i2h(inputs) 35 | h2h = self.fc_h2h(hx) 36 | c2h = self.fc_c2h(feats) 37 | x = i2h + h2h 38 | 39 | gates = x.split(self.num_hidden, 1) 40 | c_gates = c2h.split(self.num_hidden,1) 41 | in_gate = F.sigmoid(gates[0] + c_gates[0]) 42 | forget_gate = F.sigmoid(gates[1] + c_gates[1]+self.forget_gate_bias) 43 | out_gate = F.sigmoid(gates[2] + c_gates[2]) 44 | in_transform = F.tanh(gates[3]) 45 | concept_in_gate = F.sigmoid(gates[4] + c_gates[3]) 46 | cx = forget_gate * cx + in_gate * in_transform #+ concept_in_gate*F.tanh(c_gates[3]) 47 | hx = out_gate * F.tanh(cx) + concept_in_gate*F.tanh(c_gates[4]) 48 | 49 | return hx, cx 50 | -------------------------------------------------------------------------------- /src/feat_corr_lstm.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/feat_corr_lstm.pyc -------------------------------------------------------------------------------- /src/feat_corr_ran.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | def weight_variable(shape): 7 | 8 | initial = np.random.uniform(-0.01, 0.01,shape) 9 | 10 | initial = torch.from_numpy(initial) 11 | 12 | return initial.float() 13 | 14 | class LSTMCell(nn.Module): 15 | 16 | def __init__(self, num_inputs, feat_dim,num_hidden, forget_gate_bias=-1): 17 | super(LSTMCell, self).__init__() 18 | 19 | self.forget_gate_bias = forget_gate_bias 20 | self.num_hidden = num_hidden 21 | 
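# 5 * num_hidden pre-activations per step: input, forget and output
# gates, the candidate transform, and an extra concept-input gate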
self.fc_i2h = nn.Linear(num_inputs, 5 * num_hidden) 22 | self.fc_h2h = nn.Linear(num_hidden, 5 * num_hidden) 23 | 24 | self.input_dim = num_inputs 25 | self.feat_dim = feat_dim 26 | self.fc_c2h = nn.Linear(self.feat_dim,num_hidden*5) 27 | 28 | def forward(self, inputs, feats, state): 29 | 30 | hx, cx = state 31 | 32 | i2h = self.fc_i2h(inputs) 33 | h2h = self.fc_h2h(hx) 34 | c2h = self.fc_c2h(feats) 35 | x = i2h + h2h 36 | 37 | gates = x.split(self.num_hidden, 1) 38 | c_gates = c2h.split(self.num_hidden,1) 39 | in_gate = F.sigmoid(gates[0] + c_gates[0]) 40 | forget_gate = F.sigmoid(gates[1] + c_gates[1]+self.forget_gate_bias) 41 | out_gate = F.sigmoid(gates[2] + c_gates[2]) 42 | in_transform = F.tanh(gates[3]) 43 | concept_in_gate = F.sigmoid(gates[4] + c_gates[3]) 44 | cx = forget_gate * cx + in_gate * in_transform #+ concept_in_gate*F.tanh(c_gates[3]) 45 | hx = out_gate * F.tanh(cx) + concept_in_gate*F.tanh(c_gates[4]) 46 | 47 | return hx, cx 48 | -------------------------------------------------------------------------------- /src/feat_recall_gate_lstm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | def weight_variable(shape): 7 | 8 | initial = np.random.uniform(-0.01, 0.01,shape) 9 | 10 | initial = torch.from_numpy(initial) 11 | 12 | return initial.float() 13 | 14 | class LSTMCell(nn.Module): 15 | 16 | def __init__(self, num_inputs, feat_dim,num_hidden, forget_gate_bias=-1): 17 | super(LSTMCell, self).__init__() 18 | 19 | self.forget_gate_bias = forget_gate_bias 20 | self.num_hidden = num_hidden 21 | self.fc_i2h = nn.Linear(num_inputs, 4 * num_hidden) 22 | self.fc_h2h = nn.Linear(num_hidden, 4 * num_hidden) 23 | 24 | self.input_dim = num_inputs 25 | self.feat_dim = feat_dim 26 | self.i2c = nn.Linear(self.input_dim,num_hidden) 27 | self.h2c = nn.Linear(2*self.num_hidden, num_hidden) 28 | self.c2c = nn.Linear(self.feat_dim, num_hidden) 29 | self.c2h = nn.Linear(self.feat_dim,num_hidden) 30 | def forward(self, inputs, feats, state): 31 | hx, cx = state 32 | i2h = self.fc_i2h(inputs) 33 | h2h = self.fc_h2h(hx) 34 | 35 | recall_gate = F.sigmoid(self.i2c(inputs) + self.h2c(torch.cat(state,-1)) + self.c2c(feats)) 36 | 37 | x = i2h + h2h 38 | gates = x.split(self.num_hidden, 1) 39 | 40 | in_gate = F.sigmoid(gates[0]) 41 | forget_gate = F.sigmoid(gates[1] + self.forget_gate_bias) 42 | out_gate = F.sigmoid(gates[2]) 43 | in_transform = F.tanh(gates[3]) 44 | 45 | c_transform = F.tanh(self.c2h(feats)) 46 | 47 | cx = forget_gate * cx + in_gate * in_transform + c_transform*recall_gate 48 | hx = out_gate * F.tanh(cx) 49 | return hx, cx 50 | -------------------------------------------------------------------------------- /src/feat_recall_gate_lstm.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/feat_recall_gate_lstm.pyc -------------------------------------------------------------------------------- /src/hook.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from evaluate import strict, loose_macro, loose_micro 3 | import numpy as np 4 | def remove_conflict(tags,scores,hier): 5 | tags_new = [] 6 | tags = sorted([t for t in tags],key=lambda x:scores[x])[::-1] 7 | 8 | for tag in tags: 9 | path_t = 
set(hier[tag].nonzero()[0]) 10 | keep = True 11 | for tag2 in tags_new: 12 | path_t2 = set(hier[tag2].nonzero()[0]) 13 | if not (path_t2 best_acc: 285 | 286 | best_rep = rep 287 | 288 | best_acc = dev_acc 289 | 290 | best_dev_rep = dev_rep 291 | 292 | if data_name == 'sentihood': 293 | 294 | print "best dev" 295 | print "\n".join(best_dev_rep) 296 | print "best test" 297 | print "\n".join(best_rep) 298 | else: 299 | print "test set" 300 | 301 | print "\n".join(rep) 302 | 303 | -------------------------------------------------------------------------------- /src/train_swn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | from utils import * 4 | import time 5 | import numpy as np 6 | import logging 7 | import math 8 | from batcher import Batcher 9 | from hook import acc_hook 10 | from sklearn.externals import joblib 11 | import torch 12 | import torch.autograd as autograd 13 | 14 | parser = argparse.ArgumentParser() 15 | 16 | parser.add_argument("data",help="path to dataset") 17 | 18 | parser.add_argument("mode",help="global-attention,target-attention,avg,sentic") 19 | 20 | parser.add_argument("data_fn",help="semeval or sentihood") 21 | 22 | args = parser.parse_args() 23 | 24 | if args.mode[:6] == 'sentic': 25 | 26 | use_concepts = True 27 | 28 | else: 29 | 30 | use_concepts = False 31 | 32 | cell = 'sentic' 33 | 34 | data_name = args.data_fn 35 | 36 | input_dir = args.data 37 | 38 | if data_name == 'sentihood': 39 | num_epoch = 15 40 | data = joblib.load(input_dir+'/data_cpts_swn_all.150.pkl') 41 | 42 | num_ways = 3 43 | 44 | else: 45 | num_epoch = 15 46 | data = joblib.load(input_dir+'/data_semeval_swn_all.150.pkl') 47 | 48 | num_ways = 4 49 | 50 | 51 | train = data['train'] 52 | 53 | if data_name == 'sentihood': 54 | 55 | dev = data['dev'] 56 | 57 | test = data['test'] 58 | 59 | concepts_dict = dict([[v,k] for k,v in data['concepts_dict'].items()]) 60 | 61 | print "train size:",len(train[0]) 62 | 63 | if data_name == 'sentihood': 64 | 65 | print "dev size:",len(dev[0]) 66 | 67 | print"test size",len(test[0]) 68 | 69 | dicts = data['dicts'] 70 | 71 | id2token = dicts['id2token'] 72 | 73 | embd = data['embd'] 74 | 75 | num_classes = train[3].shape[1] 76 | 77 | max_length = train[0].shape[1] 78 | 79 | new_concept_embd = data['concepts_vecs'] 80 | 81 | concept_embd = torch.Tensor(new_concept_embd) 82 | 83 | 84 | other_id = len(dicts['label2id']) 85 | 86 | aspects = [dicts['id2label'][k] for k in dicts['id2label']] 87 | 88 | if use_concepts: 89 | 90 | train_batcher = Batcher(train,16,data['concepts_train']) 91 | 92 | if data_name == 'sentihood': 93 | 94 | dev_batcher = Batcher(dev,len(dev[0]),data['concepts_dev']) 95 | 96 | test_batcher = Batcher(test,len(test[0]),data['concepts_test']) 97 | 98 | else: 99 | 100 | train_batcher = Batcher(train,16) 101 | 102 | if data_name == 'sentihood': 103 | 104 | dev_batcher = Batcher(dev,len(dev[0])) 105 | 106 | test_batcher = Batcher(test,len(test[0])) 107 | 108 | input_dim = 150 109 | 110 | hidden_dim = 50 111 | 112 | print args.mode 113 | 114 | if args.mode == 'sentic-bi': 115 | 116 | from biaff_lstm_model import BiAffLSTM_Model 117 | 118 | model = BiAffLSTM_Model(num_classes,max_length,len(dicts['token2id']),embd\ 119 | ,emb_dim = input_dim,hidden_dim=hidden_dim,concept_vector=concept_embd,lr=0.001,num_ways=num_ways,cell=cell) 120 | 121 | elif args.mode == 'sentic': 122 | 123 | from aff_lstm_model import AffLSTM_Model 124 | 125 | model = 
AffLSTM_Model(num_classes,max_length,len(dicts['token2id']),embd\ 126 | ,emb_dim = input_dim,hidden_dim=hidden_dim,concept_vector=concept_embd,lr=0.001,num_ways=num_ways,cell=cell) 127 | else: 128 | 129 | print "using non-concept model" 130 | 131 | if args.mode == 'distance': 132 | 133 | from distance_lstm_model import DistAttLSTM_Model 134 | 135 | model = DistAttLSTM_Model(num_classes,max_length,len(dicts['token2id']),embd\ 136 | ,emb_dim = input_dim,hidden_dim=hidden_dim,lr=0.001,num_ways=num_ways) 137 | elif args.mode == 'distance-deep': 138 | 139 | from distance_dmn_model import DistDMN_Model 140 | 141 | model = DistDMN_Model(num_classes,max_length,len(dicts['token2id']),embd\ 142 | ,emb_dim = input_dim,hidden_dim=hidden_dim,lr=0.001,num_ways=num_ways) 143 | if args.mode == 'global-attention': 144 | 145 | from biatt_lstm_model import BiAttLSTM_Model 146 | 147 | model = BiAttLSTM_Model(num_classes,max_length,len(dicts['token2id']),embd\ 148 | ,emb_dim = input_dim,hidden_dim=hidden_dim,lr=0.001,num_ways=num_ways) 149 | 150 | elif args.mode == 'target-attention': 151 | 152 | from att_lstm_model import AttLSTM_Model 153 | 154 | model = AttLSTM_Model(num_classes,max_length,len(dicts['token2id']),embd\ 155 | ,emb_dim = input_dim,hidden_dim=hidden_dim,lr=0.001,num_ways=num_ways) 156 | 157 | num_back=0 158 | 159 | step_per_epoch = train_batcher.max_batch_num 160 | 161 | best_acc = 0 162 | 163 | best_rep = "" 164 | 165 | best_dev_rep = "" 166 | 167 | train_batcher.shuffle() 168 | 169 | for epoch in range(num_epoch): 170 | 171 | loss = 0.0 172 | 173 | print "Epoch %d" % epoch 174 | 175 | for i in range(step_per_epoch): 176 | 177 | random_ = np.random.random_sample() 178 | 179 | if not use_concepts: 180 | 181 | input_x, y, targets, lengths = train_batcher.next() 182 | 183 | else: 184 | 185 | input_x,y,targets,lengths,cpts,cpts_len= train_batcher.next() 186 | 187 | cpts = autograd.Variable(torch.from_numpy(cpts).long()) 188 | 189 | cpts_len = autograd.Variable(torch.from_numpy(cpts_len)) 190 | 191 | 192 | input_x = autograd.Variable(torch.from_numpy(input_x).long()) 193 | 194 | y = autograd.Variable(torch.from_numpy(y)).long() 195 | 196 | targets = autograd.Variable(torch.from_numpy(targets).float()) 197 | 198 | lengths = autograd.Variable(torch.from_numpy(lengths)) 199 | 200 | if use_concepts: 201 | 202 | loss += model.train_(input_x,y,targets,lengths,cpts,cpts_len).data[0] 203 | 204 | else: 205 | 206 | loss += model.train_(input_x,y,targets,lengths).data[0] 207 | 208 | if data_name == 'sentihood': 209 | 210 | if not use_concepts: 211 | 212 | input_x, y, targets, lengths = dev_batcher.next() 213 | 214 | else: 215 | 216 | input_x,y,targets,lengths,cpts,cpts_len= dev_batcher.next() 217 | 218 | cpts = autograd.Variable(torch.from_numpy(cpts).long()) 219 | 220 | cpts_len = autograd.Variable(torch.from_numpy(cpts_len)) 221 | 222 | input_x = autograd.Variable(torch.from_numpy(input_x).long()) 223 | 224 | targets = autograd.Variable(torch.from_numpy(targets).float()) 225 | 226 | lengths = autograd.Variable(torch.from_numpy(lengths)) 227 | 228 | if use_concepts: 229 | 230 | logits = model.test(input_x,targets,lengths,cpts,cpts_len) 231 | 232 | else: 233 | 234 | logits = model.test(input_x,targets,lengths) 235 | 236 | dev_acc,dev_rep,_ = acc_hook(logits,y,other_id) 237 | 238 | # print "dev set" 239 | 240 | # print "\n".join(dev_rep) 241 | 242 | if not use_concepts: 243 | 244 | input_x, y, targets, lengths = test_batcher.next() 245 | 246 | else: 247 | 248 | input_x,y,targets,lengths,cpts,cpts_len= 
test_batcher.next() 249 | 250 | cpts = autograd.Variable(torch.from_numpy(cpts).long()) 251 | 252 | cpts_len = autograd.Variable(torch.from_numpy(cpts_len)) 253 | 254 | input_x = autograd.Variable(torch.from_numpy(input_x).long()) 255 | 256 | targets = autograd.Variable(torch.from_numpy(targets).float()) 257 | 258 | lengths = autograd.Variable(torch.from_numpy(lengths)) 259 | 260 | if use_concepts: 261 | 262 | logits = model.test(input_x,targets,lengths,cpts,cpts_len) 263 | 264 | else: 265 | 266 | logits = model.test(input_x,targets,lengths) 267 | 268 | test_acc,rep,_ = acc_hook(logits,y,other_id) 269 | 270 | # print "test set" 271 | 272 | # print "\n".join(rep) 273 | 274 | if data_name == 'sentihood': 275 | 276 | if dev_acc > best_acc: 277 | 278 | best_rep = rep 279 | 280 | best_acc = dev_acc 281 | 282 | best_dev_rep = dev_rep 283 | 284 | if data_name == 'sentihood': 285 | 286 | print "best dev" 287 | print "\n".join(best_dev_rep) 288 | print "best test" 289 | print "\n".join(best_rep) 290 | else: 291 | print "test set" 292 | 293 | print "\n".join(rep) 294 | 295 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from sklearn.externals import joblib 4 | #import spacy 5 | import torch 6 | import torch.autograd as autograd 7 | 8 | 9 | def _bilayer_attention(inputs,query,mask,linear1,linear2,nonlinear_func,err=0.0): 10 | batch_size = len(inputs) 11 | 12 | dim = inputs.size()[-1] + query.size()[-1] 13 | 14 | inputs_ = torch.cat([inputs,query.unsqueeze(1).expand(batch_size,inputs.size()[1],query.size()[-1])],2) 15 | 16 | inputs_ = inputs_.view(-1,dim) 17 | 18 | att_vecs = linear2(nonlinear_func(linear1(inputs_))).squeeze().view(batch_size,-1).exp() 19 | 20 | att_vecs = att_vecs * mask 21 | 22 | att_vecs = att_vecs / (att_vecs.sum(-1).expand_as(att_vecs) + err) 23 | 24 | outputs = (att_vecs.unsqueeze(2).expand_as(inputs) * inputs).sum(1) 25 | 26 | outputs = outputs.squeeze(1) 27 | 28 | return att_vecs,outputs 29 | 30 | def deep_bilayer_attention(inputs,mask,linear1s,linear2s,nonlinear_func,num_classes=1, err=0.0): 31 | 32 | batch_size = len(inputs) 33 | 34 | depth = len(linear1s) 35 | 36 | input_dim = inputs.size()[-1] 37 | 38 | prev = None 39 | 40 | 41 | max_len = inputs.size()[1] 42 | 43 | for i in range(depth): 44 | 45 | if prev is not None: 46 | 47 | query_ = prev 48 | 49 | dim = input_dim + query_.size()[-1] 50 | 51 | inputs_ = torch.cat([inputs,query_.unsqueeze(1).expand(batch_size,max_len,num_classes,query_.size()[-1])],-1) 52 | 53 | inputs_ = inputs_.view(-1,dim) 54 | 55 | layer1_outputs = linear1s[i](inputs_) 56 | 57 | layer1_outputs = nonlinear_func(layer1_outputs.view(batch_size*max_len,num_classes,-1)) 58 | 59 | else: 60 | 61 | dim = input_dim 62 | 63 | inputs_ = inputs.view(-1,dim) 64 | 65 | layer1_outputs = linear1s[i](inputs_) 66 | 67 | att_dim = layer1_outputs.size()[-1] 68 | 69 | layer1_outputs = layer1_outputs.unsqueeze(1).expand(batch_size*max_len,num_classes,att_dim) 70 | 71 | layer1_outputs = nonlinear_func(layer1_outputs) 72 | 73 | att_vecs = [] 74 | 75 | for j in range(num_classes): 76 | 77 | att_vecs.append(linear2s[i][j](layer1_outputs[:,i]).view(batch_size,max_len,1)) 78 | 79 | att_vecs = torch.cat(att_vecs,-1).exp() 80 | 81 | 82 | att_vecs = att_vecs * mask.unsqueeze(2).expand_as(att_vecs) 83 | 84 | att_vecs = att_vecs / (att_vecs.sum(1,keepdim=True).expand_as(att_vecs) + err) 85 | 86 | if prev is 
None: 87 | 88 | inputs = inputs.unsqueeze(2).expand(batch_size,max_len,num_classes,input_dim) 89 | 90 | prev = (att_vecs.unsqueeze(3).expand_as(inputs) * inputs).sum(1) 91 | 92 | prev = prev.squeeze(1) 93 | 94 | return att_vecs, prev 95 | 96 | def multi_bilayer_attention(inputs,query,mask,linear1,linear2,nonlinear_func,num_att=12,err=0.0): 97 | 98 | batch_size = len(inputs) 99 | 100 | dim = inputs.size()[-1] + query.size()[-1] 101 | 102 | inputs_ = torch.cat([inputs,query.unsqueeze(1).expand(batch_size,inputs.size()[1],query.size()[-1])],2) 103 | 104 | inputs_ = inputs_.view(-1,dim) 105 | 106 | att_vecs = linear2(nonlinear_func(linear1(inputs_))).squeeze().view(batch_size,-1,num_att).exp() 107 | 108 | mask = mask.unsqueeze(2).expand_as(att_vecs) 109 | 110 | att_vecs = att_vecs * mask 111 | 112 | att_vecs = att_vecs / (att_vecs.sum(1).expand_as(att_vecs) + err) 113 | 114 | inputs = inputs.unsqueeze(2).expand(batch_size,inputs.size()[1],num_att,inputs.size()[2]) 115 | 116 | outputs = (att_vecs.unsqueeze(3).expand_as(inputs) * inputs).sum(1) 117 | 118 | outputs = outputs.squeeze(1) 119 | 120 | return att_vecs,outputs 121 | 122 | 123 | def _multi_bilayer_attention(inputs,query,mask,linear1,linear2,nonlinear_func,num_att=12,err=0.0): 124 | 125 | batch_size = len(inputs) 126 | 127 | 128 | dim = inputs.size()[-1] + query.size()[-1] 129 | 130 | inputs_ = torch.cat([inputs,query.unsqueeze(1).expand(batch_size,inputs.size()[1],query.size()[-1])],2) 131 | 132 | inputs_ = inputs_.view(-1,dim) 133 | 134 | att_vecs = linear2(nonlinear_func(linear1(inputs_))).view(batch_size,-1,num_att).exp() 135 | 136 | mask = mask.unsqueeze(2).expand_as(att_vecs) 137 | 138 | att_vecs = att_vecs * mask 139 | 140 | 141 | return att_vecs 142 | 143 | def _lengths_to_masks(lengths, max_length): 144 | 145 | tiled_ranges = autograd.Variable(torch.arange(0,float(max_length)).unsqueeze(0).expand([len(lengths),max_length])) 146 | 147 | lengths = lengths.float().unsqueeze(1).expand_as(tiled_ranges) 148 | 149 | mask = tiled_ranges.lt(lengths).float() 150 | 151 | return mask 152 | 153 | def weight_variable(shape): 154 | 155 | initial = np.random.uniform(-0.01, 0.01,shape) 156 | 157 | initial = torch.from_numpy(initial) 158 | 159 | return initial.float() 160 | 161 | 162 | def add_item(label2id,id2label,tag): 163 | if tag not in label2id: 164 | label2id[tag] = len(label2id) 165 | id2label[len(label2id)-1] = tag 166 | return label2id[tag] 167 | 168 | def target2distance(targets,t_max=100.0): 169 | start =0 170 | distances = [] 171 | for target in targets: 172 | target_ = [i for i in range(len(target)) if target[i] != 0] 173 | distance = [np.max([0,1- np.min([abs(i-t) for t in target_])/t_max]) for i in range(len(target))] 174 | distances.append(distance) 175 | return autograd.Variable(torch.FloatTensor(distances)) 176 | def construct_word_target_sequence(words,w2id,id2w): 177 | word_seq = [] 178 | targets = [] 179 | targets_ = [] 180 | for i,w in enumerate(words): 181 | if 'LOCATION1' in w: 182 | targets.append(i) 183 | if 'LOCATION2' in w: 184 | targets_.append(i) 185 | if 'LOCATION' in w: 186 | word_seq.append(0) 187 | else: 188 | word_seq.append(add_item(w2id,id2w,w)) 189 | return word_seq,targets,targets_ 190 | 191 | 192 | 193 | def align_vec(Xs,max_length): 194 | Xs_new = np.zeros([len(Xs),max_length],dtype=np.int32) 195 | lengths= np.zeros(len(Xs),dtype=np.int32) 196 | for i in range(len(Xs)): 197 | lengths[i] = len(Xs[i]) 198 | 199 | for j in range(len(Xs[i])): 200 | Xs_new[i][j]= Xs[i][j] 201 | 202 | return Xs_new,lengths 203 
def sparse2vec(Ys, num_labels):
    # Multi-hot encode a batch of index lists into a (batch, num_labels) array.
    Ys_new = np.zeros([len(Ys), num_labels], dtype=np.int32)
    for i in range(len(Ys)):
        Ys_new[i][Ys[i]] = 1.0
    return Ys_new


def sparse2vec_3D(Ys, num_labels):
    # Multi-hot encode paired (LOCATION1, LOCATION2) index lists into a
    # (batch, 2, num_labels) array.
    Ys_new = np.zeros([len(Ys), 2, num_labels], dtype=np.int32)
    for i in range(len(Ys)):
        Ys_new[i][0][Ys[i][0]] = 1.0
        Ys_new[i][1][Ys[i][1]] = 1.0
    return Ys_new


def label2vec(Ys, num_labels, dim, num_ways=3):
    # Encode (aspect, polarity) pairs as a (batch, dim, num_labels, num_ways)
    # tensor. Way 0 means "no opinion" and is cleared whenever an aspect gets
    # a polarity label.
    Ys_new = np.zeros([len(Ys), dim, num_labels, num_ways], dtype=np.float32)
    Ys_new[:, :, :, 0] = 1.0
    for i in range(len(Ys)):
        for j in range(dim):
            if Ys[i][j] != []:
                for y in Ys[i][j]:
                    Ys_new[i][j][y[0]][y[1]] = 1.0
                    Ys_new[i][j][y[0]][0] = 0.0
    return Ys_new
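
# Usage sketch (illustrative only; not part of the original code): encode one
# sentence carrying a single opinion, aspect id 2 with Positive polarity
# (way 1), over four aspect classes.
def _demo_label2vec():
    ys = [[[(2, 1)]]]                         # batch of 1, dim 1, one (aspect, polarity)
    vec = label2vec(ys, num_labels=4, dim=1)  # shape (1, 1, 4, 3)
    # vec[0, 0, 2] == [0, 1, 0]; every other aspect keeps [1, 0, 0]
    return vec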
def read_json_data(fn, dicts, nlp):
    # Parse one SentiHood-style JSON split into token-id sequences, target
    # positions and (aspect, polarity) labels, growing the vocabularies in
    # dicts on the fly (pass dicts=None to create them from scratch).
    if dicts is None:
        token2id = {'LOCATION': 0}
        label2id = {}
        label2id_ = {}
        id2token = {0: 'LOCATION'}
        id2label = {}
        id2label_ = {}
        feat2id = {}
        id2feat = {}
        cfeat2id = {}
        id2cfeat = {}
        dicts = {'token2id': token2id, 'label2id': label2id, 'id2token': id2token,
                 'id2label': id2label, 'label2id_': label2id_, 'id2label_': id2label_,
                 'feat2id': feat2id, 'id2feat': id2feat, 'cfeat2id': cfeat2id,
                 'id2cfeat': id2cfeat}
    else:
        token2id = dicts['token2id']
        label2id = dicts['label2id']
        id2token = dicts['id2token']
        id2label = dicts['id2label']
        id2label_ = dicts['id2label_']
        label2id_ = dicts['label2id_']
        feat2id = dicts['feat2id']
        id2feat = dicts['id2feat']
        cfeat2id = dicts['cfeat2id']
        id2cfeat = dicts['id2cfeat']

    max_word_length = 0
    polarity = {'Positive': 1, 'Negative': 2}
    with open(fn, 'r') as ipt:
        data = json.load(ipt)
    w_seqs = []
    target_seqs = []
    classes = []
    tags = []
    couple_targets = []
    couple_classes = []
    couple_xs = []
    couple_tags = []
    # initialised once per file (not per sentence) so that the features of
    # every two-target sentence accumulate
    couple_features = []
    for d in data:
        d['text'] = d['text'].strip()
        d_ = nlp(d['text'])
        tokens = [w.text for w in d_ if w.text != "" and w.text != " "]
        pos_seqs = [add_item(feat2id, id2feat, w.tag_) for w in d_ if w.text != "" and w.text != " "]

        tid_seqs, targets, targets_ = construct_word_target_sequence(tokens, token2id, id2token)
        max_word_length = len(tid_seqs) if len(tid_seqs) > max_word_length else max_word_length
        opinions = [o for o in d['opinions'] if o['target_entity'] == 'LOCATION1']
        opinions_ = [o for o in d['opinions'] if o['target_entity'] == 'LOCATION2']
        if len(targets_) != 0:
            # sentences mentioning both targets go into the "couple" set
            features = _feature(targets, targets_, tokens)
            if opinions != []:
                cls = [(add_item(label2id, id2label, l['aspect']), polarity[l['sentiment']]) for l in opinions]
            else:
                cls = []
            if opinions_ != []:
                cls_ = [(add_item(label2id, id2label, l['aspect']), polarity[l['sentiment']]) for l in opinions_]
            else:
                cls_ = []
            couple_targets.append([targets, targets_])
            couple_classes.append([cls, cls_])
            couple_xs.append(tid_seqs)
            couple_tags.append(pos_seqs)
            couple_features.append(add_item(cfeat2id, id2cfeat, features[0]))
        else:
            w_seqs.append(tid_seqs)
            target_seqs.append(targets)
            if opinions != []:
                classes.append([[(add_item(label2id, id2label, l['aspect']), polarity[l['sentiment']]) for l in opinions]])
            else:
                classes.append([[]])
            tags.append(pos_seqs)

    return w_seqs, target_seqs, classes, dicts, max_word_length, tags, couple_targets, couple_classes, couple_xs, couple_tags, np.asarray(couple_features)


def custom_pipeline(nlp):
    # Only the POS tagger is needed; the parser and entity recogniser are
    # skipped to keep preprocessing fast.
    return [nlp.tagger]
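
# Usage sketch (illustrative only; the path below is hypothetical). The first
# call passes dicts=None so the vocabularies are built from scratch; later
# splits must reuse the returned dicts so token/label ids stay consistent.
def _demo_read_json_data():
    nlp = spacy.load('en', create_pipeline=custom_pipeline)
    (w_seqs, target_seqs, classes, dicts, max_len, tags,
     couple_targets, couple_classes, couple_xs, couple_tags,
     couple_features) = read_json_data('./data/sentihood-train.json', None, nlp)
    return dicts, max_len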
def create_dataset(input_dir, output_dir):
    # Read the SentiHood train/dev/test splits, pad and vectorise everything,
    # build the embedding table, and dump the result with joblib.
    nlp = spacy.load('en', create_pipeline=custom_pipeline)

    train_set_fn = input_dir + '/sentihood-train.json'
    dev_set_fn = input_dir + '/sentihood-dev.json'
    test_set_fn = input_dir + '/sentihood-test.json'

    (words_train, targets_train, classes_train, dicts, max_length_train, tags_train,
     couple_targets_train, couple_classes_train, couple_xs_train, couple_tags_train,
     couple_feature_train) = read_json_data(train_set_fn, None, nlp)
    (words_dev, targets_dev, classes_dev, dicts, max_length_dev, tags_dev,
     couple_targets_dev, couple_classes_dev, couple_xs_dev, couple_tags_dev,
     couple_feature_dev) = read_json_data(dev_set_fn, dicts, nlp)
    (words_test, targets_test, classes_test, dicts, max_length_test, tags_test,
     couple_targets_test, couple_classes_test, couple_xs_test, couple_tags_test,
     couple_feature_test) = read_json_data(test_set_fn, dicts, nlp)

    max_length = max([max_length_train, max_length_dev, max_length_test])

    words_train, length_train = align_vec(words_train, max_length)
    words_dev, length_dev = align_vec(words_dev, max_length)
    words_test, length_test = align_vec(words_test, max_length)

    couple_xs_train, couple_length_train = align_vec(couple_xs_train, max_length)
    couple_xs_dev, couple_length_dev = align_vec(couple_xs_dev, max_length)
    couple_xs_test, couple_length_test = align_vec(couple_xs_test, max_length)

    tags_train, _ = align_vec(tags_train, max_length)
    tags_dev, _ = align_vec(tags_dev, max_length)
    tags_test, _ = align_vec(tags_test, max_length)

    couple_tags_train, _ = align_vec(couple_tags_train, max_length)
    couple_tags_dev, _ = align_vec(couple_tags_dev, max_length)
    couple_tags_test, _ = align_vec(couple_tags_test, max_length)

    num_classes = len(dicts['label2id'])
    classes_train = label2vec(classes_train, num_classes, 1)[:, 0, :]
    classes_dev = label2vec(classes_dev, num_classes, 1)[:, 0, :]
    classes_test = label2vec(classes_test, num_classes, 1)[:, 0, :]

    couple_classes_train = label2vec(couple_classes_train, num_classes, 2)
    couple_classes_dev = label2vec(couple_classes_dev, num_classes, 2)
    couple_classes_test = label2vec(couple_classes_test, num_classes, 2)

    targets_train = sparse2vec(targets_train, max_length)
    targets_dev = sparse2vec(targets_dev, max_length)
    targets_test = sparse2vec(targets_test, max_length)

    couple_targets_train = sparse2vec_3D(couple_targets_train, max_length)
    couple_targets_dev = sparse2vec_3D(couple_targets_dev, max_length)
    couple_targets_test = sparse2vec_3D(couple_targets_test, max_length)

    w2v, dim = load_word2vec('./resources/all.bin')
    embd_table = create_id2vec(dicts['token2id'], w2v, dim)
    data = {'train': (words_train, length_train, targets_train, classes_train, tags_train),
            'dev': (words_dev, length_dev, targets_dev, classes_dev, tags_dev),
            'test': (words_test, length_test, targets_test, classes_test, tags_test),
            'dicts': dicts,
            'embd': embd_table,
            'couple_train': (couple_xs_train, couple_length_train, couple_targets_train, couple_classes_train, couple_tags_train, couple_feature_train),
            'couple_dev': (couple_xs_dev, couple_length_dev, couple_targets_dev, couple_classes_dev, couple_tags_dev, couple_feature_dev),
            'couple_test': (couple_xs_test, couple_length_test, couple_targets_test, couple_classes_test, couple_tags_test, couple_feature_test)}

    joblib.dump(data, output_dir)


def create_id2vec(word2id, word2vec, dim_of_vector):
    # Build the embedding table: pretrained vectors where available, one
    # shared random vector for unknown words and a dedicated random vector
    # for the LOCATION placeholder; one spare row is left at the end.
    unk_vec = np.random.uniform(-0.01, 0.01, dim_of_vector)
    loc_vec = np.random.uniform(-0.01, 0.01, dim_of_vector)
    num_of_tokens = len(word2id)
    id2vec = np.zeros((num_of_tokens + 1, dim_of_vector), dtype=np.float32)
    for word, t_id in word2id.items():
        if word == 'LOCATION':
            id2vec[t_id, :] = loc_vec
        elif word.lower() in word2vec:
            id2vec[t_id, :] = word2vec[word.lower()]
        else:
            id2vec[t_id, :] = unk_vec
    return id2vec


def load_word2vec(file_path):
    # Load whitespace-separated text embeddings into a dict; also return the
    # embedding dimensionality.
    word2vec = {}
    dim = 0
    with open(file_path) as lines:
        for line in lines:
            split = line.split()
            word = split[0]
            vector = [float(num) for num in split[1:]]
            word2vec[word] = np.array(vector)
            dim = len(vector)
    return word2vec, dim


def print_results(preds, raw, label_alphabet):
    # Print each raw item next to its decoded predictions.
    for pred, item in zip(preds, raw):
        print("------")
        print(item)
        print([label_alphabet[p] for p in pred])
--------------------------------------------------------------------------------
/src/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SenticNet/sentic-lstm/47265133c912a124aea9d7a5c2b740e9d2e0c846/src/utils.pyc
--------------------------------------------------------------------------------
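A minimal end-to-end sketch of how utils.py is meant to be driven (both paths
below are hypothetical; note that create_dataset's output_dir argument is in
fact the file path handed to joblib.dump):

    from utils import create_dataset
    import joblib

    create_dataset('./data/sentihood', './data/sentihood_dataset.pkl')
    data = joblib.load('./data/sentihood_dataset.pkl')
    words_train, length_train, targets_train, classes_train, tags_train = data['train']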