├── LICENSE.txt
├── README.md
├── config
│   └── imdb.ini
├── dataHelper.py
├── dataloader
│   ├── Dataset.py
│   ├── __init__.py
│   ├── ag.py
│   ├── glove.py
│   ├── imdb.py
│   ├── mr.py
│   ├── sst.py
│   └── torch_text_demo
│       ├── imdb.py
│       ├── sst.py
│       └── trec.py
├── docs
│   ├── data_config.md
│   ├── data_config_en.md
│   ├── windows_torch.md
│   └── windows_torch_en.md
├── main.py
├── models
│   ├── BiBloSA.py
│   ├── CNN.py
│   ├── CNNBasic.py
│   ├── CNNInception.py
│   ├── CNNKim.py
│   ├── CNNMultiLayer.py
│   ├── CNNText.py
│   ├── CNN_Inception.py
│   ├── Capsule.py
│   ├── ConvS2S.py
│   ├── DiSAN.py
│   ├── FastText.py
│   ├── LSTM.py
│   ├── LSTMBI.py
│   ├── LSTMStack.py
│   ├── LSTMTree.py
│   ├── LSTMwithAttention.py
│   ├── MLP.py
│   ├── MemoryNetwork.py
│   ├── QuantumCNN.py
│   ├── RCNN.py
│   ├── RNN_CNN.py
│   ├── SelfAttention.py
│   ├── Transformer.py
│   └── __init__.py
├── opts.py
├── parameter_search.py
├── push.bash
├── search.sh
├── trandition.py
└── utils.py
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Barun Patra
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Text Classification Benchmark
2 | A Benchmark of Text Classification in PyTorch
3 | 
4 | 
5 | ## Motivation
6 | 
7 | We are trying to build a benchmark for Text Classification, including:
8 | 
9 | 
10 | >Many Text Classification **DataSets**, including Sentiment/Topic Classification, in popular languages (e.g. English and Chinese). Meanwhile, a basic word embedding is provided.
11 | 
12 | >Implement many popular and state-of-the-art **Models**, especially deep neural networks.
13 | 
14 | ## Have done
15 | We have completed the following datasets and models.
16 | ### Datasets done
17 | - IMDB
18 | - SST
19 | - Trec
20 | 
21 | ### Models done
22 | - FastText
23 | - BasicCNN (KimCNN, MultiLayerCNN, Multi-perspective CNN)
24 | - InceptionCNN
25 | - LSTM (BiLSTM, StackLSTM)
26 | - LSTM with Attention (Self Attention / Quantum Attention)
27 | - Hybrids between CNN and RNN (RCNN, C-LSTM)
28 | - Transformer - Attention is all you need
29 | - ConvS2S
30 | - Capsule
31 | - Quantum-inspired NN
32 | 
33 | ## Library
34 | 
35 | You should have installed [these libraries](docs/windows_torch_en.md):
36 | <pre>
37 | python3
38 | torch
39 | torchtext (optional)
40 | </pre>
41 | 
42 | ## Dataset
43 | Datasets will be downloaded and configured automatically in the current path, or you can set up your data manually, step by step, following [Dataset](docs/data_config_en.md).
44 | 
45 | including:
46 | <pre>
47 | GloVe embedding
48 | Sentiment classification dataset IMDB
49 | </pre>
50 | 
51 | 
52 | ## Usage
53 | 
54 | 
55 | Run with the default settings:
56 | <pre>python main.py</pre>
57 | 
58 | CNN
59 | <pre>python main.py --model cnn</pre>
60 | 
61 | LSTM
62 | <pre>python main.py --model lstm</pre>
63 | 
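Training can also be driven from a short Python script. Below is a minimal sketch condensed from main.py (opts.parse_opt, dataHelper.loadData, models.setup, utils.clip_gradient and utils.evaluation are the repository's own helpers); treat it as an outline rather than a drop-in replacement for main.py.
<pre>
# Condensed training loop, mirroring main.py
import torch
import torch.nn.functional as F
import torch.optim as optim

import dataHelper, models, opts, utils

opt = opts.parse_opt()                              # command-line defaults, e.g. --model lstm
train_iter, test_iter = dataHelper.loadData(opt)    # iterators over the chosen dataset

model = models.setup(opt)
if torch.cuda.is_available():
    model.cuda()

optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=opt.learning_rate)

for epoch in range(opt.max_epoch):
    for batch in train_iter:
        optimizer.zero_grad()
        loss = F.cross_entropy(model(batch.text), batch.label)
        loss.backward()
        utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
    # the final False matches the non-torchtext data path used above
    print("epoch %d precision %.4f" % (epoch, utils.evaluation(model, test_iter, False)))
</pre>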
64 | ## Road Map
65 | - [X] Data preprocessing framework
66 | - [X] Model modules
67 | - [ ] Loss, estimator and hyper-parameter tuning.
68 | - [ ] Test modules
69 | - [ ] More datasets
70 | - [ ] More models
71 |
72 |
73 |
74 | ## Organisation of the repository
75 | The core of this repository is the model implementations and the dataset loaders.
76 |
77 |
78 | * ```dataloader/```: loads all datasets, such as ```IMDB``` and ```SST```
79 |
80 | * ```models/```: creates all models, such as ```FastText```, ```LSTM```, ```CNN```, ```Capsule```, ```QuantumCNN```, ```Multi-Head Attention```
81 |
82 | * ```opts.py```: Parameter and config info.
83 |
84 | * ```utils.py```: tools.
85 |
86 | * ```dataHelper.py```: data loading and batching helpers (e.g. ```Alphabet```, ```BucketIterator```)
87 |
88 |
89 |
90 |
91 | ## Contributor
92 | - [@Allenzhai](https://github.com/zhaizheng)
93 | - [@JaredWei](https://github.com/jacobwei)
94 | - [@AlexMeng](https://github.com/EdwardLorenz)
95 | - [@Lilianwang](https://github.com/WangLilian)
96 | - [@ZhanSu](https://github.com/shuishen112)
97 | - [@Wabywang](https://github.com/Wabyking)
98 |
99 | Issues and contributions are welcome!
100 |
101 |
--------------------------------------------------------------------------------
/config/imdb.ini:
--------------------------------------------------------------------------------
1 | [COMMON]
2 | dataset = imdb
3 |
4 |
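For reference, a minimal sketch of reading this file with Python's standard configparser; how opts.py actually merges it with command-line options is not shown in this dump, so this only illustrates the file's structure.
<pre>
# Minimal sketch: inspect config/imdb.ini with the standard library.
from configparser import ConfigParser

config = ConfigParser()
config.read("config/imdb.ini")
print(config["COMMON"]["dataset"])   # -> imdb
</pre>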
--------------------------------------------------------------------------------
/dataHelper.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import numpy as np
5 | import string
6 | from collections import Counter
7 | import pandas as pd
8 | from tqdm import tqdm
9 | import random
10 | import time
11 | from utils import log_time_delta
12 | from tqdm import tqdm
13 | from dataloader import Dataset
14 | import torch
15 | from torch.autograd import Variable
16 | from codecs import open
17 | try:
18 | import cPickle as pickle
19 | except ImportError:
20 | import pickle
21 | class Alphabet(dict):
22 | def __init__(self, start_feature_id = 1, alphabet_type="text"):
23 | self.fid = start_feature_id
24 | if alphabet_type=="text":
25 | self.add('[PADDING]')
26 | self.add('[UNK]')
27 | self.add('[END]')
28 | self.unknow_token = self.get('[UNK]')
29 | self.end_token = self.get('[END]')
30 | self.padding_token = self.get('[PADDING]')
31 |
32 | def add(self, item):
33 | idx = self.get(item, None)
34 | if idx is None:
35 | idx = self.fid
36 | self[item] = idx
37 | # self[idx] = item
38 | self.fid += 1
39 | return idx
40 |
41 | def addAll(self,words):
42 | for word in words:
43 | self.add(word)
44 |
45 | def dump(self, fname,path="temp"):
46 | if not os.path.exists(path):
47 | os.mkdir(path)
48 | with open(os.path.join(path,fname), "w",encoding="utf-8") as out:
49 | for k in sorted(self.keys()):
50 | out.write("{}\t{}\n".format(k, self[k]))
51 |
52 | class DottableDict(dict):
53 | def __init__(self, *args, **kwargs):
54 | dict.__init__(self, *args, **kwargs)
55 | self.__dict__ = self
56 | self.allowDotting()
57 | def allowDotting(self, state=True):
58 | if state:
59 | self.__dict__ = self
60 | else:
61 | self.__dict__ = dict()
62 |
63 | class BucketIterator(object):
64 | def __init__(self,data,opt=None,batch_size=2,shuffle=True,test=False,position=False):
65 | self.shuffle=shuffle
66 | self.data=data
67 | self.batch_size=batch_size
68 | self.test=test
69 | if opt is not None:
70 | self.setup(opt)
71 | def setup(self,opt):
72 |
73 | self.batch_size=opt.batch_size
74 | self.shuffle=opt.__dict__.get("shuffle",self.shuffle)
75 | self.position=opt.__dict__.get("position",False)
76 | self.padding_token = opt.alphabet.padding_token
77 |
78 | def transform(self,data):
79 | if torch.cuda.is_available():
80 | data=data.reset_index()
81 | text= Variable(torch.LongTensor(data.text).cuda())
82 | label= Variable(torch.LongTensor([int(i) for i in data.label.tolist()]).cuda())
83 | else:
84 | data=data.reset_index()
85 | text= Variable(torch.LongTensor(data.text))
86 | label= Variable(torch.LongTensor(data.label.tolist()))
87 | if self.position:
88 | position_tensor = self.get_position(data.text)
89 | return DottableDict({"text":(text,position_tensor),"label":label})
90 | return DottableDict({"text":text,"label":label})
91 |
92 | def get_position(self,inst_data):
93 | inst_position = np.array([[pos_i+1 if w_i != self.padding_token else 0 for pos_i, w_i in enumerate(inst)] for inst in inst_data])
94 | inst_position_tensor = Variable( torch.LongTensor(inst_position), volatile=self.test)
95 | if torch.cuda.is_available():
96 | inst_position_tensor=inst_position_tensor.cuda()
97 | return inst_position_tensor
98 |
99 | def __iter__(self):
100 | if self.shuffle:
101 | self.data = self.data.sample(frac=1).reset_index(drop=True)
102 | batch_nums = int(len(self.data)/self.batch_size)
103 | for i in range(batch_nums):
104 | yield self.transform(self.data[i*self.batch_size:(i+1)*self.batch_size])
105 | yield self.transform(self.data[-1*self.batch_size:])
106 |
107 |
108 |
109 |
110 | @log_time_delta
111 | def vectors_lookup(vectors,vocab,dim):
112 | embedding = np.zeros((len(vocab),dim))
113 | count = 1
114 | for word in vocab:
115 | if word in vectors:
116 | count += 1
117 | embedding[vocab[word]]= vectors[word]
118 | else:
119 | embedding[vocab[word]]= np.random.uniform(-0.5,+0.5,dim)#vectors['[UNKNOW]'] #.tolist()
120 | print( 'word in embedding',count)
121 | return embedding
122 |
123 | @log_time_delta
124 | def load_text_vec(alphabet,filename="",embedding_size=-1):
125 | vectors = {}
126 | with open(filename,encoding='utf-8') as f:
127 | for line in tqdm(f):
128 | items = line.strip().split(' ')
129 | if len(items) == 2:
130 | vocab_size, embedding_size= items[0],items[1]
131 | print( 'embedding_size',embedding_size)
132 | print( 'vocab_size in pretrained embedding',vocab_size)
133 | else:
134 | word = items[0]
135 | if word in alphabet:
136 | vectors[word] = items[1:]
137 |     print( 'words to be found ', len(alphabet))
138 |     print( 'words found in word2vec embedding ', len(vectors.keys()))
139 |
140 | if embedding_size==-1:
141 | embedding_size = len(vectors[list(vectors.keys())[0]])
142 | return vectors,embedding_size
143 |
144 | def getEmbeddingFile(opt):
145 | #"glove" "w2v"
146 | embedding_name = opt.__dict__.get("embedding","glove_6b_300")
147 | if embedding_name.startswith("glove"):
148 | return os.path.join( ".vector_cache","glove.6B.300d.txt")
149 | else:
150 | return opt.embedding_dir
151 | # please refer to https://pypi.python.org/pypi/torchwordemb/0.0.7
152 | return
153 | @log_time_delta
154 | def getSubVectors(opt,alphabet):
155 | pickle_filename = "temp/"+opt.dataset+".vec"
156 | if not os.path.exists(pickle_filename) or opt.debug:
157 | glove_file = getEmbeddingFile(opt)
158 | wordset= set(alphabet.keys()) # python 2.7
159 | loaded_vectors,embedding_size = load_text_vec(wordset,glove_file)
160 |
161 | vectors = vectors_lookup(loaded_vectors,alphabet,embedding_size)
162 | if opt.debug:
163 | if not os.path.exists("temp"):
164 | os.mkdir("temp")
165 | with open("temp/oov.txt","w","utf-8") as f:
166 | unknown_set = set(alphabet.keys()) - set(loaded_vectors.keys())
167 | f.write("\n".join( unknown_set))
168 | if opt.debug:
169 | pickle.dump(vectors,open(pickle_filename,"wb"))
170 | return vectors
171 | else:
172 | print("load cache for SubVector")
173 | return pickle.load(open(pickle_filename,"rb"))
174 |
175 | def getDataSet(opt):
176 | import dataloader
177 | dataset= dataloader.getDataset(opt)
178 | # files=[os.path.join(data_dir,data_name) for data_name in ['train.txt','test.txt','dev.txt']]
179 |
180 | return dataset.getFormatedData()
181 |
182 | #data_dir = os.path.join(".data/clean",opt.dataset)
183 | #if not os.path.exists(data_dir):
184 | # import dataloader
185 | # dataset= dataloader.getDataset(opt)
186 | # return dataset.getFormatedData()
187 | #else:
188 | # for root, dirs, files in os.walk(data_dir):
189 | # for file in files:
190 | # yield os.path.join(root,file)
191 |
192 |
193 | # files=[os.path.join(data_dir,data_name) for data_name in ['train.txt','test.txt','dev.txt']]
194 |
195 | import re
196 | def clean(text):
197 |     # text="'tycoon.
--------------------------------------------------------------------------------
/docs/windows_torch.md:
--------------------------------------------------------------------------------
11 | pip install torch0.3XXX.whl
12 | If it is a conda package (a compressed-archive file):
13 | conda install --offline torch0.3XXX.tar.bz
14 |
15 | ## TorchText installation
16 |
17 | This assumes you have git and pip; if not, download git and add it to the Path environment variable.
18 | pip install git+https://github.com/pytorch/text.git
19 |
20 | If you also need to go through a proxy:
21 |
22 |
23 |
24 | pip install git+https://github.com/pytorch/text.git --proxy proxy.xx.com:8080
25 |
26 |
27 | Reference link:
28 | https://zhuanlan.zhihu.com/p/31747695
29 |
--------------------------------------------------------------------------------
/docs/windows_torch_en.md:
--------------------------------------------------------------------------------
1 | # Windows Platform Installation for PyTorch
2 |
3 | On Linux or Mac, install PyTorch directly from the [homepage](http://pytorch.org/), then install TorchText.
4 |
5 | ## Python installation
6 | Please install anaconda directly: [installation package](https://repo.continuum.io/archive/Anaconda3-5.0.1-Windows-x86_64.exe)
7 |
8 | ## Pytorch installation
9 | Download an offline wheel (version 0.3 or 0.2) from [Baidu Network Disk](https://pan.baidu.com/s/1dF6ayLr#list/path=%2Fpytorch):
10 | pip install torch0.3XXX.whl
11 |
12 | If it is a conda package (a compressed archive):
13 | conda install --offline torch0.3XXX.tar.bz
14 |
15 | ## TorchText installation
16 |
17 | This assumes you have git and pip; if you don't, download git and add it to the Path environment variable.
18 | pip install git+https://github.com/pytorch/text.git
19 |
20 | If you need a proxy,
21 | pip install git+https://github.com/pytorch/text.git --proxy proxy.xx.com:8080
22 |
23 |
24 | Reference Link:
25 | https://zhuanlan.zhihu.com/p/31747695
26 |
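After installation, a quick sanity check (a generic snippet, not part of this repository) confirms the packages import and reports whether CUDA is visible:
<pre>
# Sanity check after installing torch / torchtext
import torch
print("torch", torch.__version__)
print("CUDA available:", torch.cuda.is_available())

try:
    import torchtext
    print("torchtext OK")
except ImportError:
    print("torchtext not installed (it is optional)")
</pre>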
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import torch
8 | from torch.autograd import Variable
9 | import torch.optim as optim
10 | import numpy as np
11 |
12 | from six.moves import cPickle
13 |
14 | import opts
15 | import models
16 | import torch.nn as nn
17 | import utils
18 | import torch.nn.functional as F
19 | from torchtext import data
20 | from torchtext import datasets
21 | from torchtext.vocab import Vectors, GloVe, CharNGram, FastText
22 | from torch.nn.modules.loss import NLLLoss,MultiLabelSoftMarginLoss,MultiLabelMarginLoss,BCELoss
23 | import dataHelper
24 | import time,os
25 |
26 |
27 | from_torchtext = False
28 |
29 | opt = opts.parse_opt()
30 | #opt.proxy="http://xxxx.xxxx.com:8080"
31 |
32 |
33 | if "CUDA_VISIBLE_DEVICES" not in os.environ.keys():
34 | os.environ["CUDA_VISIBLE_DEVICES"] =opt.gpu
35 | #opt.model ='lstm'
36 | #opt.model ='capsule'
37 |
38 | if from_torchtext:
39 | train_iter, test_iter = utils.loadData(opt)
40 | else:
41 | import dataHelper as helper
42 | train_iter, test_iter = dataHelper.loadData(opt)
43 |
44 | opt.lstm_layers=2
45 |
46 | model=models.setup(opt)
47 | if torch.cuda.is_available():
48 | model.cuda()
49 | model.train()
50 | print("# parameters:", sum(param.numel() for param in model.parameters() if param.requires_grad))
51 | optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.learning_rate)
52 | optimizer.zero_grad()
53 | loss_fun = F.cross_entropy
54 |
55 | #batch = next(iter(train_iter))
56 |
57 | #x=batch.text[0]
58 |
59 | #x=batch.text[0] #64x200
60 |
61 | #print(utils.evaluation(model,test_iter))
62 | for i in range(opt.max_epoch):
63 | for epoch,batch in enumerate(train_iter):
64 | start= time.time()
65 |
66 | text = batch.text[0] if from_torchtext else batch.text
67 | predicted = model(text)
68 |
69 | loss= loss_fun(predicted,batch.label)
70 |         optimizer.zero_grad()  # clear gradients accumulated from the previous step
71 | loss.backward()
72 | utils.clip_gradient(optimizer, opt.grad_clip)
73 | optimizer.step()
74 |         if epoch % 100 == 0:
75 |             if torch.cuda.is_available():
76 |                 print("epoch %d batch %d with loss : %.5f in %.4f seconds" % (i, epoch, loss.cpu().data.numpy()[0], time.time() - start))
77 |             else:
78 |                 print("epoch %d batch %d with loss : %.5f in %.4f seconds" % (i, epoch, loss.data.numpy()[0], time.time() - start))
79 |
80 |     precision = utils.evaluation(model, test_iter, from_torchtext)
81 |     print("%d epoch with precision %.4f" % (i, precision))
82 |
83 |
84 |
--------------------------------------------------------------------------------
/models/BiBloSA.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | #https://github.com/galsang/BiBloSA-pytorch/blob/master/model/model.py
4 |
5 |
--------------------------------------------------------------------------------
/models/CNN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class CNN(nn.Module):
7 | def __init__(self, opt):
8 | super(CNN, self).__init__()
9 |
10 | self.embedding_type = opt.embedding_type
11 | self.batch_size = opt.batch_size
12 | self.max_sent_len = opt.max_sent_len
13 | self.embedding_dim = opt.embedding_dim
14 | self.vocab_size = opt.vocab_size
15 | self.CLASS_SIZE = opt.label_size
16 | self.FILTERS = opt["FILTERS"]
17 | self.FILTER_NUM = opt["FILTER_NUM"]
18 | self.keep_dropout = opt.keep_dropout
19 | self.IN_CHANNEL = 1
20 |
21 | assert (len(self.FILTERS) == len(self.FILTER_NUM))
22 |
23 | # one for UNK and one for zero padding
24 | self.embedding = nn.Embedding(self.vocab_size + 2, self.embedding_dim, padding_idx=self.vocab_size + 1)
25 | if self.embedding_type == "static" or self.embedding_type == "non-static" or self.embedding_type == "multichannel":
26 | self.WV_MATRIX = opt["WV_MATRIX"]
27 | self.embedding.weight.data.copy_(torch.from_numpy(self.WV_MATRIX))
28 | if self.embedding_type == "static":
29 | self.embedding.weight.requires_grad = False
30 | elif self.embedding_type == "multichannel":
31 |             self.embedding2 = nn.Embedding(self.vocab_size + 2, self.embedding_dim, padding_idx=self.vocab_size + 1)
32 | self.embedding2.weight.data.copy_(torch.from_numpy(self.WV_MATRIX))
33 | self.embedding2.weight.requires_grad = False
34 | self.IN_CHANNEL = 2
35 |
36 | for i in range(len(self.FILTERS)):
37 |             conv = nn.Conv1d(self.IN_CHANNEL, self.FILTER_NUM[i], self.embedding_dim * self.FILTERS[i], stride=self.embedding_dim)
38 | setattr(self, 'conv_%d'%i, conv)
39 |
40 |         self.fc = nn.Linear(sum(self.FILTER_NUM), self.CLASS_SIZE)
41 |
42 | def get_conv(self, i):
43 | return getattr(self, 'conv_%d'%i)
44 |
45 | def forward(self, inp):
46 | x = self.embedding(inp).view(-1, 1, self.embedding_dim * self.max_sent_len)
47 | if self.embedding_type == "multichannel":
48 | x2 = self.embedding2(inp).view(-1, 1, self.embedding_dim * self.max_sent_len)
49 | x = torch.cat((x, x2), 1)
50 |
51 | conv_results = [
52 | F.max_pool1d(F.relu(self.get_conv(i)(x)), self.max_sent_len - self.FILTERS[i] + 1)
53 | .view(-1, self.FILTER_NUM[i])
54 | for i in range(len(self.FILTERS))]
55 |
56 | x = torch.cat(conv_results, 1)
57 | x = F.dropout(x, p=self.keep_dropout, training=self.training)
58 | x = self.fc(x)
59 | return x
60 |
61 |
62 |
63 | #https://github.com/zachAlbus/pyTorch-text-classification/blob/master/Yoon/model.py
64 | class CNN1(nn.Module):
65 |
66 | def __init__(self, opt):
67 | super(CNN1,self).__init__()
68 | self.opt = opt
69 |
70 | V = opt.vocab_size
71 | D = opt.embedding_dim
72 | C = opt.label_size
73 | Ci = 1
74 | Co = opt.kernel_num
75 | Ks = opt.kernel_sizes
76 |
77 | self.embed = nn.Embedding(V, D)
78 | #self.convs1 = [nn.Conv2d(Ci, Co, (K, D)) for K in Ks]
79 | self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks])
80 | '''
81 | self.conv13 = nn.Conv2d(Ci, Co, (3, D))
82 | self.conv14 = nn.Conv2d(Ci, Co, (4, D))
83 | self.conv15 = nn.Conv2d(Ci, Co, (5, D))
84 | '''
85 | self.dropout = nn.Dropout(opt.dropout)
86 | self.fc1 = nn.Linear(len(Ks)*Co, C)
87 |
88 | def conv_and_pool(self, x, conv):
89 | x = F.relu(conv(x)).squeeze(3) #(N,Co,W)
90 | x = F.max_pool1d(x, x.size(2)).squeeze(2)
91 | return x
92 |
93 |
94 | def forward(self, x):
95 | x = self.embed(x) # (N,W,D)
96 |
97 |         if self.opt.static:
98 |             x = x.detach()  # do not update embeddings in static mode
99 |
100 | x = x.unsqueeze(1) # (N,Ci,W,D)
101 |
102 | x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks)
103 |
104 |
105 | x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] #[(N,Co), ...]*len(Ks)
106 |
107 | x = torch.cat(x, 1)
108 |
109 | '''
110 | x1 = self.conv_and_pool(x,self.conv13) #(N,Co)
111 | x2 = self.conv_and_pool(x,self.conv14) #(N,Co)
112 | x3 = self.conv_and_pool(x,self.conv15) #(N,Co)
113 | x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co)
114 | '''
115 | x = self.dropout(x) # (N,len(Ks)*Co)
116 | logit = self.fc1(x) # (N,C)
117 | return logit
118 |
119 | import torch.nn as nn
120 |
121 |
122 | #https://github.com/zachAlbus/pyTorch-text-classification/blob/master/Zhang/model.py
123 | class CNN2(nn.Module):
124 | def __init__(self, opt):
125 | super(CNN2, self).__init__()
126 | self.embed = nn.Embedding(opt.vocab_size + 1, opt.embedding_dim)
127 |
128 | self.conv1 = nn.Sequential(
129 | nn.Conv1d(opt.l0, 256, kernel_size=7, stride=1),
130 | nn.ReLU(),
131 | nn.MaxPool1d(kernel_size=3, stride=3)
132 | )
133 |
134 | self.conv2 = nn.Sequential(
135 | nn.Conv1d(256, 256, kernel_size=7, stride=1),
136 | nn.ReLU(),
137 | nn.MaxPool1d(kernel_size=3, stride=3)
138 | )
139 |
140 | self.conv3 = nn.Sequential(
141 | nn.Conv1d(256, 256, kernel_size=3, stride=1),
142 | nn.ReLU()
143 | )
144 |
145 | self.conv4 = nn.Sequential(
146 | nn.Conv1d(256, 256, kernel_size=3, stride=1),
147 | nn.ReLU()
148 | )
149 |
150 | self.conv5 = nn.Sequential(
151 | nn.Conv1d(256, 256, kernel_size=3, stride=1),
152 | nn.ReLU()
153 | )
154 |
155 | self.conv6 = nn.Sequential(
156 | nn.Conv1d(256, 256, kernel_size=3, stride=1),
157 | nn.ReLU(),
158 | nn.MaxPool1d(kernel_size=3, stride=3)
159 | )
160 |
161 | self.fc = nn.Linear(256, opt.label_size)
162 |
163 | def forward(self, x_input):
164 | # Embedding
165 | x = self.embed(x_input) # dim: (batch_size, max_seq_len, embedding_size)
166 | x = self.conv1(x)
167 | x = self.conv2(x)
168 | x = self.conv3(x)
169 | x = self.conv4(x)
170 | x = self.conv5(x)
171 | x = self.conv6(x)
172 |
173 | # collapse
174 | x = x.view(x.size(0), -1)
175 | x = self.fc(x)
176 |
177 | return F.log_softmax(x)
178 | class CNN3(nn.Module):
179 | """
180 | A CNN for text classification.
181 | Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.
182 | """
183 | def __init__(self, args):
184 | super(CNN3, self).__init__()
185 | self.args = args
186 |
187 | embedding_dim = args.embed_dim
188 | embedding_num = args.num_features
189 | class_number = args.class_num
190 | in_channel = 1
191 | out_channel = args.kernel_num
192 | kernel_sizes = args.kernel_sizes
193 |
194 | self.embed = nn.Embedding(embedding_num+1, embedding_dim)
195 | self.conv = nn.ModuleList([nn.Conv2d(in_channel, out_channel, (K, embedding_dim)) for K in kernel_sizes])
196 |
197 | self.dropout = nn.Dropout(args.dropout)
198 | self.fc = nn.Linear(len(kernel_sizes) * out_channel, class_number)
199 |
200 |
201 | def forward(self, input_x):
202 | """
203 | :param input_x: a list size having the number of batch_size elements with the same length
204 | :return: batch_size X num_aspects tensor
205 | """
206 | # Embedding
207 | x = self.embed(input_x) # dim: (batch_size, max_seq_len, embedding_size)
208 |
209 | if self.args.static:
210 |             x = x.detach()  # do not update embeddings in static mode
211 |
212 | # Conv & max pool
213 | x = x.unsqueeze(1) # dim: (batch_size, 1, max_seq_len, embedding_size)
214 |
215 | # turns to be a list: [ti : i \in kernel_sizes] where ti: tensor of dim([batch, num_kernels, max_seq_len-i+1])
216 | x = [F.relu(conv(x)).squeeze(3) for conv in self.conv]
217 |
218 | # dim: [(batch_size, num_kernels), ...]*len(kernel_sizes)
219 | x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
220 | x = torch.cat(x, 1)
221 |
222 | # Dropout & output
223 | x = self.dropout(x) # (batch_size,len(kernel_sizes)*num_kernels)
224 | logit = F.log_softmax(self.fc(x)) # (batch_size, num_aspects)
225 |
226 | return logit
--------------------------------------------------------------------------------
/models/CNNBasic.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch as t
3 | import numpy as np
4 | from torch import nn
5 | import torch.nn.functional as F
6 | class BasicCNN1D(nn.Module):
7 | def __init__(self, opt ):
8 | super(BasicCNN1D, self).__init__()
9 | self.model_name = 'CNNText'
10 | self.opt=opt
11 | self.content_dim=opt.__dict__.get("content_dim",256)
12 | self.kernel_size=opt.__dict__.get("kernel_size",3)
13 |
14 |
15 | self.encoder = nn.Embedding(opt.vocab_size,opt.embedding_dim)
16 | if opt.__dict__.get("embeddings",None) is not None:
17 | self.encoder.weight=nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
18 |
19 | self.content_conv = nn.Sequential(
20 | nn.Conv1d(in_channels = opt.embedding_dim,
21 | out_channels = self.content_dim, #256
22 | kernel_size = self.kernel_size), #3
23 | nn.ReLU(),
24 | nn.MaxPool1d(kernel_size = (opt.max_seq_len - self.kernel_size + 1))
25 | # nn.AdaptiveMaxPool1d()
26 | )
27 | self.fc = nn.Linear(self.content_dim, opt.label_size)
28 |
29 | def forward(self, content):
30 |
31 | content = self.encoder(content) #64x200x300
32 | content_out = self.content_conv(content.permute(0,2,1)) #64x256x1
33 | reshaped = content_out.view(content_out.size(0), -1) #64x256
34 | logits = self.fc(reshaped) #64x3
35 | return logits
36 | class BasicCNN2D(nn.Module):
37 | """
38 | A CNN for text classification.
39 | Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.
40 | """
41 |     def __init__(self, opt):
42 | super(BasicCNN2D, self).__init__()
43 | self.opt = opt
44 |
45 | self.embedding_dim = opt.embedding_dim
46 | self.vocab_size = opt.vocab_size
47 | self.label_size = opt.label_size
48 | self.keep_dropout = opt.keep_dropout
49 | in_channel = 1
50 | self.kernel_nums = opt.kernel_nums
51 | self.kernel_sizes = opt.kernel_sizes
52 |
53 | self.embed = nn.Embedding(self.vocab_size+1, self.embedding_dim)
54 |
55 | if opt.__dict__.get("embeddings",None) is not None:
56 | self.embed.weight=nn.Parameter(opt.embeddings)
57 |
58 | self.conv = nn.ModuleList([nn.Conv2d(in_channel, out_channel, (K, self.embedding_dim)) for K,out_channel in zip(self.kernel_sizes,self.kernel_nums)])
59 |
60 | self.dropout = nn.Dropout(self.keep_dropout)
61 |         self.fc = nn.Linear(sum(self.kernel_nums), self.label_size)
62 |
63 |
64 | def forward(self, input_x):
65 | """
66 | :param input_x: a list size having the number of batch_size elements with the same length
67 | :return: batch_size X num_aspects tensor
68 | """
69 | # Embedding
70 | x = self.embed(input_x) # dim: (batch_size, max_seq_len, embedding_size)
71 |
72 | if self.opt.static:
73 |             x = x.detach()  # do not update embeddings in static mode
74 |
75 | # Conv & max pool
76 | x = x.unsqueeze(1) # dim: (batch_size, 1, max_seq_len, embedding_size)
77 |
78 | # turns to be a list: [ti : i \in kernel_sizes] where ti: tensor of dim([batch, num_kernels, max_seq_len-i+1])
79 | x = [F.relu(conv(x)).squeeze(3) for conv in self.conv]
80 |
81 | # dim: [(batch_size, num_kernels), ...]*len(kernel_sizes)
82 | x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
83 |         x = t.cat(x, 1)
84 |
85 | # Dropout & output
86 | x = self.dropout(x) # (batch_size,len(kernel_sizes)*num_kernels)
87 | logit = F.log_softmax(self.fc(x)) # (batch_size, num_aspects)
88 |
89 | return logit
90 | import argparse
91 |
92 | def parse_opt():
93 | parser = argparse.ArgumentParser()
94 | # Data input settings
95 | parser.add_argument('--hidden_dim', type=int, default=128,
96 | help='hidden_dim')
97 |
98 |
99 | parser.add_argument('--batch_size', type=int, default=64,
100 | help='batch_size')
101 | parser.add_argument('--embedding_dim', type=int, default=300,
102 | help='embedding_dim')
103 | parser.add_argument('--learning_rate', type=float, default=4e-4,
104 | help='learning_rate')
105 | parser.add_argument('--grad_clip', type=float, default=1e-1,
106 | help='grad_clip')
107 | parser.add_argument('--model', type=str, default="lstm",
108 | help='model name')
109 |     # (duplicate '--model' argument removed; argparse does not allow registering the same option twice)
110 | 
111 |
112 |
113 | #
114 | args = parser.parse_args()
115 | args.embedding_dim=300
116 | args.vocab_size=10000
117 | args.kernel_size=3
118 |     args.label_size=3
119 | args.content_dim=256
120 | args.max_seq_len=50
121 |
122 | #
123 | # # Check if args are valid
124 | # assert args.rnn_size > 0, "rnn_size should be greater than 0"
125 |
126 |
127 | return args
128 |
129 | if __name__ == '__main__':
130 |
131 | opt = parse_opt()
132 |     m = BasicCNN1D(opt)
133 | content = t.autograd.Variable(t.arange(0,3200).view(-1,50)).long()
134 | o = m(content)
135 | print(o.size())
136 |
137 |
--------------------------------------------------------------------------------
/models/CNNInception.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
4 | import torch as t
5 | import torch
6 | import numpy as np
7 | from torch import nn
8 | from collections import OrderedDict
9 |
10 | class Inception(nn.Module):
11 | def __init__(self,cin,co,relu=True,norm=True):
12 | super(Inception, self).__init__()
13 | assert(co%4==0)
14 | cos=[int(co/4)]*4
15 | self.activa=nn.Sequential()
16 | if norm:self.activa.add_module('norm',nn.BatchNorm1d(co))
17 | if relu:self.activa.add_module('relu',nn.ReLU(True))
18 | self.branch1 =nn.Sequential(OrderedDict([
19 | ('conv1', nn.Conv1d(cin,cos[0], 1,stride=1)),
20 | ]))
21 | self.branch2 =nn.Sequential(OrderedDict([
22 | ('conv1', nn.Conv1d(cin,cos[1], 1)),
23 | ('norm1', nn.BatchNorm1d(cos[1])),
24 | ('relu1', nn.ReLU(inplace=True)),
25 | ('conv3', nn.Conv1d(cos[1],cos[1], 3,stride=1,padding=1)),
26 | ]))
27 | self.branch3 =nn.Sequential(OrderedDict([
28 | ('conv1', nn.Conv1d(cin,cos[2], 3,padding=1)),
29 | ('norm1', nn.BatchNorm1d(cos[2])),
30 | ('relu1', nn.ReLU(inplace=True)),
31 | ('conv3', nn.Conv1d(cos[2],cos[2], 5,stride=1,padding=2)),
32 | ]))
33 | self.branch4 =nn.Sequential(OrderedDict([
34 | #('pool',nn.MaxPool1d(2)),
35 | ('conv3', nn.Conv1d(cin,cos[3], 3,stride=1,padding=1)),
36 | ]))
37 | def forward(self,x):
38 | branch1=self.branch1(x)
39 | branch2=self.branch2(x)
40 | branch3=self.branch3(x)
41 | branch4=self.branch4(x)
42 | result=self.activa(torch.cat((branch1,branch2,branch3,branch4),1))
43 | return result
44 | class InceptionCNN(nn.Module):
45 | def __init__(self, opt ):
46 | super(InceptionCNN, self).__init__()
47 | incept_dim=getattr(opt,"inception_dim",512)
48 | self.model_name = 'CNNText_inception'
49 | self.opt=opt
50 | self.encoder = nn.Embedding(opt.vocab_size,opt.embedding_dim)
51 |
52 | self.content_conv=nn.Sequential(
53 | Inception(opt.embedding_dim,incept_dim),#(batch_size,64,opt.content_seq_len)->(batch_size,64,(opt.content_seq_len)/2)
54 | #Inception(incept_dim,incept_dim),#(batch_size,64,opt.content_seq_len/2)->(batch_size,32,(opt.content_seq_len)/4)
55 | Inception(incept_dim,incept_dim),
56 | nn.MaxPool1d(opt.max_seq_len)
57 | )
58 | self.fc = nn.Sequential(
59 | nn.Linear(incept_dim,getattr(opt,"linear_hidden_size",2000)),
60 | nn.BatchNorm1d(getattr(opt,"linear_hidden_size",2000)),
61 | nn.ReLU(inplace=True),
62 | nn.Linear(getattr(opt,"linear_hidden_size",2000) ,opt.label_size)
63 | )
64 | if opt.__dict__.get("embeddings",None) is not None:
65 | self.encoder.weight=nn.Parameter(opt.embeddings)
66 |
67 | def forward(self,content):
68 |
69 | content=self.encoder(content)
70 | if self.opt.embedding_type=="static":
71 |             content=content.detach()
72 |
73 | content_out=self.content_conv(content.permute(0,2,1))
74 | out=content_out.view(content_out.size(0), -1)
75 | out=self.fc(out)
76 | return out
77 |
78 | if __name__ == '__main__':
79 | import sys
80 | sys.path.append(r"..")
81 | import opts
82 | opt=opts.parse_opt()
83 | opt.vocab_size=2501
84 | opt.label_size=3
85 |     m = InceptionCNN(opt)
86 |
87 | content = t.autograd.Variable(t.arange(0,2500).view(10,250)).long()
88 | o = m(content)
89 | print(o.size())
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/models/CNNKim.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class KIMCNN1D(nn.Module):
7 | def __init__(self, opt):
8 | super(KIMCNN1D, self).__init__()
9 |
10 | self.embedding_type = opt.embedding_type
11 | self.batch_size = opt.batch_size
12 | self.max_seq_len = opt.max_seq_len
13 | self.embedding_dim = opt.embedding_dim
14 | self.vocab_size = opt.vocab_size
15 | self.label_size = opt.label_size
16 | self.kernel_sizes = opt.kernel_sizes
17 | self.kernel_nums = opt.kernel_nums
18 | self.keep_dropout = opt.keep_dropout
19 | self.in_channel = 1
20 |
21 | assert (len(self.kernel_sizes) == len(self.kernel_nums))
22 |
23 | # one for UNK and one for zero padding
24 | self.embedding = nn.Embedding(self.vocab_size + 2, self.embedding_dim, padding_idx=self.vocab_size + 1)
25 | if self.embedding_type == "static" or self.embedding_type == "non-static" or self.embedding_type == "multichannel":
26 | self.embedding.weight=nn.Parameter(opt.embeddings)
27 | if self.embedding_type == "static":
28 | self.embedding.weight.requires_grad = False
29 | elif self.embedding_type == "multichannel":
30 | self.embedding2 = nn.Embedding(self.vocab_size + 2, self.embedding_dim, padding_idx=self.vocab_size + 1)
31 | self.embedding2.weight=nn.Parameter(opt.embeddings)
32 | self.embedding2.weight.requires_grad = False
33 | self.in_channel = 2
34 | else:
35 | pass
36 | #
37 | # for i in range(len(self.kernel_sizes)):
38 | # conv = nn.Conv1d(self.in_channel, self.kernel_nums[i], self.embedding_dim * self.kernel_sizes[i], stride=self.embedding_dim)
39 | # setattr(self, 'conv_%d'%i, conv)
40 | self.convs = nn.ModuleList([nn.Conv1d(self.in_channel, num, self.embedding_dim * size, stride=self.embedding_dim) for size,num in zip(opt.kernel_sizes,opt.kernel_nums)])
41 | self.fc = nn.Linear(sum(self.kernel_nums), self.label_size)
42 |
43 | def get_conv(self, i):
44 | return getattr(self, 'conv_%d'%i)
45 |
46 | def forward(self, inp):
47 | x = self.embedding(inp).view(-1, 1, self.embedding_dim * self.max_seq_len)
48 | if self.embedding_type == "multichannel":
49 | x2 = self.embedding2(inp).view(-1, 1, self.embedding_dim * self.max_seq_len)
50 | x = torch.cat((x, x2), 1)
51 |
52 | # conv_results = [
53 | # F.max_pool1d(F.relu(self.get_conv(i)(x)), self.max_seq_len - self.kernel_sizes[i] + 1)
54 | # .view(-1, self.kernel_nums[i])
55 | # for i in range(len(self.kernel_sizes))]
56 | conv_results = [
57 | F.max_pool1d(F.relu(self.convs[i](x)), self.max_seq_len - self.kernel_sizes[i] + 1)
58 | .view(-1, self.kernel_nums[i])
59 | for i in range(len(self.convs))]
60 |
61 | x = torch.cat(conv_results, 1)
62 | x = F.dropout(x, p=self.keep_dropout, training=self.training)
63 | x = self.fc(x)
64 | return x
65 |
66 |
67 |
68 | #https://github.com/zachAlbus/pyTorch-text-classification/blob/master/Yoon/model.py
69 | class KIMCNN2D(nn.Module):
70 |
71 | def __init__(self, opt):
72 | super(KIMCNN2D,self).__init__()
73 | self.opt = opt
74 | self.embedding_type = opt.embedding_type
75 | self.batch_size = opt.batch_size
76 | self.max_seq_len = opt.max_seq_len
77 | self.embedding_dim = opt.embedding_dim
78 | self.vocab_size = opt.vocab_size
79 | self.label_size = opt.label_size
80 | self.kernel_sizes = opt.kernel_sizes
81 | self.kernel_nums = opt.kernel_nums
82 | self.keep_dropout = opt.keep_dropout
83 |
84 | self.embedding = nn.Embedding(self.vocab_size + 2, self.embedding_dim, padding_idx=self.vocab_size + 1)
85 | if self.embedding_type == "static" or self.embedding_type == "non-static" or self.embedding_type == "multichannel":
86 | self.embedding.weight=nn.Parameter(opt.embeddings)
87 | if self.embedding_type == "static":
88 | self.embedding.weight.requires_grad = False
89 | elif self.embedding_type == "multichannel":
90 | self.embedding2 = nn.Embedding(self.vocab_size + 2, self.embedding_dim, padding_idx=self.vocab_size + 1)
91 | self.embedding2.weight=nn.Parameter(opt.embeddings)
92 | self.embedding2.weight.requires_grad = False
93 | self.in_channel = 2
94 | else:
95 | pass
96 | #self.convs1 = [nn.Conv2d(Ci, Co, (K, D)) for K in Ks]
97 | self.convs1 = nn.ModuleList([nn.Conv2d(1, num, (size, opt.embedding_dim)) for size,num in zip(opt.kernel_sizes,opt.kernel_nums)])
98 | '''
99 | self.conv13 = nn.Conv2d(Ci, Co, (3, D))
100 | self.conv14 = nn.Conv2d(Ci, Co, (4, D))
101 | self.conv15 = nn.Conv2d(Ci, Co, (5, D))
102 | '''
103 | self.dropout = nn.Dropout(opt.keep_dropout)
104 | self.fc = nn.Linear(sum(opt.kernel_nums), opt.label_size)
105 |
106 | def conv_and_pool(self, x, conv):
107 | x = F.relu(conv(x)).squeeze(3) #(N,Co,W)
108 | x = F.max_pool1d(x, x.size(2)).squeeze(2)
109 | return x
110 |
111 |
112 | def forward(self, x):
113 | x = self.embedding(x) # (N,W,D)
114 |
115 |
116 |
117 | x = x.unsqueeze(1) # (N,Ci,W,D)
118 |
119 | x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks)
120 |
121 |
122 | x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] #[(N,Co), ...]*len(Ks)
123 |
124 | x = torch.cat(x, 1)
125 |
126 | '''
127 | x1 = self.conv_and_pool(x,self.conv13) #(N,Co)
128 | x2 = self.conv_and_pool(x,self.conv14) #(N,Co)
129 | x3 = self.conv_and_pool(x,self.conv15) #(N,Co)
130 | x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co)
131 | '''
132 | x = self.dropout(x) # (N,len(Ks)*Co)
133 | logit = self.fc(x) # (N,C)
134 | return logit
135 |
136 |
--------------------------------------------------------------------------------
/models/CNNMultiLayer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | #https://github.com/zachAlbus/pyTorch-text-classification/blob/master/Zhang/model.py
9 | class MultiLayerCNN(nn.Module):
10 | def __init__(self, opt):
11 | super(MultiLayerCNN, self).__init__()
12 | self.embed = nn.Embedding(opt.vocab_size + 1, opt.embedding_dim)
13 |
14 | if opt.__dict__.get("embeddings",None) is not None:
15 | self.embed.weight=nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
16 |
17 | self.conv1 = nn.Sequential(
18 | nn.Conv1d(opt.max_seq_len, 256, kernel_size=7, stride=1),
19 | nn.ReLU(),
20 | nn.MaxPool1d(kernel_size=3, stride=3)
21 | )
22 |
23 | self.conv2 = nn.Sequential(
24 | nn.Conv1d(256, 256, kernel_size=7, stride=1),
25 | nn.ReLU(),
26 | nn.MaxPool1d(kernel_size=3, stride=3)
27 | )
28 |
29 | self.conv3 = nn.Sequential(
30 | nn.Conv1d(256, 256, kernel_size=3, stride=1),
31 | nn.ReLU()
32 | )
33 |
34 | self.conv4 = nn.Sequential(
35 | nn.Conv1d(256, 256, kernel_size=3, stride=1),
36 | nn.ReLU()
37 | )
38 |
39 | self.conv5 = nn.Sequential(
40 | nn.Conv1d(256, 256, kernel_size=3, stride=1),
41 | nn.ReLU()
42 | )
43 |
44 | self.conv6 = nn.Sequential(
45 | nn.Conv1d(256, 256, kernel_size=3, stride=1),
46 | nn.ReLU(),
47 | nn.MaxPool1d(kernel_size=3, stride=3)
48 | )
49 |
50 | self.fc = nn.Linear(256*7, opt.label_size)
51 |
52 | def forward(self, x):
53 | # Embedding
54 | x = self.embed(x) # dim: (batch_size, max_seq_len, embedding_size)
55 | x = self.conv1(x)
56 | x = self.conv2(x)
57 | x = self.conv3(x)
58 | x = self.conv4(x)
59 | x = self.conv5(x)
60 | x = self.conv6(x)
61 |
62 | # collapse
63 | x = x.view(x.size(0), -1)
64 | x = self.fc(x)
65 |
66 | return F.log_softmax(x)
67 |
--------------------------------------------------------------------------------
/models/CNNText.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch as t
3 | import numpy as np
4 | from torch import nn
5 |
6 | class CNNText(nn.Module):
7 | def __init__(self, opt ):
8 | super(CNNText, self).__init__()
9 | self.model_name = 'CNNText'
10 | self.opt=opt
11 | self.content_dim=opt.__dict__.get("content_dim",256)
12 | self.kernel_size=opt.__dict__.get("kernel_size",3)
13 |
14 |
15 | self.encoder = nn.Embedding(opt.vocab_size,opt.embedding_dim)
16 | if opt.__dict__.get("embeddings",None) is not None:
17 | self.encoder.weight=nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
18 |
19 |
20 | self.content_conv = nn.Sequential(
21 | nn.Conv1d(in_channels = opt.embedding_dim,
22 | out_channels = self.content_dim,
23 | kernel_size = self.kernel_size),
24 | nn.ReLU(),
25 | nn.MaxPool1d(kernel_size = (opt.max_seq_len - self.kernel_size + 1))
26 | # nn.AdaptiveMaxPool1d()
27 | )
28 |
29 | self.fc = nn.Linear(self.content_dim, opt.label_size)
30 |
31 |
32 | def forward(self, content):
33 |
34 | content = self.encoder(content)
35 | content_out = self.content_conv(content.permute(0,2,1))
36 | reshaped = content_out.view(content_out.size(0), -1)
37 | logits = self.fc(reshaped)
38 | return logits
39 |
40 | import argparse
41 |
42 | def parse_opt():
43 | parser = argparse.ArgumentParser()
44 | # Data input settings
45 | parser.add_argument('--hidden_dim', type=int, default=128,
46 | help='hidden_dim')
47 |
48 |
49 | parser.add_argument('--batch_size', type=int, default=64,
50 | help='batch_size')
51 | parser.add_argument('--embedding_dim', type=int, default=300,
52 | help='embedding_dim')
53 | parser.add_argument('--learning_rate', type=float, default=4e-4,
54 | help='learning_rate')
55 | parser.add_argument('--grad_clip', type=float, default=1e-1,
56 | help='grad_clip')
57 | parser.add_argument('--model', type=str, default="lstm",
58 | help='model name')
59 |
60 |
61 | #
62 | args = parser.parse_args()
63 | args.embedding_dim=300
64 | args.vocab_size=10000
65 | args.kernel_size=3
66 |     args.label_size=3
67 | args.content_dim=256
68 | args.max_seq_len=50
69 |
70 | #
71 | # # Check if args are valid
72 | # assert args.rnn_size > 0, "rnn_size should be greater than 0"
73 |
74 |
75 | return args
76 |
77 | if __name__ == '__main__':
78 |
79 |
80 | opt = parse_opt()
81 | m = CNNText(opt)
82 | content = t.autograd.Variable(t.arange(0,3200).view(-1,50)).long()
83 | o = m(content)
84 | print(o.size())
85 |
86 |
--------------------------------------------------------------------------------
/models/CNN_Inception.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
4 | import torch as t
5 | import torch
6 | import numpy as np
7 | from torch import nn
8 | from collections import OrderedDict
9 |
10 | class Inception(nn.Module):
11 | def __init__(self,cin,co,relu=True,norm=True):
12 | super(Inception, self).__init__()
13 | assert(co%4==0)
14 |         cos=[int(co/4)]*4
15 | self.activa=nn.Sequential()
16 | if norm:self.activa.add_module('norm',nn.BatchNorm1d(co))
17 | if relu:self.activa.add_module('relu',nn.ReLU(True))
18 | self.branch1 =nn.Sequential(OrderedDict([
19 | ('conv1', nn.Conv1d(cin,cos[0], 1,stride=1)),
20 | ]))
21 | self.branch2 =nn.Sequential(OrderedDict([
22 | ('conv1', nn.Conv1d(cin,cos[1], 1)),
23 | ('norm1', nn.BatchNorm1d(cos[1])),
24 | ('relu1', nn.ReLU(inplace=True)),
25 | ('conv3', nn.Conv1d(cos[1],cos[1], 3,stride=1,padding=1)),
26 | ]))
27 | self.branch3 =nn.Sequential(OrderedDict([
28 | ('conv1', nn.Conv1d(cin,cos[2], 3,padding=1)),
29 | ('norm1', nn.BatchNorm1d(cos[2])),
30 | ('relu1', nn.ReLU(inplace=True)),
31 | ('conv3', nn.Conv1d(cos[2],cos[2], 5,stride=1,padding=2)),
32 | ]))
33 | self.branch4 =nn.Sequential(OrderedDict([
34 | #('pool',nn.MaxPool1d(2)),
35 | ('conv3', nn.Conv1d(cin,cos[3], 3,stride=1,padding=1)),
36 | ]))
37 | def forward(self,x):
38 | branch1=self.branch1(x)
39 | branch2=self.branch2(x)
40 | branch3=self.branch3(x)
41 | branch4=self.branch4(x)
42 | result=self.activa(torch.cat((branch1,branch2,branch3,branch4),1))
43 | return result
44 | class CNNText_inception(nn.Module):
45 | def __init__(self, opt ):
46 | super(CNNText_inception, self).__init__()
47 | incept_dim=getattr(opt,"inception_dim",512)
48 | self.model_name = 'CNNText_inception'
49 | self.opt=opt
50 | self.encoder = nn.Embedding(opt.vocab_size,opt.embedding_dim)
51 |
52 | self.content_conv=nn.Sequential(
53 | Inception(opt.embedding_dim,incept_dim),#(batch_size,64,opt.content_seq_len)->(batch_size,64,(opt.content_seq_len)/2)
54 | #Inception(incept_dim,incept_dim),#(batch_size,64,opt.content_seq_len/2)->(batch_size,32,(opt.content_seq_len)/4)
55 | Inception(incept_dim,incept_dim),
56 | nn.MaxPool1d(opt.max_seq_len)
57 | )
58 | self.fc = nn.Sequential(
59 | nn.Linear(incept_dim,getattr(opt,"linear_hidden_size",2000)),
60 | nn.BatchNorm1d(getattr(opt,"linear_hidden_size",2000)),
61 | nn.ReLU(inplace=True),
62 | nn.Linear(getattr(opt,"linear_hidden_size",2000) ,opt.label_size)
63 | )
64 | if opt.__dict__.get("embeddings",None) is not None:
65 | print('load embedding')
66 | self.encoder.weight.data.copy_(t.from_numpy(opt.embeddings))
67 |
68 | def forward(self,content):
69 |
70 | content=self.encoder(content)
71 | if self.opt.static:
72 |             content=content.detach()
73 |
74 | content_out=self.content_conv(content.permute(0,2,1))
75 | out=content_out.view(content_out.size(0), -1)
76 | out=self.fc(out)
77 | return out
78 |
79 | if __name__ == '__main__':
80 | import sys
81 | sys.path.append(r"..")
82 | import opts
83 | opt=opts.parse_opt()
84 | opt.vocab_size=2501
85 | opt.label_size=3
86 | m = CNNText_inception(opt)
87 |
88 | content = t.autograd.Variable(t.arange(0,2500).view(10,250)).long()
89 | o = m(content)
90 | print(o.size())
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
--------------------------------------------------------------------------------
/models/Capsule.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # paper
3 |
4 |
5 | #
6 |
7 |
8 |
9 | import torch
10 | import torch.nn.functional as F
11 | from torch import nn
12 | import numpy as np
13 |
14 | BATCH_SIZE = 100
15 |
16 | NUM_EPOCHS = 500
17 | NUM_ROUTING_ITERATIONS = 3
18 |
19 | cuda = torch.cuda.is_available()
20 |
21 | def softmax(input, dim=1):
22 | transposed_input = input.transpose(dim, len(input.size()) - 1)
23 | softmaxed_output = F.softmax(transposed_input.contiguous().view(-1, transposed_input.size(-1)))
24 | return softmaxed_output.view(*transposed_input.size()).transpose(dim, len(input.size()) - 1)
25 |
26 |
27 |
28 |
29 |
30 | class CapsuleLayer(nn.Module):
31 | def __init__(self, num_capsules, num_route_nodes, in_channels, out_channels, kernel_size=None, stride=None,
32 | num_iterations=NUM_ROUTING_ITERATIONS,padding=0):
33 | super(CapsuleLayer, self).__init__()
34 |
35 | self.num_route_nodes = num_route_nodes
36 | self.num_iterations = num_iterations
37 |
38 | self.num_capsules = num_capsules
39 |
40 |
41 |
42 | if num_route_nodes != -1:
43 | self.route_weights = nn.Parameter(torch.randn(num_capsules, num_route_nodes, in_channels, out_channels))
44 | else:
45 | prime=[3,5,7,9,11,13,17,19,23]
46 | sizes=prime[:self.num_capsules]
47 | self.capsules = nn.ModuleList(
48 | [nn.Conv1d(in_channels, out_channels, kernel_size=i, stride=2, padding=int((i-1)/2)) for i in sizes])
49 |
50 | def squash(self, tensor, dim=-1):
51 | squared_norm = (tensor ** 2).sum(dim=dim, keepdim=True)
52 | scale = squared_norm / (1 + squared_norm)
53 | return scale * tensor / torch.sqrt(squared_norm)
54 |
55 | def forward(self, x):
56 |
57 | if self.num_route_nodes != -1:
58 | priors =torch.matmul( x[None, :, :, None, :],self.route_weights[:, None, :, :, :])
59 |
60 | if torch.cuda.is_available():
61 | logits = torch.autograd.Variable(torch.zeros(priors.size())).cuda()
62 | else:
63 | logits = torch.autograd.Variable(torch.zeros(priors.size()))
64 | for i in range(self.num_iterations):
65 | probs = softmax(logits, dim=2)
66 | outputs = self.squash((torch.mul(probs , priors)).sum(dim=2, keepdim=True))
67 |
68 | if i != self.num_iterations - 1:
69 | delta_logits = (torch.mul(priors , outputs)).sum(dim=-1, keepdim=True)
70 | logits = logits + delta_logits
71 | else:
72 | outputs = [capsule(x).view(x.size(0), -1, 1) for capsule in self.capsules]
73 | outputs = torch.cat(outputs, dim=-1)
74 | outputs = self.squash(outputs)
75 |
76 | return outputs
77 |
78 |
79 | class CapsuleNet(nn.Module):
80 | def __init__(self,opt):
81 | super(CapsuleNet, self).__init__()
82 | self.opt=opt #300*300
83 | self.label_size=opt.label_size
84 | self.embed = nn.Embedding(opt.vocab_size+1, opt.embedding_dim)
85 | self.opt.cnn_dim = 1
86 | self.kernel_size = 3
87 | self.kernel_size_primary=3
88 | if opt.__dict__.get("embeddings",None) is not None:
89 | self.embed.weight=nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
90 |
91 | self.primary_capsules = CapsuleLayer(num_capsules=8, num_route_nodes=-1, in_channels=256, out_channels=32)
92 | self.digit_capsules = CapsuleLayer(num_capsules=opt.label_size, num_route_nodes=int(32 * opt.max_seq_len/2), in_channels=8,
93 | out_channels=16)
94 | if self.opt.cnn_dim == 2:
95 | self.conv_2d = nn.Conv2d(in_channels=1, out_channels=256, kernel_size=(self.kernel_size,opt.embedding_dim), stride=(1,opt.embedding_dim),padding=(int((self.kernel_size-1)/2),0))
96 | else:
97 | self.conv_1d = nn.Conv1d(in_channels=1, out_channels=256, kernel_size=opt.embedding_dim * self.kernel_size, stride=opt.embedding_dim, padding=opt.embedding_dim* int((self.kernel_size-1)/2) )
98 |
99 | self.decoder = nn.Sequential(
100 | nn.Linear(16 * self.label_size, 512),
101 | nn.ReLU(inplace=True),
102 | nn.Linear(512, 1024),
103 | nn.ReLU(inplace=True),
104 | nn.Linear(1024, 784),
105 | nn.Sigmoid()
106 | )
107 |
108 | def forward(self, x, y=None,reconstruct=False):
109 | #x = next(iter(train_iter)).text[0]
110 |
111 | x= self.embed(x)
112 | if self.opt.cnn_dim == 1:
113 | x=x.view(x.size(0),1,x.size(-1)*x.size(-2))
114 | x_conv = F.relu(self.conv_1d(x), inplace=True)
115 | else:
116 |
117 | x=x.unsqueeze(1)
118 | x_conv = F.relu(self.conv_2d(x), inplace=True).squeeze(3)
119 |
120 | x = self.primary_capsules(x_conv)
121 | x = self.digit_capsules(x).squeeze().transpose(0, 1)
122 |
123 | classes = (x ** 2).sum(dim=-1) ** 0.5
124 | classes = F.softmax(classes)
125 | if not reconstruct:
126 | return classes
127 | if y is None:
128 | # In all batches, get the most active capsule.
129 | _, max_length_indices = classes.max(dim=1)
130 | if torch.cuda.is_available():
131 |                 y = torch.autograd.Variable(torch.sparse.torch.eye(self.label_size)).cuda().index_select(dim=0, index=max_length_indices.data)
132 |             else:
133 |                 y = torch.autograd.Variable(torch.sparse.torch.eye(self.label_size)).index_select(dim=0, index=max_length_indices.data)
134 | reconstructions = self.decoder((x * y[:, :, None]).view(x.size(0), -1))
135 |
136 | return classes, reconstructions
137 |
--------------------------------------------------------------------------------
/models/ConvS2S.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
--------------------------------------------------------------------------------
/models/DiSAN.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # https://github.com/taoshen58/DiSAN/blob/master/SST_disan/src/model/model_disan.py
--------------------------------------------------------------------------------
/models/FastText.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import torch as t
4 |
5 | import numpy as np
6 | from torch import nn
7 | from collections import OrderedDict
8 | class FastText(nn.Module):
9 | def __init__(self, opt ):
10 | super(FastText, self).__init__()
11 | self.model_name = 'FastText'
12 |
13 | linear_hidden_size=getattr(opt,"linear_hidden_size",2000)
14 | self.encoder = nn.Embedding(opt.vocab_size,opt.embedding_dim)
15 | if opt.__dict__.get("embeddings",None) is not None:
16 | print('load embedding')
17 | self.encoder.weight=nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
18 |
19 |
20 | self.content_fc = nn.Sequential(
21 | nn.Linear(opt.embedding_dim,linear_hidden_size),
22 | nn.BatchNorm1d(linear_hidden_size),
23 | nn.ReLU(inplace=True),
24 | # nn.Linear(opt.linear_hidden_size,opt.linear_hidden_size),
25 | # nn.BatchNorm1d(opt.linear_hidden_size),
26 | # nn.ReLU(inplace=True),
27 | nn.Linear(linear_hidden_size,opt.label_size)
28 | )
29 |
30 |
31 | def forward(self,content):
32 |
33 | content_=t.mean(self.encoder(content),dim=1)
34 |
35 |
36 | out=self.content_fc(content_.view(content_.size(0),-1))
37 |
38 | return out
39 | if __name__ == '__main__':
40 | import sys
41 | sys.path.append(r"..")
42 | import opts
43 | opt=opts.parse_opt()
44 | opt.vocab_size=2501
45 | opt.label_size=3
46 | m = FastText(opt)
47 |
48 | content = t.autograd.Variable(t.arange(0,2500).view(10,250)).long()
49 | o = m(content)
50 | print(o.size())
--------------------------------------------------------------------------------
/models/LSTM.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import torch
6 | from torch.autograd import Variable
7 | #from memory_profiler import profile
8 |
9 | class LSTMClassifier(nn.Module):
10 | # embedding_dim, hidden_dim, vocab_size, label_size, batch_size, use_gpu
11 | def __init__(self,opt):
12 | self.opt=opt
13 | super(LSTMClassifier, self).__init__()
14 | self.hidden_dim = opt.hidden_dim
15 | self.batch_size = opt.batch_size
16 | self.use_gpu = torch.cuda.is_available()
17 |
18 | self.word_embeddings = nn.Embedding(opt.vocab_size, opt.embedding_dim)
19 | self.word_embeddings.weight = nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
20 | # self.word_embeddings.weight.data.copy_(torch.from_numpy(opt.embeddings))
21 | self.lstm = nn.LSTM(opt.embedding_dim, opt.hidden_dim)
22 | self.hidden2label = nn.Linear(opt.hidden_dim, opt.label_size)
23 | self.hidden = self.init_hidden()
24 | self.mean = opt.__dict__.get("lstm_mean",True)
25 |
26 | def init_hidden(self,batch_size=None):
27 | if batch_size is None:
28 | batch_size= self.batch_size
29 |
30 | if self.use_gpu:
31 | h0 = Variable(torch.zeros(1, batch_size, self.hidden_dim).cuda())
32 | c0 = Variable(torch.zeros(1, batch_size, self.hidden_dim).cuda())
33 | else:
34 | h0 = Variable(torch.zeros(1, batch_size, self.hidden_dim))
35 | c0 = Variable(torch.zeros(1,batch_size, self.hidden_dim))
36 | return (h0, c0)
37 | # @profile
38 | def forward(self, sentence):
39 | embeds = self.word_embeddings(sentence) #64x200x300
40 |
41 | # x = embeds.view(sentence.size()[1], self.batch_size, -1)
42 | x=embeds.permute(1,0,2) #200x64x300
43 | self.hidden= self.init_hidden(sentence.size()[0]) #1x64x128
44 | lstm_out, self.hidden = self.lstm(x, self.hidden) #200x64x128
45 | if self.mean=="mean":
46 | out = lstm_out.permute(1,0,2)
47 | final = torch.mean(out,1)
48 | else:
49 | final=lstm_out[-1]
50 | y = self.hidden2label(final) #64x3
51 | return y
52 | # def forward1(self, sentence):
53 | #
54 | # return torch.zeros(sentence.size()[0], self.opt.label_size)
55 | ## def __call__(self, **args):
56 | ## self.forward(args)
57 | # def test():
58 | #
59 | # import numpy as np
60 | #
61 | # word_embeddings = nn.Embedding(10000, 300)
62 | # lstm = nn.LSTM(300, 100)
63 | # h0 = Variable(torch.zeros(1, 128, 100))
64 | # c0 = Variable(torch.zeros(1, 128, 100))
65 | # hidden=(h0, c0)
66 | # sentence = Variable(torch.LongTensor(np.zeros((128,30),dtype=np.int64)))
67 | # embeds = word_embeddings(sentence)
68 | # torch.tile(sentence)
69 | # sentence.size()[0]
70 | #
71 | #
72 | #
73 | ## x= Variable(torch.zeros(30, 128, 300))
74 | # x = embeds.view(sentence.size()[1], self.batch_size, -1)
75 | # embeds=embeds.permute(1,0,2)
76 | # lstm_out, hidden = lstm(embeds, hidden)
77 | ##
--------------------------------------------------------------------------------
/models/LSTMBI.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import torch
6 | from torch.autograd import Variable
7 | #from memory_profiler import profile
8 |
9 | class LSTMBI(nn.Module):
10 | # embedding_dim, hidden_dim, vocab_size, label_size, batch_size, use_gpu
11 | def __init__(self,opt):
12 | self.opt=opt
13 | super(LSTMBI, self).__init__()
14 | self.hidden_dim = opt.hidden_dim
15 | self.batch_size = opt.batch_size
16 | self.use_gpu = torch.cuda.is_available()
17 |
18 | self.word_embeddings = nn.Embedding(opt.vocab_size, opt.embedding_dim)
19 | self.word_embeddings.weight = nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
20 | # self.word_embeddings.weight.data.copy_(torch.from_numpy(opt.embeddings))
21 |
22 | self.lstm_layers = opt.lstm_layers
23 | #self.bidirectional = True
24 | self.dropout = opt.keep_dropout
25 | self.bilstm = nn.LSTM(opt.embedding_dim, opt.hidden_dim // 2, num_layers=self.lstm_layers, dropout=self.dropout, bidirectional=True)
26 | self.hidden2label = nn.Linear(opt.hidden_dim, opt.label_size)
27 | self.hidden = self.init_hidden()
28 |         self.mean = opt.__dict__.get("lstm_mean", True)  # only the literal value "mean" triggers mean pooling below; otherwise the last time step is used
29 |
30 | def init_hidden(self,batch_size=None):
31 | if batch_size is None:
32 | batch_size= self.batch_size
33 |
34 | if self.use_gpu:
35 | h0 = Variable(torch.zeros(2*self.lstm_layers, batch_size, self.hidden_dim // 2).cuda())
36 | c0 = Variable(torch.zeros(2*self.lstm_layers, batch_size, self.hidden_dim // 2).cuda())
37 | else:
38 | h0 = Variable(torch.zeros(2*self.lstm_layers, batch_size, self.hidden_dim // 2))
39 | c0 = Variable(torch.zeros(2*self.lstm_layers, batch_size, self.hidden_dim // 2))
40 | return (h0, c0)
41 | # @profile
42 | def forward(self, sentence):
43 | embeds = self.word_embeddings(sentence)
44 |
45 | # x = embeds.view(sentence.size()[1], self.batch_size, -1)
46 |         x = embeds.permute(1,0,2)  # nn.LSTM defaults to batch_first=False, so reorder to (seq_len, batch, embedding_dim)
47 | self.hidden= self.init_hidden(sentence.size()[0]) #2x64x64
48 | lstm_out, self.hidden = self.bilstm(x, self.hidden) #lstm_out:200x64x128
49 | if self.mean=="mean":
50 | out = lstm_out.permute(1,0,2)
51 | final = torch.mean(out,1)
52 | else:
53 | final=lstm_out[-1]
54 | y = self.hidden2label(final) #64x3 #lstm_out[-1]
55 | return y
56 |
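
A quick shape check of the bidirectional encoder used here: each direction gets hidden_dim // 2 units, so the concatenated outputs come back with hidden_dim features per time step. Sizes are illustrative; dropout is omitted since it only applies between stacked layers.

```python
# Bidirectional LSTM shape check (illustrative sizes).
import torch
import torch.nn as nn

embedding_dim, hidden_dim, layers, batch, seq_len = 300, 128, 1, 4, 50
bilstm = nn.LSTM(embedding_dim, hidden_dim // 2, num_layers=layers, bidirectional=True)

x = torch.randn(seq_len, batch, embedding_dim)          # (seq_len, batch, embedding_dim)
h0 = torch.zeros(2 * layers, batch, hidden_dim // 2)    # one state per layer and direction
c0 = torch.zeros(2 * layers, batch, hidden_dim // 2)
out, (h_n, c_n) = bilstm(x, (h0, c0))
print(out.shape)   # torch.Size([50, 4, 128]): both directions concatenated on the last axis
print(h_n.shape)   # torch.Size([2, 4, 64])
```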
--------------------------------------------------------------------------------
/models/LSTMStack.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DrJZhou/TextClassificationBenchmark/b5ac116c74493c28f2af2541f21385df7c73ef93/models/LSTMStack.py
--------------------------------------------------------------------------------
/models/LSTMTree.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # https://github.com/dasguptar/treelstm.pytorch
--------------------------------------------------------------------------------
/models/LSTMwithAttention.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import torch
4 | import numpy as np
5 | import torch.nn as nn
6 | from sklearn.utils import shuffle
7 | from torch.autograd import Variable
8 |
9 | class LSTMAttention(torch.nn.Module):
10 | def __init__(self,opt):
11 | self.opt=opt
12 | super(LSTMAttention, self).__init__()
13 | self.hidden_dim = opt.hidden_dim
14 | self.batch_size = opt.batch_size
15 | self.use_gpu = torch.cuda.is_available()
16 |
17 | self.word_embeddings = nn.Embedding(opt.vocab_size, opt.embedding_dim)
18 | self.word_embeddings.weight = nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
19 | # self.word_embeddings.weight.data.copy_(torch.from_numpy(opt.embeddings))
20 |
21 | self.num_layers = opt.lstm_layers
22 | #self.bidirectional = True
23 | self.dropout = opt.keep_dropout
24 | self.bilstm = nn.LSTM(opt.embedding_dim, opt.hidden_dim // 2, batch_first=True,num_layers=self.num_layers, dropout=self.dropout, bidirectional=True)
25 | self.hidden2label = nn.Linear(opt.hidden_dim, opt.label_size)
26 | self.hidden = self.init_hidden()
27 |         self.mean = opt.__dict__.get("lstm_mean", True)  # kept for interface parity with the other LSTM models; forward() below always pools with attention and does not read it
28 | self.attn_fc = torch.nn.Linear(opt.embedding_dim, 1)
29 | def init_hidden(self,batch_size=None):
30 | if batch_size is None:
31 | batch_size= self.batch_size
32 |
33 | if self.use_gpu:
34 | h0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2).cuda())
35 | c0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2).cuda())
36 | else:
37 | h0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2))
38 | c0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2))
39 | return (h0, c0)
40 |
41 |
42 | def attention(self, rnn_out, state):
43 | merged_state = torch.cat([s for s in state],1)
44 | merged_state = merged_state.squeeze(0).unsqueeze(2)
45 | # (batch, seq_len, cell_size) * (batch, cell_size, 1) = (batch, seq_len, 1)
46 | weights = torch.bmm(rnn_out, merged_state)
47 |         weights = torch.nn.functional.softmax(weights.squeeze(2), dim=1).unsqueeze(2)  # softmax over the seq_len axis
48 | # (batch, cell_size, seq_len) * (batch, seq_len, 1) = (batch, cell_size, 1)
49 | return torch.bmm(torch.transpose(rnn_out, 1, 2), weights).squeeze(2)
50 | # end method attention
51 |
52 |
53 | def forward(self, X):
54 | embedded = self.word_embeddings(X)
55 | hidden= self.init_hidden(X.size()[0]) #
56 | rnn_out, hidden = self.bilstm(embedded, hidden)
57 | h_n, c_n = hidden
58 | attn_out = self.attention(rnn_out, h_n)
59 | logits = self.hidden2label(attn_out)
60 | return logits
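
The attention step above in isolation on toy tensors, assuming num_layers=1 so the final hidden state holds exactly one slice per direction.

```python
# LSTMAttention's attention step on toy tensors (num_layers=1 assumed).
import torch
import torch.nn.functional as F

batch, seq_len, hidden_dim = 4, 10, 128
rnn_out = torch.randn(batch, seq_len, hidden_dim)   # batch_first BiLSTM output
h_n = torch.randn(2, batch, hidden_dim // 2)        # final hidden state, one slice per direction

merged = torch.cat([s for s in h_n], 1).unsqueeze(2)                 # (batch, hidden_dim, 1)
weights = F.softmax(torch.bmm(rnn_out, merged).squeeze(2), dim=1)    # (batch, seq_len)
context = torch.bmm(rnn_out.transpose(1, 2), weights.unsqueeze(2)).squeeze(2)
print(context.shape)   # torch.Size([4, 128]): attention-weighted sum over time
```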
--------------------------------------------------------------------------------
/models/MLP.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.init as init
7 | from torch.autograd import Variable
8 |
9 | # https://github.com/nmhkahn/MemN2N-pytorch/blob/master/memn2n/model.py
10 |
11 | def position_encoding(sentence_size, embedding_dim):
12 | encoding = np.ones((embedding_dim, sentence_size), dtype=np.float32)
13 | ls = sentence_size + 1
14 | le = embedding_dim + 1
15 | for i in range(1, le):
16 | for j in range(1, ls):
17 | encoding[i-1, j-1] = (i - (embedding_dim+1)/2) * (j - (sentence_size+1)/2)
18 | encoding = 1 + 4 * encoding / embedding_dim / sentence_size
19 | # Make position encoding of time words identity to avoid modifying them
20 | encoding[:, -1] = 1.0
21 | return np.transpose(encoding)
22 |
23 | class AttrProxy(object):
24 | """
25 | Translates index lookups into attribute lookups.
26 |     A trick that makes it possible to keep a list of nn.Module objects inside an nn.Module;
27 | see https://discuss.pytorch.org/t/list-of-nn-module-in-a-nn-module/219/2
28 | """
29 | def __init__(self, module, prefix):
30 | self.module = module
31 | self.prefix = prefix
32 |
33 | def __getitem__(self, i):
34 | return getattr(self.module, self.prefix + str(i))
35 |
36 |
37 | class MemN2N(nn.Module):
38 | def __init__(self, opt):
39 | super(MemN2N, self).__init__()
40 |
41 | use_cuda = opt["use_cuda"]
42 | num_vocab = opt["num_vocab"]
43 | embedding_dim = opt["embedding_dim"]
44 | sentence_size = opt["sentence_size"]
45 | self.max_hops = opt["max_hops"]
46 |
47 | for hop in range(self.max_hops+1):
48 | C = nn.Embedding(num_vocab, embedding_dim, padding_idx=0)
49 | C.weight.data.normal_(0, 0.1)
50 | self.add_module("C_{}".format(hop), C)
51 | self.C = AttrProxy(self, "C_")
52 |
53 | self.softmax = nn.Softmax()
54 | self.encoding = Variable(torch.FloatTensor(
55 | position_encoding(sentence_size, embedding_dim)), requires_grad=False)
56 |
57 | if use_cuda:
58 | self.encoding = self.encoding.cuda()
59 |
60 | def forward(self, story, query):
61 | story_size = story.size()
62 |
63 | u = list()
64 | query_embed = self.C[0](query)
65 |         # reduce_dot done the roundabout way: weight each word embedding by the position encoding, then sum over positions
66 | encoding = self.encoding.unsqueeze(0).expand_as(query_embed)
67 | u.append(torch.sum(query_embed*encoding, 1))
68 |
69 | for hop in range(self.max_hops):
70 | embed_A = self.C[hop](story.view(story.size(0), -1))
71 | embed_A = embed_A.view(story_size+(embed_A.size(-1),))
72 |
73 | encoding = self.encoding.unsqueeze(0).unsqueeze(1).expand_as(embed_A)
74 | m_A = torch.sum(embed_A*encoding, 2)
75 |
76 | u_temp = u[-1].unsqueeze(1).expand_as(m_A)
77 | prob = self.softmax(torch.sum(m_A*u_temp, 2))
78 |
79 | embed_C = self.C[hop+1](story.view(story.size(0), -1))
80 | embed_C = embed_C.view(story_size+(embed_C.size(-1),))
81 | m_C = torch.sum(embed_C*encoding, 2)
82 |
83 | prob = prob.unsqueeze(2).expand_as(m_C)
84 | o_k = torch.sum(m_C*prob, 1)
85 |
86 | u_k = u[-1] + o_k
87 | u.append(u_k)
88 |
89 | a_hat = u[-1]@self.C[self.max_hops].weight.transpose(0, 1)
90 | return a_hat, self.softmax(a_hat)
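
A quick look at what position_encoding produces (toy sizes; the import path is assumed from the repository layout).

```python
# position_encoding returns one weight vector per word position (toy sizes; import path assumed).
from models.MLP import position_encoding

enc = position_encoding(sentence_size=6, embedding_dim=4)
print(enc.shape)   # (6, 4): one row of weights per word position
print(enc[-1])     # [1. 1. 1. 1.] -- the last position is left as identity (the "time word")
```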
--------------------------------------------------------------------------------
/models/MemoryNetwork.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #https://github.com/nmhkahn/MemN2N-pytorch/blob/master/memn2n/model.py
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.init as init
7 | from torch.autograd import Variable
8 |
9 | def position_encoding(sentence_size, embedding_dim):
10 | encoding = np.ones((embedding_dim, sentence_size), dtype=np.float32)
11 | ls = sentence_size + 1
12 | le = embedding_dim + 1
13 | for i in range(1, le):
14 | for j in range(1, ls):
15 | encoding[i-1, j-1] = (i - (embedding_dim+1)/2) * (j - (sentence_size+1)/2)
16 | encoding = 1 + 4 * encoding / embedding_dim / sentence_size
17 | # Make position encoding of time words identity to avoid modifying them
18 | encoding[:, -1] = 1.0
19 | return np.transpose(encoding)
20 |
21 | class AttrProxy(object):
22 | """
23 | Translates index lookups into attribute lookups.
24 |     A trick that makes it possible to keep a list of nn.Module objects inside an nn.Module;
25 | see https://discuss.pytorch.org/t/list-of-nn-module-in-a-nn-module/219/2
26 | """
27 | def __init__(self, module, prefix):
28 | self.module = module
29 | self.prefix = prefix
30 |
31 | def __getitem__(self, i):
32 | return getattr(self.module, self.prefix + str(i))
33 |
34 |
35 | class MemN2N(nn.Module):
36 | def __init__(self, settings):
37 | super(MemN2N, self).__init__()
38 |
39 | use_cuda = settings["use_cuda"]
40 | num_vocab = settings["num_vocab"]
41 | embedding_dim = settings["embedding_dim"]
42 | sentence_size = settings["sentence_size"]
43 | self.max_hops = settings["max_hops"]
44 |
45 | for hop in range(self.max_hops+1):
46 | C = nn.Embedding(num_vocab, embedding_dim, padding_idx=0)
47 | C.weight.data.normal_(0, 0.1)
48 | self.add_module("C_{}".format(hop), C)
49 | self.C = AttrProxy(self, "C_")
50 |
51 | self.softmax = nn.Softmax()
52 | self.encoding = Variable(torch.FloatTensor(
53 | position_encoding(sentence_size, embedding_dim)), requires_grad=False)
54 |
55 | if use_cuda:
56 | self.encoding = self.encoding.cuda()
57 |
58 | def forward(self, query):
59 |
60 |         story = query.unsqueeze(1)  # for text classification: treat the whole sentence as a single memory slot, keeping the (batch, memory, sentence_size) layout the hops below expect
61 |
62 | story_size = story.size()
63 |
64 | u = list()
65 | query_embed = self.C[0](query)
66 |         # reduce_dot done the roundabout way: weight each word embedding by the position encoding, then sum over positions
67 | encoding = self.encoding.unsqueeze(0).expand_as(query_embed)
68 | u.append(torch.sum(query_embed*encoding, 1))
69 |
70 | for hop in range(self.max_hops):
71 | embed_A = self.C[hop](story.view(story.size(0), -1))
72 | embed_A = embed_A.view(story_size+(embed_A.size(-1),))
73 |
74 | encoding = self.encoding.unsqueeze(0).unsqueeze(1).expand_as(embed_A)
75 | m_A = torch.sum(embed_A*encoding, 2)
76 |
77 | u_temp = u[-1].unsqueeze(1).expand_as(m_A)
78 | prob = self.softmax(torch.sum(m_A*u_temp, 2))
79 |
80 | embed_C = self.C[hop+1](story.view(story.size(0), -1))
81 | embed_C = embed_C.view(story_size+(embed_C.size(-1),))
82 | m_C = torch.sum(embed_C*encoding, 2)
83 |
84 | prob = prob.unsqueeze(2).expand_as(m_C)
85 | o_k = torch.sum(m_C*prob, 1)
86 |
87 | u_k = u[-1] + o_k
88 | u.append(u_k)
89 |
90 | a_hat = u[-1]@self.C[self.max_hops].weight.transpose(0, 1)
91 | return a_hat, self.softmax(a_hat)
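
A minimal forward pass through this classification variant (it relies on the single-memory-slot unsqueeze(1) above). The settings keys mirror what __init__ reads; the values and the import path are illustrative assumptions.

```python
# Minimal forward pass through MemN2N for classification (illustrative settings; import path assumed).
import torch
from models.MemoryNetwork import MemN2N

settings = {
    "use_cuda": False,
    "num_vocab": 1000,
    "embedding_dim": 32,
    "sentence_size": 20,
    "max_hops": 3,
}
model = MemN2N(settings)
query = torch.randint(0, 1000, (4, 20))   # (batch, sentence_size) word indices
a_hat, probs = model(query)
print(a_hat.shape)                        # torch.Size([4, 1000]): one score per vocabulary entry
```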
--------------------------------------------------------------------------------
/models/QuantumCNN.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
--------------------------------------------------------------------------------
/models/RCNN.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | import torch
4 | from torch.autograd import Variable
5 | #from memory_profiler import profile
6 |
7 | """
8 | Lai S, Xu L, Liu K, et al. Recurrent Convolutional Neural Networks for Text Classification[C]//AAAI. 2015, 333: 2267-2273.
9 | """
10 |
11 | class RCNN(nn.Module):
12 | # embedding_dim, hidden_dim, vocab_size, label_size, batch_size, use_gpu
13 | def __init__(self,opt):
14 | self.opt=opt
15 | super(RCNN, self).__init__()
16 | self.hidden_dim = opt.hidden_dim
17 | self.batch_size = opt.batch_size
18 | self.use_gpu = torch.cuda.is_available()
19 |
20 | self.word_embeddings = nn.Embedding(opt.vocab_size, opt.embedding_dim)
21 | self.word_embeddings.weight = nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
22 | # self.word_embeddings.weight.data.copy_(torch.from_numpy(opt.embeddings))
23 |
24 | self.num_layers = 1
25 | #self.bidirectional = True
26 | self.dropout = opt.keep_dropout
27 | self.bilstm = nn.LSTM(input_size=opt.embedding_dim, hidden_size=opt.hidden_dim // 2, num_layers=self.num_layers, dropout=self.dropout, bidirectional=True)
28 |
29 | ###self.hidden2label = nn.Linear(opt.hidden_dim, opt.label_size)
30 | self.hidden = self.init_hidden()
31 |
32 | self.max_pooling = nn.MaxPool1d(kernel_size=3, stride=2)
33 |
34 | self.content_dim = 256
35 | #self.conv = nn.Conv1d(opt.hidden_dim, self.content_dim, opt.hidden_dim * 2, stride=opt.embedding_dim)
36 | self.hidden2label = nn.Linear( (2*opt.hidden_dim // 2+opt.embedding_dim), opt.label_size)
37 |
38 | def init_hidden(self,batch_size=None):
39 | if batch_size is None:
40 | batch_size= self.batch_size
41 |
42 | if self.use_gpu:
43 | h0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2).cuda())
44 | c0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2).cuda())
45 | else:
46 | h0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2))
47 | c0 = Variable(torch.zeros(2*self.num_layers,batch_size, self.hidden_dim // 2))
48 | return (h0, c0)
49 | # @profile
50 | def forward(self, sentence):
51 | embeds = self.word_embeddings(sentence) #64x200x300
52 |
53 | # x = embeds.view(sentence.size()[1], self.batch_size, -1)
54 | x=embeds.permute(1,0,2) #200x64x300
55 | self.hidden= self.init_hidden(sentence.size()[0]) #2x64x128
56 |         lstm_out, self.hidden = self.bilstm(x, self.hidden)  # input: (seq_len, batch, input_size); outputs: output, (h_n, c_n), where output is (seq_len, batch, hidden_size * num_directions)
57 | #lstm_out 200x64x128
58 |
59 | c_lr = lstm_out.permute(1,0,2) #64x200x128
60 | xi = torch.cat((c_lr[:,:,0:int(c_lr.size()[2]/2)],embeds,c_lr[:,:,int(c_lr.size()[2]/2):]),2) #64x200x428
61 | yi = torch.tanh(xi.permute(0,2,1)) #64x428x200
62 | y = self.max_pooling(yi) #64x428x99
63 | y = y.permute(2,0,1)
64 |
65 | ##y = self.conv(lstm_out.permute(1,2,0)) ###64x256x1
66 |
67 | y = self.hidden2label(y[-1])
68 | #y = self.hidden2label(y[:,-1,:].view(y[:,-1,:].size()[0],-1))
69 | return y
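
The RCNN feature construction in isolation on toy tensors: the left half of the BiLSTM output, the word embedding, and the right half are concatenated per position (sizes are illustrative).

```python
# RCNN per-position feature: [left context, word embedding, right context].
import torch

batch, seq_len, hidden_dim, emb_dim = 4, 10, 128, 300
lstm_out = torch.randn(seq_len, batch, hidden_dim)   # BiLSTM output, both directions concatenated
embeds = torch.randn(batch, seq_len, emb_dim)

c_lr = lstm_out.permute(1, 0, 2)                     # (batch, seq_len, hidden_dim)
half = c_lr.size(2) // 2
xi = torch.cat((c_lr[:, :, :half], embeds, c_lr[:, :, half:]), 2)
print(xi.shape)                                      # torch.Size([4, 10, 428]) = hidden_dim + emb_dim per position
```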
--------------------------------------------------------------------------------
/models/RNN_CNN.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | import torch
4 | from torch.autograd import Variable
5 | #from memory_profiler import profile
6 |
7 | class RNN_CNN(nn.Module):
8 | # embedding_dim, hidden_dim, vocab_size, label_size, batch_size, use_gpu
9 | def __init__(self,opt):
10 | self.opt=opt
11 | super(RNN_CNN, self).__init__()
12 | self.hidden_dim = opt.hidden_dim
13 | self.batch_size = opt.batch_size
14 | self.use_gpu = torch.cuda.is_available()
15 |
16 | self.word_embeddings = nn.Embedding(opt.vocab_size, opt.embedding_dim)
17 | self.word_embeddings.weight = nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
18 | # self.word_embeddings.weight.data.copy_(torch.from_numpy(opt.embeddings))
19 | self.lstm = nn.LSTM(opt.embedding_dim, opt.hidden_dim)
20 | ###self.hidden2label = nn.Linear(opt.hidden_dim, opt.label_size)
21 | self.hidden = self.init_hidden()
22 |
23 | self.content_dim = 256
24 |         self.conv = nn.Conv1d(in_channels=opt.hidden_dim, out_channels=self.content_dim, kernel_size=opt.hidden_dim * 2, stride=opt.embedding_dim)  # the kernel slides over the time axis, so the sequence length must be at least hidden_dim * 2
25 | self.hidden2label = nn.Linear(self.content_dim, opt.label_size)
26 |
27 | def init_hidden(self,batch_size=None):
28 | if batch_size is None:
29 | batch_size= self.batch_size
30 |
31 | if self.use_gpu:
32 | h0 = Variable(torch.zeros(1, batch_size, self.hidden_dim).cuda())
33 | c0 = Variable(torch.zeros(1, batch_size, self.hidden_dim).cuda())
34 | else:
35 | h0 = Variable(torch.zeros(1, batch_size, self.hidden_dim))
36 | c0 = Variable(torch.zeros(1,batch_size, self.hidden_dim))
37 | return (h0, c0)
38 | # @profile
39 | def forward(self, sentence):
40 | embeds = self.word_embeddings(sentence) #64x200x300
41 |
42 | # x = embeds.view(sentence.size()[1], self.batch_size, -1)
43 | x=embeds.permute(1,0,2) #200x64x300
44 | self.hidden= self.init_hidden(sentence.size()[0]) #1x64x128
45 |         lstm_out, self.hidden = self.lstm(x, self.hidden)  # input: (seq_len, batch, input_size); outputs: output, (h_n, c_n), where output is (seq_len, batch, hidden_size * num_directions)
46 | #lstm_out 200x64x128 lstm_out.permute(1,2,0):64x128x200
47 | y = self.conv(lstm_out.permute(1,2,0)) ###64x256x1
48 | ###y = self.conv(lstm_out.permute(1,2,0).contiguous().view(self.batch_size,128,-1))
49 | #y = self.hidden2label(y.view(sentence.size()[0],-1))
50 | y = self.hidden2label(y.view(y.size()[0],-1)) #64x3
51 | return y
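
A toy check of the convolution-over-time step above. Sizes are illustrative and chosen so the kernel (2 x hidden_dim = 256) fits inside the sequence, as the note on the constructor line points out.

```python
# Conv1d over LSTM outputs as used above (illustrative sizes; seq_len >= kernel_size).
import torch
import torch.nn as nn

batch, seq_len, hidden_dim = 4, 300, 128
lstm_out = torch.randn(seq_len, batch, hidden_dim)
conv = nn.Conv1d(in_channels=hidden_dim, out_channels=256, kernel_size=hidden_dim * 2, stride=300)

y = conv(lstm_out.permute(1, 2, 0))   # (batch, hidden_dim, seq_len) -> (batch, 256, L_out)
print(y.shape)                        # torch.Size([4, 256, 1]) with these sizes
```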
--------------------------------------------------------------------------------
/models/SelfAttention.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-#
2 | # https://arxiv.org/pdf/1703.03130.pdf
3 | # A Structured Self-attentive Sentence Embedding
4 | # https://github.com/nn116003/self-attention-classification/blob/master/imdb_attn.py
5 |
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | import torch
9 | from torch.autograd import Variable
10 | #from memory_profiler import profile
11 |
12 | class SelfAttention(nn.Module):
13 | # embedding_dim, hidden_dim, vocab_size, label_size, batch_size, use_gpu
14 | def __init__(self,opt):
15 | self.opt=opt
16 | super(SelfAttention, self).__init__()
17 | self.hidden_dim = opt.hidden_dim
18 | self.batch_size = opt.batch_size
19 | self.use_gpu = torch.cuda.is_available()
20 |
21 | self.word_embeddings = nn.Embedding(opt.vocab_size, opt.embedding_dim)
22 | self.word_embeddings.weight = nn.Parameter(opt.embeddings,requires_grad=opt.embedding_training)
23 | # self.word_embeddings.weight.data.copy_(torch.from_numpy(opt.embeddings))
24 |
25 | self.num_layers = 1
26 | #self.bidirectional = True
27 | self.dropout = opt.keep_dropout
28 | self.bilstm = nn.LSTM(opt.embedding_dim, opt.hidden_dim // 2, num_layers=self.num_layers, dropout=self.dropout, bidirectional=True)
29 | self.hidden2label = nn.Linear(opt.hidden_dim, opt.label_size)
30 | self.hidden = self.init_hidden()
31 | self.self_attention = nn.Sequential(
32 | nn.Linear(opt.hidden_dim, 24),
33 | nn.ReLU(True),
34 | nn.Linear(24,1)
35 | )
36 | def init_hidden(self,batch_size=None):
37 | if batch_size is None:
38 | batch_size= self.batch_size
39 |
40 | if self.use_gpu:
41 | h0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2).cuda())
42 | c0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2).cuda())
43 | else:
44 | h0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2))
45 | c0 = Variable(torch.zeros(2*self.num_layers, batch_size, self.hidden_dim // 2))
46 | return (h0, c0)
47 | # @profile
48 | def forward(self, sentence):
49 | embeds = self.word_embeddings(sentence)
50 |
51 | # x = embeds.view(sentence.size()[1], self.batch_size, -1)
52 | x=embeds.permute(1,0,2)
53 | self.hidden= self.init_hidden(sentence.size()[0]) #2x64x64
54 | lstm_out, self.hidden = self.bilstm(x, self.hidden) #lstm_out:200x64x128
55 |         final = lstm_out.permute(1,0,2)  # (batch, seq_len, hidden_dim)
56 | attn_ene = self.self_attention(final)
57 |         attns = F.softmax(attn_ene.view(final.size(0), -1), dim=1).unsqueeze(2)  # (batch, seq_len, 1); use the actual batch size so the last, smaller batch still works
58 | feats = (final * attns).sum(dim=1)
59 | y = self.hidden2label(feats) #64x3
60 |
61 | return y
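
The self-attention pooling used above, on toy tensors. Sizes are illustrative; the scorer mirrors the self_attention Sequential defined in __init__.

```python
# Structured self-attention pooling on toy tensors.
import torch
import torch.nn as nn
import torch.nn.functional as F

batch, seq_len, hidden_dim = 4, 10, 128
final = torch.randn(batch, seq_len, hidden_dim)          # BiLSTM outputs, batch first
scorer = nn.Sequential(nn.Linear(hidden_dim, 24), nn.ReLU(True), nn.Linear(24, 1))

attn_ene = scorer(final)                                  # (batch, seq_len, 1): one score per position
attns = F.softmax(attn_ene.view(batch, -1), dim=1).unsqueeze(2)
feats = (final * attns).sum(dim=1)                        # (batch, hidden_dim) weighted average over time
print(feats.shape)
```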
--------------------------------------------------------------------------------
/models/Transformer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | ''' Define the Transformer model '''
4 | import torch
5 | import torch.nn as nn
6 | import numpy as np
7 | import torch.nn.init as init
8 |
9 |
10 |
11 | __author__ = "Yu-Hsiang Huang"
12 | #refer to "https://github.com/jadore801120/attention-is-all-you-need-pytorch"
13 |
14 | class ConstantsClass():
15 | def __init__(self):
16 | self.PAD = 0
17 | self.UNK = 1
18 | self.BOS = 2
19 | self.EOS = 3
20 | self.PAD_WORD = '