├── .gitignore
├── fetch_and_preprocess.sh
├── layers
│   └── CRowAddTable.lua
├── scripts
│   ├── convert-wordvecs.lua
│   ├── preprocess-sick.py
│   ├── download.py
│   └── preprocess-sst.py
├── lib
│   ├── CollapseUnaryTransformer.java
│   ├── DependencyParse.java
│   └── ConstituencyParse.java
├── models
│   ├── TreeLSTM.lua
│   ├── ChildSumTreeLSTM.lua
│   ├── BinaryTreeLSTM.lua
│   └── LSTM.lua
├── util
│   ├── Tree.lua
│   ├── Vocab.lua
│   └── read_data.lua
├── init.lua
├── README.md
├── relatedness
│   ├── main.lua
│   ├── TreeLSTMSim.lua
│   └── LSTMSim.lua
├── sentiment
│   ├── TreeLSTMSentiment.lua
│   ├── main.lua
│   └── LSTMSentiment.lua
└── LICENSE.txt
/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | data 3 | predictions 4 | trained_models 5 | *~ 6 | #*# 7 | *.class 8 | lib/stanford-parser 9 | lib/stanford-tagger 10 | 11 | -------------------------------------------------------------------------------- /fetch_and_preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | python2.7 scripts/download.py 4 | 5 | CLASSPATH="lib:lib/stanford-parser/stanford-parser.jar:lib/stanford-parser/stanford-parser-3.5.1-models.jar" 6 | javac -cp $CLASSPATH lib/*.java 7 | python2.7 scripts/preprocess-sick.py 8 | python2.7 scripts/preprocess-sst.py 9 | 10 | glove_dir="data/glove" 11 | glove_pre="glove.840B" 12 | glove_dim="300d" 13 | if [ ! -f $glove_dir/$glove_pre.$glove_dim.th ]; then 14 | th scripts/convert-wordvecs.lua $glove_dir/$glove_pre.$glove_dim.txt \ 15 | $glove_dir/$glove_pre.vocab $glove_dir/$glove_pre.$glove_dim.th 16 | fi 17 | -------------------------------------------------------------------------------- /layers/CRowAddTable.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Add a vector to every row of a matrix. 4 | 5 | Input: { [n x m], [m] } 6 | 7 | Output: [n x m] 8 | 9 | --]] 10 | 11 | local CRowAddTable, parent = torch.class('treelstm.CRowAddTable', 'nn.Module') 12 | 13 | function CRowAddTable:__init() 14 | parent.__init(self) 15 | self.gradInput = {} 16 | end 17 | 18 | function CRowAddTable:updateOutput(input) 19 | self.output:resizeAs(input[1]):copy(input[1]) 20 | for i = 1, self.output:size(1) do 21 | self.output[i]:add(input[2]) 22 | end 23 | return self.output 24 | end 25 | 26 | function CRowAddTable:updateGradInput(input, gradOutput) 27 | self.gradInput[1] = self.gradInput[1] or input[1].new() 28 | self.gradInput[2] = self.gradInput[2] or input[2].new() 29 | self.gradInput[1]:resizeAs(input[1]) 30 | self.gradInput[2]:resizeAs(input[2]):zero() 31 | 32 | self.gradInput[1]:copy(gradOutput) 33 | for i = 1, gradOutput:size(1) do 34 | self.gradInput[2]:add(gradOutput[i]) 35 | end 36 | 37 | return self.gradInput 38 | end 39 | -------------------------------------------------------------------------------- /scripts/convert-wordvecs.lua: -------------------------------------------------------------------------------- 1 | require('torch') 2 | require('xlua') 3 | 4 | local path = arg[1] 5 | local vocabpath = arg[2] 6 | local vecpath = arg[3] 7 | local prefix_toks = stringx.split(path, '.') 8 | print('Converting ' .. path .. 
' to Torch serialized format') 9 | 10 | -- get dimension and number of lines 11 | local file = io.open(path, 'r') 12 | local line 13 | local count = 0 14 | local dim = 0 15 | while true do 16 | line = file:read() 17 | if not line then break end 18 | if count == 0 then 19 | dim = #stringx.split(line) - 1 20 | end 21 | count = count + 1 22 | end 23 | 24 | print('count = ' .. count) 25 | print('dim = ' .. dim) 26 | 27 | -- convert to torch-friendly format 28 | file:seek('set') 29 | local vocab = io.open(vocabpath, 'w') 30 | local vecs = torch.FloatTensor(count, dim) 31 | for i = 1, count do 32 | xlua.progress(i, count) 33 | local tokens = stringx.split(file:read()) 34 | local word = tokens[1] 35 | vocab:write(word .. '\n') 36 | for j = 1, dim do 37 | vecs[{i, j}] = tonumber(tokens[j + 1]) 38 | end 39 | end 40 | file:close() 41 | vocab:close() 42 | torch.save(vecpath, vecs) 43 | -------------------------------------------------------------------------------- /lib/CollapseUnaryTransformer.java: -------------------------------------------------------------------------------- 1 | import java.util.List; 2 | 3 | import edu.stanford.nlp.ling.Label; 4 | import edu.stanford.nlp.trees.Tree; 5 | import edu.stanford.nlp.trees.TreeTransformer; 6 | import edu.stanford.nlp.util.Generics; 7 | 8 | /** 9 | * This transformer collapses chains of unary nodes so that the top 10 | * node is the only node left. The Sentiment model does not handle 11 | * unary nodes, so this simplifies them to make a binary tree consist 12 | * entirely of binary nodes and preterminals. A new tree with new 13 | * nodes and labels is returned; the original tree is unchanged. 14 | * 15 | * @author John Bauer 16 | */ 17 | public class CollapseUnaryTransformer implements TreeTransformer { 18 | public Tree transformTree(Tree tree) { 19 | if (tree.isPreTerminal() || tree.isLeaf()) { 20 | return tree.deepCopy(); 21 | } 22 | 23 | Label label = tree.label().labelFactory().newLabel(tree.label()); 24 | Tree[] children = tree.children(); 25 | while (children.length == 1 && !children[0].isLeaf()) { 26 | children = children[0].children(); 27 | } 28 | List processedChildren = Generics.newArrayList(); 29 | for (Tree child : children) { 30 | processedChildren.add(transformTree(child)); 31 | } 32 | return tree.treeFactory().newTreeNode(label, processedChildren); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /models/TreeLSTM.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Tree-LSTM base class 4 | 5 | --]] 6 | 7 | local TreeLSTM, parent = torch.class('treelstm.TreeLSTM', 'nn.Module') 8 | 9 | function TreeLSTM:__init(config) 10 | parent.__init(self) 11 | self.in_dim = config.in_dim 12 | if self.in_dim == nil then error('input dimension must be specified') end 13 | self.mem_dim = config.mem_dim or 150 14 | self.mem_zeros = torch.zeros(self.mem_dim) 15 | self.train = false 16 | end 17 | 18 | function TreeLSTM:forward(tree, inputs) 19 | end 20 | 21 | function TreeLSTM:backward(tree, inputs, grad) 22 | end 23 | 24 | function TreeLSTM:training() 25 | self.train = true 26 | end 27 | 28 | function TreeLSTM:evaluate() 29 | self.train = false 30 | end 31 | 32 | function TreeLSTM:allocate_module(tree, module) 33 | local modules = module .. 's' 34 | local num_free = #self[modules] 35 | if num_free == 0 then 36 | tree[module] = self['new_' .. 
module](self) 37 | else 38 | tree[module] = self[modules][num_free] 39 | self[modules][num_free] = nil 40 | end 41 | 42 | -- necessary for dropout to behave properly 43 | if self.train then tree[module]:training() else tree[module]:evaluate() end 44 | end 45 | 46 | function TreeLSTM:free_module(tree, module) 47 | if tree[module] == nil then return end 48 | table.insert(self[module .. 's'], tree[module]) 49 | tree[module] = nil 50 | end 51 | -------------------------------------------------------------------------------- /util/Tree.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A basic tree structure. 4 | 5 | --]] 6 | 7 | local Tree = torch.class('treelstm.Tree') 8 | 9 | function Tree:__init() 10 | self.parent = nil 11 | self.num_children = 0 12 | self.children = {} 13 | end 14 | 15 | function Tree:add_child(c) 16 | c.parent = self 17 | self.num_children = self.num_children + 1 18 | self.children[self.num_children] = c 19 | end 20 | 21 | function Tree:size() 22 | if self._size ~= nil then return self._size end 23 | local size = 1 24 | for i = 1, self.num_children do 25 | size = size + self.children[i]:size() 26 | end 27 | self._size = size 28 | return size 29 | end 30 | 31 | function Tree:depth() 32 | local depth = 0 33 | if self.num_children > 0 then 34 | for i = 1, self.num_children do 35 | local child_depth = self.children[i]:depth() 36 | if child_depth > depth then 37 | depth = child_depth 38 | end 39 | end 40 | depth = depth + 1 41 | end 42 | return depth 43 | end 44 | 45 | local function depth_first_preorder(tree, nodes) 46 | if tree == nil then 47 | return 48 | end 49 | table.insert(nodes, tree) 50 | for i = 1, tree.num_children do 51 | depth_first_preorder(tree.children[i], nodes) 52 | end 53 | end 54 | 55 | function Tree:depth_first_preorder() 56 | local nodes = {} 57 | depth_first_preorder(self, nodes) 58 | return nodes 59 | end 60 | -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | require('torch') 2 | require('nn') 3 | require('nngraph') 4 | require('optim') 5 | require('xlua') 6 | require('sys') 7 | require('lfs') 8 | 9 | treelstm = {} 10 | 11 | include('util/read_data.lua') 12 | include('util/Tree.lua') 13 | include('util/Vocab.lua') 14 | include('layers/CRowAddTable.lua') 15 | include('models/LSTM.lua') 16 | include('models/TreeLSTM.lua') 17 | include('models/ChildSumTreeLSTM.lua') 18 | include('models/BinaryTreeLSTM.lua') 19 | include('relatedness/LSTMSim.lua') 20 | include('relatedness/TreeLSTMSim.lua') 21 | include('sentiment/LSTMSentiment.lua') 22 | include('sentiment/TreeLSTMSentiment.lua') 23 | 24 | printf = utils.printf 25 | 26 | -- global paths (modify if desired) 27 | treelstm.data_dir = 'data' 28 | treelstm.models_dir = 'trained_models' 29 | treelstm.predictions_dir = 'predictions' 30 | 31 | -- share module parameters 32 | function share_params(cell, src) 33 | if torch.type(cell) == 'nn.gModule' then 34 | for i = 1, #cell.forwardnodes do 35 | local node = cell.forwardnodes[i] 36 | if node.data.module then 37 | node.data.module:share(src.forwardnodes[i].data.module, 38 | 'weight', 'bias', 'gradWeight', 'gradBias') 39 | end 40 | end 41 | elseif torch.isTypeOf(cell, 'nn.Module') then 42 | cell:share(src, 'weight', 'bias', 'gradWeight', 'gradBias') 43 | else 44 | error('parameters cannot be shared for this input') 45 | end 46 | end 47 | 48 | function header(s) 49 | print(string.rep('-', 
80)) 50 | print(s) 51 | print(string.rep('-', 80)) 52 | end 53 | -------------------------------------------------------------------------------- /util/Vocab.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A vocabulary object. Initialized from a file with one vocabulary token per line. 4 | Maps between vocabulary tokens and indices. If an UNK token is defined in the 5 | vocabulary, returns the index to this token if queried for an out-of-vocabulary 6 | token. 7 | 8 | --]] 9 | 10 | local Vocab = torch.class('treelstm.Vocab') 11 | 12 | function Vocab:__init(path) 13 | self.size = 0 14 | self._index = {} 15 | self._tokens = {} 16 | 17 | local file = io.open(path) 18 | while true do 19 | local line = file:read() 20 | if line == nil then break end 21 | self.size = self.size + 1 22 | self._tokens[self.size] = line 23 | self._index[line] = self.size 24 | end 25 | file:close() 26 | 27 | local unks = {'<unk>', '<UNK>', 'UUUNKKK'} 28 | for _, tok in pairs(unks) do 29 | self.unk_index = self.unk_index or self._index[tok] 30 | if self.unk_index ~= nil then 31 | self.unk_token = tok 32 | break 33 | end 34 | end 35 | 36 | local starts = {'<s>', '<S>'} 37 | for _, tok in pairs(starts) do 38 | self.start_index = self.start_index or self._index[tok] 39 | if self.start_index ~= nil then 40 | self.start_token = tok 41 | break 42 | end 43 | end 44 | 45 | local ends = {'</s>', '</S>'} 46 | for _, tok in pairs(ends) do 47 | self.end_index = self.end_index or self._index[tok] 48 | if self.end_index ~= nil then 49 | self.end_token = tok 50 | break 51 | end 52 | end 53 | end 54 | 55 | function Vocab:contains(w) 56 | if not self._index[w] then return false end 57 | return true 58 | end 59 | 60 | function Vocab:add(w) 61 | if self._index[w] ~= nil then 62 | return self._index[w] 63 | end 64 | self.size = self.size + 1 65 | self._tokens[self.size] = w 66 | self._index[w] = self.size 67 | return self.size 68 | end 69 | 70 | function Vocab:index(w) 71 | local index = self._index[w] 72 | if index == nil then 73 | if self.unk_index == nil then 74 | error('Token not in vocabulary and no UNK token defined: ' .. w) 75 | end 76 | return self.unk_index 77 | end 78 | return index 79 | end 80 | 81 | function Vocab:token(i) 82 | if i < 1 or i > self.size then 83 | error('Index ' .. i .. ' out of bounds') 84 | end 85 | return self._tokens[i] 86 | end 87 | 88 | function Vocab:map(tokens) 89 | local len = #tokens 90 | local output = torch.IntTensor(len) 91 | for i = 1, len do 92 | output[i] = self:index(tokens[i]) 93 | end 94 | return output 95 | end 96 | 97 | function Vocab:add_unk_token() 98 | if self.unk_token ~= nil then return end 99 | self.unk_index = self:add('<unk>') 100 | end 101 | 102 | function Vocab:add_start_token() 103 | if self.start_token ~= nil then return end 104 | self.start_index = self:add('<s>') 105 | end 106 | 107 | function Vocab:add_end_token() 108 | if self.end_token ~= nil then return end 109 | self.end_index = self:add('</s>') 110 | end 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Tree-Structured Long Short-Term Memory Networks 2 | =============================================== 3 | 4 | An implementation of the Tree-LSTM architectures described in the paper 5 | [Improved Semantic Representations From Tree-Structured Long Short-Term Memory 6 | Networks](http://arxiv.org/abs/1503.00075) by Kai Sheng Tai, Richard Socher, and 7 | Christopher Manning. 
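For reference, the composition that the Child-Sum Tree-LSTM (`models/ChildSumTreeLSTM.lua`) applies at a node $j$ with input $x_j$ and children $C(j)$ can be summarized as follows; this is only a sketch of the equations from the paper, with $\odot$ denoting elementwise multiplication:

$$
\begin{aligned}
\tilde{h}_j &= \textstyle\sum_{k \in C(j)} h_k \\
i_j &= \sigma\big(W^{(i)} x_j + U^{(i)} \tilde{h}_j + b^{(i)}\big) \\
f_{jk} &= \sigma\big(W^{(f)} x_j + U^{(f)} h_k + b^{(f)}\big) \\
o_j &= \sigma\big(W^{(o)} x_j + U^{(o)} \tilde{h}_j + b^{(o)}\big) \\
u_j &= \tanh\big(W^{(u)} x_j + U^{(u)} \tilde{h}_j + b^{(u)}\big) \\
c_j &= i_j \odot u_j + \textstyle\sum_{k \in C(j)} f_{jk} \odot c_k \\
h_j &= o_j \odot \tanh(c_j)
\end{aligned}
$$

The Binary (Constituency) Tree-LSTM in `models/BinaryTreeLSTM.lua` uses the same gating structure, but with separate parameters for the left and right children and with word inputs only at the leaf nodes.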
8 | 9 | ## Requirements 10 | 11 | - [Torch7](https://github.com/torch/torch7) 12 | - [penlight](https://github.com/stevedonovan/Penlight) 13 | - [nn](https://github.com/torch/nn) 14 | - [nngraph](https://github.com/torch/nngraph) 15 | - [optim](https://github.com/torch/optim) 16 | - Java >= 8 (for Stanford CoreNLP utilities) 17 | - Python >= 2.7 18 | 19 | The Torch/Lua dependencies can be installed using [luarocks](http://luarocks.org). For example: 20 | 21 | ``` 22 | luarocks install nngraph 23 | ``` 24 | 25 | ## Usage 26 | 27 | First run the following script: 28 | 29 | ``` 30 | ./fetch_and_preprocess.sh 31 | ``` 32 | 33 | This downloads the following data: 34 | 35 | - [SICK dataset](http://alt.qcri.org/semeval2014/task1/index.php?id=data-and-tools) (semantic relatedness task) 36 | - [Stanford Sentiment Treebank](http://nlp.stanford.edu/sentiment/index.html) (sentiment classification task) 37 | - [GloVe word vectors](http://nlp.stanford.edu/projects/glove/) (Common Crawl 840B) -- **Warning:** this is a 2GB download! 38 | 39 | and the following libraries: 40 | 41 | - [Stanford Parser](http://nlp.stanford.edu/software/lex-parser.shtml) 42 | - [Stanford POS Tagger](http://nlp.stanford.edu/software/tagger.shtml) 43 | 44 | The preprocessing script generates dependency parses of the SICK dataset using the 45 | [Stanford Neural Network Dependency Parser](http://nlp.stanford.edu/software/nndep.shtml). 46 | 47 | Alternatively, the download and preprocessing scripts can be called individually. 48 | 49 | ### Semantic Relatedness 50 | 51 | The goal of this task is to predict similarity ratings for pairs of sentences. We train and evaluate our models on the [Sentences Involving Compositional Knowledge (SICK)](http://alt.qcri.org/semeval2014/task1/index.php?id=data-and-tools) dataset. 52 | 53 | To train models for the semantic relatedness prediction task on the SICK dataset, 54 | run: 55 | 56 | ``` 57 | th relatedness/main.lua --model <dependency|constituency|lstm|bilstm> --layers <num_layers> --dim <mem_dim> --epochs <num_epochs> 58 | ``` 59 | 60 | where: 61 | 62 | - `model`: the LSTM variant to train (default: dependency, i.e. the Dependency Tree-LSTM) 63 | - `layers`: the number of layers (default: 1, ignored for Tree-LSTMs) 64 | - `dim`: the LSTM memory dimension (default: 150) 65 | - `epochs`: the number of training epochs (default: 10) 66 | 67 | ### Sentiment Classification 68 | 69 | The goal of this task is to predict sentiment labels for sentences. For this task, we use the [Stanford Sentiment Treebank](http://nlp.stanford.edu/sentiment/index.html) dataset. Here, there are two sub-tasks: binary and fine-grained. In the binary sub-task, the sentences are labeled `positive` or `negative`. In the fine-grained sub-task, the sentences are labeled `very positive`, `positive`, `neutral`, `negative` or `very negative`. 70 | 71 | To train models for the sentiment classification task on the Stanford Sentiment Treebank, run: 72 | 73 | ``` 74 | th sentiment/main.lua --model <constituency|lstm|bilstm> --layers <num_layers> --dim <mem_dim> --epochs <num_epochs> 75 | ``` 76 | 77 | This trains a Constituency Tree-LSTM model for the "fine-grained" 5-class classification sub-task. 78 | 79 | For the binary classification sub-task, run with the `-b` or `--binary` flag, for example: 80 | 81 | ``` 82 | th sentiment/main.lua -m constituency -b 83 | ``` 84 | 85 | Predictions are written to the `predictions` directory and trained model parameters are saved to the `trained_models` directory. 86 | 87 | See the [paper](http://arxiv.org/abs/1503.00075) for more details on these experiments. 
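A trained sentiment model can be reloaded for prediction via the `load` constructor in `sentiment/TreeLSTMSentiment.lua`. A minimal sketch, assuming the `treelstm` package has already been loaded the same way the training scripts load it (the saved model filename below is illustrative; the actual name depends on the run index assigned by the training script):

```
-- Minimal sketch: reload a trained Constituency Tree-LSTM (5-class sub-task)
-- and score the SST test split. The model path is illustrative.
local vocab = treelstm.Vocab(treelstm.data_dir .. '/sst/vocab-cased.txt')
local test_dataset = treelstm.read_sentiment_dataset(
  treelstm.data_dir .. '/sst/test/', vocab, true, false)  -- fine_grained = true, dependency parses = false

local model = treelstm.TreeLSTMSentiment.load(
  treelstm.models_dir .. '/sent-constituency.5class.1l.150d.1.th')
local predictions = model:predict_dataset(test_dataset)
print(predictions[1])  -- predicted class index (1 = very negative ... 5 = very positive)
```

The relatedness models follow the same pattern through `model_class.load` and `predict_dataset`, as noted at the end of `relatedness/main.lua`.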
88 | 89 | ## Third-party Implementations 90 | 91 | - A Tensorflow Fold [re-implementation](https://github.com/tensorflow/fold/blob/master/tensorflow_fold/g3doc/sentiment.ipynb) of the Tree-LSTM for sentiment classification 92 | -------------------------------------------------------------------------------- /scripts/preprocess-sick.py: -------------------------------------------------------------------------------- 1 | """ 2 | Preprocessing script for SICK data. 3 | 4 | """ 5 | 6 | import os 7 | import glob 8 | 9 | def make_dirs(dirs): 10 | for d in dirs: 11 | if not os.path.exists(d): 12 | os.makedirs(d) 13 | 14 | def dependency_parse(filepath, cp='', tokenize=True): 15 | print('\nDependency parsing ' + filepath) 16 | dirpath = os.path.dirname(filepath) 17 | filepre = os.path.splitext(os.path.basename(filepath))[0] 18 | tokpath = os.path.join(dirpath, filepre + '.toks') 19 | parentpath = os.path.join(dirpath, filepre + '.parents') 20 | relpath = os.path.join(dirpath, filepre + '.rels') 21 | tokenize_flag = '-tokenize - ' if tokenize else '' 22 | cmd = ('java -cp %s DependencyParse -tokpath %s -parentpath %s -relpath %s %s < %s' 23 | % (cp, tokpath, parentpath, relpath, tokenize_flag, filepath)) 24 | os.system(cmd) 25 | 26 | def constituency_parse(filepath, cp='', tokenize=True): 27 | dirpath = os.path.dirname(filepath) 28 | filepre = os.path.splitext(os.path.basename(filepath))[0] 29 | tokpath = os.path.join(dirpath, filepre + '.toks') 30 | parentpath = os.path.join(dirpath, filepre + '.cparents') 31 | tokenize_flag = '-tokenize - ' if tokenize else '' 32 | cmd = ('java -cp %s ConstituencyParse -tokpath %s -parentpath %s %s < %s' 33 | % (cp, tokpath, parentpath, tokenize_flag, filepath)) 34 | os.system(cmd) 35 | 36 | def build_vocab(filepaths, dst_path, lowercase=True): 37 | vocab = set() 38 | for filepath in filepaths: 39 | with open(filepath) as f: 40 | for line in f: 41 | if lowercase: 42 | line = line.lower() 43 | vocab |= set(line.split()) 44 | with open(dst_path, 'w') as f: 45 | for w in sorted(vocab): 46 | f.write(w + '\n') 47 | 48 | def split(filepath, dst_dir): 49 | with open(filepath) as datafile, \ 50 | open(os.path.join(dst_dir, 'a.txt'), 'w') as afile, \ 51 | open(os.path.join(dst_dir, 'b.txt'), 'w') as bfile, \ 52 | open(os.path.join(dst_dir, 'id.txt'), 'w') as idfile, \ 53 | open(os.path.join(dst_dir, 'sim.txt'), 'w') as simfile: 54 | datafile.readline() 55 | for line in datafile: 56 | i, a, b, sim, ent = line.strip().split('\t') 57 | idfile.write(i + '\n') 58 | afile.write(a + '\n') 59 | bfile.write(b + '\n') 60 | simfile.write(sim + '\n') 61 | 62 | def parse(dirpath, cp=''): 63 | dependency_parse(os.path.join(dirpath, 'a.txt'), cp=cp, tokenize=True) 64 | dependency_parse(os.path.join(dirpath, 'b.txt'), cp=cp, tokenize=True) 65 | constituency_parse(os.path.join(dirpath, 'a.txt'), cp=cp, tokenize=True) 66 | constituency_parse(os.path.join(dirpath, 'b.txt'), cp=cp, tokenize=True) 67 | 68 | if __name__ == '__main__': 69 | print('=' * 80) 70 | print('Preprocessing SICK dataset') 71 | print('=' * 80) 72 | 73 | base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 74 | data_dir = os.path.join(base_dir, 'data') 75 | sick_dir = os.path.join(data_dir, 'sick') 76 | lib_dir = os.path.join(base_dir, 'lib') 77 | train_dir = os.path.join(sick_dir, 'train') 78 | dev_dir = os.path.join(sick_dir, 'dev') 79 | test_dir = os.path.join(sick_dir, 'test') 80 | make_dirs([train_dir, dev_dir, test_dir]) 81 | 82 | # java classpath for calling Stanford parser 83 | classpath = 
':'.join([ 84 | lib_dir, 85 | os.path.join(lib_dir, 'stanford-parser/stanford-parser.jar'), 86 | os.path.join(lib_dir, 'stanford-parser/stanford-parser-3.5.1-models.jar')]) 87 | 88 | # split into separate files 89 | split(os.path.join(sick_dir, 'SICK_train.txt'), train_dir) 90 | split(os.path.join(sick_dir, 'SICK_trial.txt'), dev_dir) 91 | split(os.path.join(sick_dir, 'SICK_test_annotated.txt'), test_dir) 92 | 93 | # parse sentences 94 | parse(train_dir, cp=classpath) 95 | parse(dev_dir, cp=classpath) 96 | parse(test_dir, cp=classpath) 97 | 98 | # get vocabulary 99 | build_vocab( 100 | glob.glob(os.path.join(sick_dir, '*/*.toks')), 101 | os.path.join(sick_dir, 'vocab.txt')) 102 | build_vocab( 103 | glob.glob(os.path.join(sick_dir, '*/*.toks')), 104 | os.path.join(sick_dir, 'vocab-cased.txt'), 105 | lowercase=False) 106 | -------------------------------------------------------------------------------- /scripts/download.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads the following: 3 | - Stanford parser 4 | - Stanford POS tagger 5 | - Glove vectors 6 | - SICK dataset (semantic relatedness task) 7 | - Stanford Sentiment Treebank (sentiment classification task) 8 | 9 | """ 10 | 11 | from __future__ import print_function 12 | import urllib2 13 | import sys 14 | import os 15 | import shutil 16 | import zipfile 17 | import gzip 18 | 19 | def download(url, dirpath): 20 | filename = url.split('/')[-1] 21 | filepath = os.path.join(dirpath, filename) 22 | try: 23 | u = urllib2.urlopen(url) 24 | except: 25 | print("URL %s failed to open" %url) 26 | raise Exception 27 | try: 28 | f = open(filepath, 'wb') 29 | except: 30 | print("Cannot write %s" %filepath) 31 | raise Exception 32 | try: 33 | filesize = int(u.info().getheaders("Content-Length")[0]) 34 | except: 35 | print("URL %s failed to report length" %url) 36 | raise Exception 37 | print("Downloading: %s Bytes: %s" % (filename, filesize)) 38 | 39 | downloaded = 0 40 | block_sz = 8192 41 | status_width = 70 42 | while True: 43 | buf = u.read(block_sz) 44 | if not buf: 45 | print('') 46 | break 47 | else: 48 | print('', end='\r') 49 | downloaded += len(buf) 50 | f.write(buf) 51 | status = (("[%-" + str(status_width + 1) + "s] %3.2f%%") % 52 | ('=' * int(float(downloaded) / filesize * status_width) + '>', downloaded * 100. 
/ filesize)) 53 | print(status, end='') 54 | sys.stdout.flush() 55 | f.close() 56 | return filepath 57 | 58 | def unzip(filepath): 59 | print("Extracting: " + filepath) 60 | dirpath = os.path.dirname(filepath) 61 | with zipfile.ZipFile(filepath) as zf: 62 | zf.extractall(dirpath) 63 | os.remove(filepath) 64 | 65 | def download_tagger(dirpath): 66 | tagger_dir = 'stanford-tagger' 67 | if os.path.exists(os.path.join(dirpath, tagger_dir)): 68 | print('Found Stanford POS Tagger - skip') 69 | return 70 | url = 'http://nlp.stanford.edu/software/stanford-postagger-2015-01-29.zip' 71 | filepath = download(url, dirpath) 72 | zip_dir = '' 73 | with zipfile.ZipFile(filepath) as zf: 74 | zip_dir = zf.namelist()[0] 75 | zf.extractall(dirpath) 76 | os.remove(filepath) 77 | os.rename(os.path.join(dirpath, zip_dir), os.path.join(dirpath, tagger_dir)) 78 | 79 | def download_parser(dirpath): 80 | parser_dir = 'stanford-parser' 81 | if os.path.exists(os.path.join(dirpath, parser_dir)): 82 | print('Found Stanford Parser - skip') 83 | return 84 | url = 'http://nlp.stanford.edu/software/stanford-parser-full-2015-01-29.zip' 85 | filepath = download(url, dirpath) 86 | zip_dir = '' 87 | with zipfile.ZipFile(filepath) as zf: 88 | zip_dir = zf.namelist()[0] 89 | zf.extractall(dirpath) 90 | os.remove(filepath) 91 | os.rename(os.path.join(dirpath, zip_dir), os.path.join(dirpath, parser_dir)) 92 | 93 | def download_wordvecs(dirpath): 94 | if os.path.exists(dirpath): 95 | print('Found Glove vectors - skip') 96 | return 97 | else: 98 | os.makedirs(dirpath) 99 | url = 'http://www-nlp.stanford.edu/data/glove.840B.300d.zip' 100 | unzip(download(url, dirpath)) 101 | 102 | def download_sick(dirpath): 103 | if os.path.exists(dirpath): 104 | print('Found SICK dataset - skip') 105 | return 106 | else: 107 | os.makedirs(dirpath) 108 | train_url = 'http://alt.qcri.org/semeval2014/task1/data/uploads/sick_train.zip' 109 | trial_url = 'http://alt.qcri.org/semeval2014/task1/data/uploads/sick_trial.zip' 110 | test_url = 'http://alt.qcri.org/semeval2014/task1/data/uploads/sick_test_annotated.zip' 111 | unzip(download(train_url, dirpath)) 112 | unzip(download(trial_url, dirpath)) 113 | unzip(download(test_url, dirpath)) 114 | 115 | def download_sst(dirpath): 116 | if os.path.exists(dirpath): 117 | print('Found SST dataset - skip') 118 | return 119 | url = 'http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip' 120 | parent_dir = os.path.dirname(dirpath) 121 | unzip(download(url, parent_dir)) 122 | os.rename( 123 | os.path.join(parent_dir, 'stanfordSentimentTreebank'), 124 | os.path.join(parent_dir, 'sst')) 125 | shutil.rmtree(os.path.join(parent_dir, '__MACOSX')) # remove extraneous dir 126 | 127 | if __name__ == '__main__': 128 | base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 129 | 130 | # data 131 | data_dir = os.path.join(base_dir, 'data') 132 | wordvec_dir = os.path.join(data_dir, 'glove') 133 | sick_dir = os.path.join(data_dir, 'sick') 134 | sst_dir = os.path.join(data_dir, 'sst') 135 | 136 | # libraries 137 | lib_dir = os.path.join(base_dir, 'lib') 138 | 139 | # download dependencies 140 | download_tagger(lib_dir) 141 | download_parser(lib_dir) 142 | download_wordvecs(wordvec_dir) 143 | download_sick(sick_dir) 144 | download_sst(sst_dir) 145 | -------------------------------------------------------------------------------- /lib/DependencyParse.java: -------------------------------------------------------------------------------- 1 | import edu.stanford.nlp.process.WordTokenFactory; 2 | import 
edu.stanford.nlp.ling.HasWord; 3 | import edu.stanford.nlp.ling.Word; 4 | import edu.stanford.nlp.ling.TaggedWord; 5 | import edu.stanford.nlp.parser.nndep.DependencyParser; 6 | import edu.stanford.nlp.process.PTBTokenizer; 7 | import edu.stanford.nlp.trees.TypedDependency; 8 | import edu.stanford.nlp.util.StringUtils; 9 | import edu.stanford.nlp.tagger.maxent.MaxentTagger; 10 | 11 | import java.io.BufferedWriter; 12 | import java.io.FileWriter; 13 | import java.io.StringReader; 14 | import java.util.ArrayList; 15 | import java.util.Collection; 16 | import java.util.List; 17 | import java.util.Properties; 18 | import java.util.Scanner; 19 | 20 | public class DependencyParse { 21 | 22 | public static final String TAGGER_MODEL = "stanford-tagger/models/english-left3words-distsim.tagger"; 23 | public static final String PARSER_MODEL = "edu/stanford/nlp/models/parser/nndep/english_SD.gz"; 24 | 25 | public static void main(String[] args) throws Exception { 26 | Properties props = StringUtils.argsToProperties(args); 27 | if (!props.containsKey("tokpath") || 28 | !props.containsKey("parentpath") || 29 | !props.containsKey("relpath")) { 30 | System.err.println( 31 | "usage: java DependencyParse -tokenize - -tokpath -parentpath -relpath "); 32 | System.exit(1); 33 | } 34 | 35 | boolean tokenize = false; 36 | if (props.containsKey("tokenize")) { 37 | tokenize = true; 38 | } 39 | 40 | String tokPath = props.getProperty("tokpath"); 41 | String parentPath = props.getProperty("parentpath"); 42 | String relPath = props.getProperty("relpath"); 43 | 44 | BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath)); 45 | BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath)); 46 | BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath)); 47 | 48 | MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL); 49 | DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL); 50 | Scanner stdin = new Scanner(System.in); 51 | int count = 0; 52 | long start = System.currentTimeMillis(); 53 | while (stdin.hasNextLine()) { 54 | String line = stdin.nextLine(); 55 | List tokens = new ArrayList<>(); 56 | if (tokenize) { 57 | PTBTokenizer tokenizer = new PTBTokenizer( 58 | new StringReader(line), new WordTokenFactory(), ""); 59 | for (Word label; tokenizer.hasNext(); ) { 60 | tokens.add(tokenizer.next()); 61 | } 62 | } else { 63 | for (String word : line.split(" ")) { 64 | tokens.add(new Word(word)); 65 | } 66 | } 67 | 68 | List tagged = tagger.tagSentence(tokens); 69 | 70 | int len = tagged.size(); 71 | Collection tdl = parser.predict(tagged).typedDependencies(); 72 | int[] parents = new int[len]; 73 | for (int i = 0; i < len; i++) { 74 | // if a node has a parent of -1 at the end of parsing, then the node 75 | // has no parent. 
76 | parents[i] = -1; 77 | } 78 | 79 | String[] relns = new String[len]; 80 | for (TypedDependency td : tdl) { 81 | // let root have index 0 82 | int child = td.dep().index(); 83 | int parent = td.gov().index(); 84 | relns[child - 1] = td.reln().toString(); 85 | parents[child - 1] = parent; 86 | } 87 | 88 | // print tokens 89 | StringBuilder sb = new StringBuilder(); 90 | for (int i = 0; i < len - 1; i++) { 91 | if (tokenize) { 92 | sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word())); 93 | } else { 94 | sb.append(tokens.get(i).word()); 95 | } 96 | sb.append(' '); 97 | } 98 | if (tokenize) { 99 | sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word())); 100 | } else { 101 | sb.append(tokens.get(len - 1).word()); 102 | } 103 | sb.append('\n'); 104 | tokWriter.write(sb.toString()); 105 | 106 | // print parent pointers 107 | sb = new StringBuilder(); 108 | for (int i = 0; i < len - 1; i++) { 109 | sb.append(parents[i]); 110 | sb.append(' '); 111 | } 112 | sb.append(parents[len - 1]); 113 | sb.append('\n'); 114 | parentWriter.write(sb.toString()); 115 | 116 | // print relations 117 | sb = new StringBuilder(); 118 | for (int i = 0; i < len - 1; i++) { 119 | sb.append(relns[i]); 120 | sb.append(' '); 121 | } 122 | sb.append(relns[len - 1]); 123 | sb.append('\n'); 124 | relWriter.write(sb.toString()); 125 | 126 | count++; 127 | if (count % 1000 == 0) { 128 | double elapsed = (System.currentTimeMillis() - start) / 1000.0; 129 | System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed); 130 | } 131 | } 132 | 133 | long totalTimeMillis = System.currentTimeMillis() - start; 134 | System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", 135 | count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count); 136 | tokWriter.close(); 137 | parentWriter.close(); 138 | relWriter.close(); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /models/ChildSumTreeLSTM.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A Child-Sum Tree-LSTM with input at each node. 
4 | 5 | --]] 6 | 7 | local ChildSumTreeLSTM, parent = torch.class('treelstm.ChildSumTreeLSTM', 'treelstm.TreeLSTM') 8 | 9 | function ChildSumTreeLSTM:__init(config) 10 | parent.__init(self, config) 11 | self.gate_output = config.gate_output 12 | if self.gate_output == nil then self.gate_output = true end 13 | 14 | -- a function that instantiates an output module that takes the hidden state h as input 15 | self.output_module_fn = config.output_module_fn 16 | self.criterion = config.criterion 17 | 18 | -- composition module 19 | self.composer = self:new_composer() 20 | self.composers = {} 21 | 22 | -- output module 23 | self.output_module = self:new_output_module() 24 | self.output_modules = {} 25 | end 26 | 27 | function ChildSumTreeLSTM:new_composer() 28 | local input = nn.Identity()() 29 | local child_c = nn.Identity()() 30 | local child_h = nn.Identity()() 31 | local child_h_sum = nn.Sum(1)(child_h) 32 | 33 | local i = nn.Sigmoid()( 34 | nn.CAddTable(){ 35 | nn.Linear(self.in_dim, self.mem_dim)(input), 36 | nn.Linear(self.mem_dim, self.mem_dim)(child_h_sum) 37 | }) 38 | local f = nn.Sigmoid()( 39 | treelstm.CRowAddTable(){ 40 | nn.TemporalConvolution(self.mem_dim, self.mem_dim, 1)(child_h), 41 | nn.Linear(self.in_dim, self.mem_dim)(input), 42 | }) 43 | local update = nn.Tanh()( 44 | nn.CAddTable(){ 45 | nn.Linear(self.in_dim, self.mem_dim)(input), 46 | nn.Linear(self.mem_dim, self.mem_dim)(child_h_sum) 47 | }) 48 | local c = nn.CAddTable(){ 49 | nn.CMulTable(){i, update}, 50 | nn.Sum(1)(nn.CMulTable(){f, child_c}) 51 | } 52 | 53 | local h 54 | if self.gate_output then 55 | local o = nn.Sigmoid()( 56 | nn.CAddTable(){ 57 | nn.Linear(self.in_dim, self.mem_dim)(input), 58 | nn.Linear(self.mem_dim, self.mem_dim)(child_h_sum) 59 | }) 60 | h = nn.CMulTable(){o, nn.Tanh()(c)} 61 | else 62 | h = nn.Tanh()(c) 63 | end 64 | 65 | local composer = nn.gModule({input, child_c, child_h}, {c, h}) 66 | if self.composer ~= nil then 67 | share_params(composer, self.composer) 68 | end 69 | return composer 70 | end 71 | 72 | function ChildSumTreeLSTM:new_output_module() 73 | if self.output_module_fn == nil then return nil end 74 | local output_module = self.output_module_fn() 75 | if self.output_module ~= nil then 76 | share_params(output_module, self.output_module) 77 | end 78 | return output_module 79 | end 80 | 81 | function ChildSumTreeLSTM:forward(tree, inputs) 82 | local loss = 0 83 | for i = 1, tree.num_children do 84 | local _, child_loss = self:forward(tree.children[i], inputs) 85 | loss = loss + child_loss 86 | end 87 | local child_c, child_h = self:get_child_states(tree) 88 | self:allocate_module(tree, 'composer') 89 | tree.state = tree.composer:forward{inputs[tree.idx], child_c, child_h} 90 | 91 | if self.output_module ~= nil then 92 | self:allocate_module(tree, 'output_module') 93 | tree.output = tree.output_module:forward(tree.state[2]) 94 | if self.train and tree.gold_label ~= nil then 95 | loss = loss + self.criterion:forward(tree.output, tree.gold_label) 96 | end 97 | end 98 | return tree.state, loss 99 | end 100 | 101 | function ChildSumTreeLSTM:backward(tree, inputs, grad) 102 | local grad_inputs = torch.Tensor(inputs:size()) 103 | self:_backward(tree, inputs, grad, grad_inputs) 104 | return grad_inputs 105 | end 106 | 107 | function ChildSumTreeLSTM:_backward(tree, inputs, grad, grad_inputs) 108 | local output_grad = self.mem_zeros 109 | if tree.output ~= nil and tree.gold_label ~= nil then 110 | output_grad = tree.output_module:backward( 111 | tree.state[2], 
self.criterion:backward(tree.output, tree.gold_label)) 112 | end 113 | self:free_module(tree, 'output_module') 114 | tree.output = nil 115 | 116 | local child_c, child_h = self:get_child_states(tree) 117 | local composer_grad = tree.composer:backward( 118 | {inputs[tree.idx], child_c, child_h}, 119 | {grad[1], grad[2] + output_grad}) 120 | self:free_module(tree, 'composer') 121 | tree.state = nil 122 | 123 | grad_inputs[tree.idx] = composer_grad[1] 124 | local child_c_grads, child_h_grads = composer_grad[2], composer_grad[3] 125 | for i = 1, tree.num_children do 126 | self:_backward(tree.children[i], inputs, {child_c_grads[i], child_h_grads[i]}, grad_inputs) 127 | end 128 | end 129 | 130 | function ChildSumTreeLSTM:clean(tree) 131 | self:free_module(tree, 'composer') 132 | self:free_module(tree, 'output_module') 133 | tree.state = nil 134 | tree.output = nil 135 | for i = 1, tree.num_children do 136 | self:clean(tree.children[i]) 137 | end 138 | end 139 | 140 | function ChildSumTreeLSTM:parameters() 141 | local params, grad_params = {}, {} 142 | local cp, cg = self.composer:parameters() 143 | tablex.insertvalues(params, cp) 144 | tablex.insertvalues(grad_params, cg) 145 | if self.output_module ~= nil then 146 | local op, og = self.output_module:parameters() 147 | tablex.insertvalues(params, op) 148 | tablex.insertvalues(grad_params, og) 149 | end 150 | return params, grad_params 151 | end 152 | 153 | function ChildSumTreeLSTM:get_child_states(tree) 154 | local child_c, child_h 155 | if tree.num_children == 0 then 156 | child_c = torch.zeros(1, self.mem_dim) 157 | child_h = torch.zeros(1, self.mem_dim) 158 | else 159 | child_c = torch.Tensor(tree.num_children, self.mem_dim) 160 | child_h = torch.Tensor(tree.num_children, self.mem_dim) 161 | for i = 1, tree.num_children do 162 | child_c[i], child_h[i] = unpack(tree.children[i].state) 163 | end 164 | end 165 | return child_c, child_h 166 | end 167 | -------------------------------------------------------------------------------- /relatedness/main.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Training script for semantic relatedness prediction on the SICK dataset. 4 | 5 | --]] 6 | 7 | require('..') 8 | 9 | -- Pearson correlation 10 | function pearson(x, y) 11 | x = x - x:mean() 12 | y = y - y:mean() 13 | return x:dot(y) / (x:norm() * y:norm()) 14 | end 15 | 16 | -- read command line arguments 17 | local args = lapp [[ 18 | Training script for semantic relatedness prediction on the SICK dataset. 19 | -m,--model (default dependency) Model architecture: [dependency, constituency, lstm, bilstm] 20 | -l,--layers (default 1) Number of layers (ignored for Tree-LSTM) 21 | -d,--dim (default 150) LSTM memory dimension 22 | -e,--epochs (default 10) Number of training epochs 23 | ]] 24 | 25 | local model_name, model_class 26 | if args.model == 'dependency' then 27 | model_name = 'Dependency Tree LSTM' 28 | model_class = treelstm.TreeLSTMSim 29 | elseif args.model == 'constituency' then 30 | model_name = 'Constituency Tree LSTM' 31 | model_class = treelstm.TreeLSTMSim 32 | elseif args.model == 'lstm' then 33 | model_name = 'LSTM' 34 | model_class = treelstm.LSTMSim 35 | elseif args.model == 'bilstm' then 36 | model_name = 'Bidirectional LSTM' 37 | model_class = treelstm.LSTMSim 38 | end 39 | local model_structure = args.model 40 | header(model_name .. 
' for Semantic Relatedness') 41 | 42 | -- directory containing dataset files 43 | local data_dir = 'data/sick/' 44 | 45 | -- load vocab 46 | local vocab = treelstm.Vocab(data_dir .. 'vocab-cased.txt') 47 | 48 | -- load embeddings 49 | print('loading word embeddings') 50 | local emb_dir = 'data/glove/' 51 | local emb_prefix = emb_dir .. 'glove.840B' 52 | local emb_vocab, emb_vecs = treelstm.read_embedding(emb_prefix .. '.vocab', emb_prefix .. '.300d.th') 53 | local emb_dim = emb_vecs:size(2) 54 | 55 | -- use only vectors in vocabulary (not necessary, but gives faster training) 56 | local num_unk = 0 57 | local vecs = torch.Tensor(vocab.size, emb_dim) 58 | for i = 1, vocab.size do 59 | local w = vocab:token(i) 60 | if emb_vocab:contains(w) then 61 | vecs[i] = emb_vecs[emb_vocab:index(w)] 62 | else 63 | num_unk = num_unk + 1 64 | vecs[i]:uniform(-0.05, 0.05) 65 | end 66 | end 67 | print('unk count = ' .. num_unk) 68 | emb_vocab = nil 69 | emb_vecs = nil 70 | collectgarbage() 71 | 72 | -- load datasets 73 | print('loading datasets') 74 | local train_dir = data_dir .. 'train/' 75 | local dev_dir = data_dir .. 'dev/' 76 | local test_dir = data_dir .. 'test/' 77 | local constituency = (args.model == 'constituency') 78 | local train_dataset = treelstm.read_relatedness_dataset(train_dir, vocab, constituency) 79 | local dev_dataset = treelstm.read_relatedness_dataset(dev_dir, vocab, constituency) 80 | local test_dataset = treelstm.read_relatedness_dataset(test_dir, vocab, constituency) 81 | printf('num train = %d\n', train_dataset.size) 82 | printf('num dev = %d\n', dev_dataset.size) 83 | printf('num test = %d\n', test_dataset.size) 84 | 85 | -- initialize model 86 | local model = model_class{ 87 | emb_vecs = vecs, 88 | structure = model_structure, 89 | num_layers = args.layers, 90 | mem_dim = args.dim, 91 | } 92 | 93 | -- number of epochs to train 94 | local num_epochs = args.epochs 95 | 96 | -- print information 97 | header('model configuration') 98 | printf('max epochs = %d\n', num_epochs) 99 | model:print_config() 100 | 101 | -- train 102 | local train_start = sys.clock() 103 | local best_dev_score = -1.0 104 | local best_dev_model = model 105 | header('Training model') 106 | for i = 1, num_epochs do 107 | local start = sys.clock() 108 | printf('-- epoch %d\n', i) 109 | model:train(train_dataset) 110 | printf('-- finished epoch in %.2fs\n', sys.clock() - start) 111 | 112 | -- uncomment to compute train scores 113 | --[[ 114 | local train_predictions = model:predict_dataset(train_dataset) 115 | local train_score = pearson(train_predictions, train_dataset.labels) 116 | printf('-- train score: %.4f\n', train_score) 117 | --]] 118 | 119 | local dev_predictions = model:predict_dataset(dev_dataset) 120 | local dev_score = pearson(dev_predictions, dev_dataset.labels) 121 | printf('-- dev score: %.4f\n', dev_score) 122 | 123 | if dev_score > best_dev_score then 124 | best_dev_score = dev_score 125 | best_dev_model = model_class{ 126 | emb_vecs = vecs, 127 | structure = model_structure, 128 | num_layers = args.layers, 129 | mem_dim = args.dim, 130 | } 131 | best_dev_model.params:copy(model.params) 132 | end 133 | end 134 | printf('finished training in %.2fs\n', sys.clock() - train_start) 135 | 136 | -- evaluate 137 | header('Evaluating on test set') 138 | printf('-- using model with dev score = %.4f\n', best_dev_score) 139 | local test_predictions = best_dev_model:predict_dataset(test_dataset) 140 | local test_score = pearson(test_predictions, test_dataset.labels) 141 | printf('-- test score: %.4f\n', 
test_score) 142 | 143 | -- create predictions and model directories if necessary 144 | if lfs.attributes(treelstm.predictions_dir) == nil then 145 | lfs.mkdir(treelstm.predictions_dir) 146 | end 147 | 148 | if lfs.attributes(treelstm.models_dir) == nil then 149 | lfs.mkdir(treelstm.models_dir) 150 | end 151 | 152 | -- get paths 153 | local file_idx = 1 154 | local predictions_save_path, model_save_path 155 | while true do 156 | predictions_save_path = string.format( 157 | treelstm.predictions_dir .. '/rel-%s.%dl.%dd.%d.pred', args.model, args.layers, args.dim, file_idx) 158 | model_save_path = string.format( 159 | treelstm.models_dir .. '/rel-%s.%dl.%dd.%d.th', args.model, args.layers, args.dim, file_idx) 160 | if lfs.attributes(predictions_save_path) == nil and lfs.attributes(model_save_path) == nil then 161 | break 162 | end 163 | file_idx = file_idx + 1 164 | end 165 | 166 | -- write predictions to disk 167 | local predictions_file = torch.DiskFile(predictions_save_path, 'w') 168 | print('writing predictions to ' .. predictions_save_path) 169 | for i = 1, test_predictions:size(1) do 170 | predictions_file:writeFloat(test_predictions[i]) 171 | end 172 | predictions_file:close() 173 | 174 | -- write models to disk 175 | print('writing model to ' .. model_save_path) 176 | best_dev_model:save(model_save_path) 177 | 178 | -- to load a saved model 179 | -- local loaded = model_class.load(model_save_path) 180 | -------------------------------------------------------------------------------- /sentiment/TreeLSTMSentiment.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Sentiment classification using a Binary Tree-LSTM. 4 | 5 | --]] 6 | 7 | local TreeLSTMSentiment = torch.class('treelstm.TreeLSTMSentiment') 8 | 9 | function TreeLSTMSentiment:__init(config) 10 | self.mem_dim = config.mem_dim or 150 11 | self.learning_rate = config.learning_rate or 0.05 12 | self.emb_learning_rate = config.emb_learning_rate or 0.1 13 | self.batch_size = config.batch_size or 25 14 | self.reg = config.reg or 1e-4 15 | self.structure = config.structure or 'constituency' 16 | self.fine_grained = (config.fine_grained == nil) and true or config.fine_grained 17 | self.dropout = (config.dropout == nil) and true or config.dropout 18 | 19 | -- word embedding 20 | self.emb_dim = config.emb_vecs:size(2) 21 | self.emb = nn.LookupTable(config.emb_vecs:size(1), self.emb_dim) 22 | self.emb.weight:copy(config.emb_vecs) 23 | 24 | self.in_zeros = torch.zeros(self.emb_dim) 25 | self.num_classes = self.fine_grained and 5 or 3 26 | 27 | -- optimizer configuration 28 | self.optim_state = { learningRate = self.learning_rate } 29 | 30 | -- negative log likelihood optimization objective 31 | self.criterion = nn.ClassNLLCriterion() 32 | 33 | local treelstm_config = { 34 | in_dim = self.emb_dim, 35 | mem_dim = self.mem_dim, 36 | output_module_fn = function() return self:new_sentiment_module() end, 37 | criterion = self.criterion, 38 | } 39 | 40 | if self.structure == 'dependency' then 41 | self.treelstm = treelstm.ChildSumTreeLSTM(treelstm_config) 42 | elseif self.structure == 'constituency' then 43 | self.treelstm = treelstm.BinaryTreeLSTM(treelstm_config) 44 | else 45 | error('invalid parse tree type: ' .. 
self.structure) 46 | end 47 | 48 | self.params, self.grad_params = self.treelstm:getParameters() 49 | end 50 | 51 | function TreeLSTMSentiment:new_sentiment_module() 52 | local sentiment_module = nn.Sequential() 53 | if self.dropout then 54 | sentiment_module:add(nn.Dropout()) 55 | end 56 | sentiment_module 57 | :add(nn.Linear(self.mem_dim, self.num_classes)) 58 | :add(nn.LogSoftMax()) 59 | return sentiment_module 60 | end 61 | 62 | function TreeLSTMSentiment:train(dataset) 63 | self.treelstm:training() 64 | local indices = torch.randperm(dataset.size) 65 | local zeros = torch.zeros(self.mem_dim) 66 | for i = 1, dataset.size, self.batch_size do 67 | xlua.progress(i, dataset.size) 68 | local batch_size = math.min(i + self.batch_size - 1, dataset.size) - i + 1 69 | 70 | local feval = function(x) 71 | self.grad_params:zero() 72 | self.emb:zeroGradParameters() 73 | 74 | local loss = 0 75 | for j = 1, batch_size do 76 | local idx = indices[i + j - 1] 77 | local sent = dataset.sents[idx] 78 | local tree = dataset.trees[idx] 79 | 80 | local inputs = self.emb:forward(sent) 81 | local _, tree_loss = self.treelstm:forward(tree, inputs) 82 | loss = loss + tree_loss 83 | local input_grad = self.treelstm:backward(tree, inputs, {zeros, zeros}) 84 | self.emb:backward(sent, input_grad) 85 | end 86 | 87 | loss = loss / batch_size 88 | self.grad_params:div(batch_size) 89 | self.emb.gradWeight:div(batch_size) 90 | 91 | -- regularization 92 | loss = loss + 0.5 * self.reg * self.params:norm() ^ 2 93 | self.grad_params:add(self.reg, self.params) 94 | return loss, self.grad_params 95 | end 96 | 97 | optim.adagrad(feval, self.params, self.optim_state) 98 | self.emb:updateParameters(self.emb_learning_rate) 99 | end 100 | xlua.progress(dataset.size, dataset.size) 101 | end 102 | 103 | function TreeLSTMSentiment:predict(tree, sent) 104 | self.treelstm:evaluate() 105 | local prediction 106 | local inputs = self.emb:forward(sent) 107 | self.treelstm:forward(tree, inputs) 108 | local output = tree.output 109 | if self.fine_grained then 110 | prediction = argmax(output) 111 | else 112 | prediction = (output[1] > output[3]) and 1 or 3 113 | end 114 | self.treelstm:clean(tree) 115 | return prediction 116 | end 117 | 118 | function TreeLSTMSentiment:predict_dataset(dataset) 119 | local predictions = torch.Tensor(dataset.size) 120 | for i = 1, dataset.size do 121 | xlua.progress(i, dataset.size) 122 | predictions[i] = self:predict(dataset.trees[i], dataset.sents[i]) 123 | end 124 | return predictions 125 | end 126 | 127 | function argmax(v) 128 | local idx = 1 129 | local max = v[1] 130 | for i = 2, v:size(1) do 131 | if v[i] > max then 132 | max = v[i] 133 | idx = i 134 | end 135 | end 136 | return idx 137 | end 138 | 139 | function TreeLSTMSentiment:print_config() 140 | local num_params = self.params:size(1) 141 | local num_sentiment_params = self:new_sentiment_module():getParameters():size(1) 142 | printf('%-25s = %s\n', 'fine grained sentiment', tostring(self.fine_grained)) 143 | printf('%-25s = %d\n', 'num params', num_params) 144 | printf('%-25s = %d\n', 'num compositional params', num_params - num_sentiment_params) 145 | printf('%-25s = %d\n', 'word vector dim', self.emb_dim) 146 | printf('%-25s = %d\n', 'Tree-LSTM memory dim', self.mem_dim) 147 | printf('%-25s = %.2e\n', 'regularization strength', self.reg) 148 | printf('%-25s = %d\n', 'minibatch size', self.batch_size) 149 | printf('%-25s = %.2e\n', 'learning rate', self.learning_rate) 150 | printf('%-25s = %.2e\n', 'word vector learning rate', 
self.emb_learning_rate) 151 | printf('%-25s = %s\n', 'dropout', tostring(self.dropout)) 152 | end 153 | 154 | function TreeLSTMSentiment:save(path) 155 | local config = { 156 | batch_size = self.batch_size, 157 | dropout = self.dropout, 158 | emb_learning_rate = self.emb_learning_rate, 159 | emb_vecs = self.emb.weight:float(), 160 | fine_grained = self.fine_grained, 161 | learning_rate = self.learning_rate, 162 | mem_dim = self.mem_dim, 163 | reg = self.reg, 164 | structure = self.structure, 165 | } 166 | 167 | torch.save(path, { 168 | params = self.params, 169 | config = config, 170 | }) 171 | end 172 | 173 | function TreeLSTMSentiment.load(path) 174 | local state = torch.load(path) 175 | local model = treelstm.TreeLSTMSentiment.new(state.config) 176 | model.params:copy(state.params) 177 | return model 178 | end 179 | -------------------------------------------------------------------------------- /util/read_data.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Functions for loading data from disk. 4 | 5 | --]] 6 | 7 | function treelstm.read_embedding(vocab_path, emb_path) 8 | local vocab = treelstm.Vocab(vocab_path) 9 | local embedding = torch.load(emb_path) 10 | return vocab, embedding 11 | end 12 | 13 | function treelstm.read_sentences(path, vocab) 14 | local sentences = {} 15 | local file = io.open(path, 'r') 16 | local line 17 | while true do 18 | line = file:read() 19 | if line == nil then break end 20 | local tokens = stringx.split(line) 21 | local len = #tokens 22 | local sent = torch.IntTensor(len) 23 | for i = 1, len do 24 | local token = tokens[i] 25 | sent[i] = vocab:index(token) 26 | end 27 | sentences[#sentences + 1] = sent 28 | end 29 | 30 | file:close() 31 | return sentences 32 | end 33 | 34 | function treelstm.read_trees(parent_path, label_path) 35 | local parent_file = io.open(parent_path, 'r') 36 | local label_file 37 | if label_path ~= nil then label_file = io.open(label_path, 'r') end 38 | local count = 0 39 | local trees = {} 40 | 41 | while true do 42 | local parents = parent_file:read() 43 | if parents == nil then break end 44 | parents = stringx.split(parents) 45 | for i, p in ipairs(parents) do 46 | parents[i] = tonumber(p) 47 | end 48 | 49 | local labels 50 | if label_file ~= nil then 51 | labels = stringx.split(label_file:read()) 52 | for i, l in ipairs(labels) do 53 | -- ignore unlabeled nodes 54 | if l == '#' then 55 | labels[i] = nil 56 | else 57 | labels[i] = tonumber(l) 58 | end 59 | end 60 | end 61 | 62 | count = count + 1 63 | trees[count] = treelstm.read_tree(parents, labels) 64 | end 65 | parent_file:close() 66 | return trees 67 | end 68 | 69 | function treelstm.read_tree(parents, labels) 70 | local size = #parents 71 | local trees = {} 72 | if labels == nil then labels = {} end 73 | local root 74 | for i = 1, size do 75 | if not trees[i] and parents[i] ~= -1 then 76 | local idx = i 77 | local prev = nil 78 | while true do 79 | local parent = parents[idx] 80 | if parent == -1 then 81 | break 82 | end 83 | 84 | local tree = treelstm.Tree() 85 | if prev ~= nil then 86 | tree:add_child(prev) 87 | end 88 | trees[idx] = tree 89 | tree.idx = idx 90 | tree.gold_label = labels[idx] 91 | if trees[parent] ~= nil then 92 | trees[parent]:add_child(tree) 93 | break 94 | elseif parent == 0 then 95 | root = tree 96 | break 97 | else 98 | prev = tree 99 | idx = parent 100 | end 101 | end 102 | end 103 | end 104 | 105 | -- index leaves (only meaningful for constituency trees) 106 | local leaf_idx = 1 107 | for i = 
1, size do 108 | local tree = trees[i] 109 | if tree ~= nil and tree.num_children == 0 then 110 | tree.leaf_idx = leaf_idx 111 | leaf_idx = leaf_idx + 1 112 | end 113 | end 114 | return root 115 | end 116 | 117 | --[[ 118 | 119 | Semantic Relatedness 120 | 121 | --]] 122 | 123 | function treelstm.read_relatedness_dataset(dir, vocab, constituency) 124 | local dataset = {} 125 | dataset.vocab = vocab 126 | if constituency then 127 | dataset.ltrees = treelstm.read_trees(dir .. 'a.cparents') 128 | dataset.rtrees = treelstm.read_trees(dir .. 'b.cparents') 129 | else 130 | dataset.ltrees = treelstm.read_trees(dir .. 'a.parents') 131 | dataset.rtrees = treelstm.read_trees(dir .. 'b.parents') 132 | end 133 | dataset.lsents = treelstm.read_sentences(dir .. 'a.toks', vocab) 134 | dataset.rsents = treelstm.read_sentences(dir .. 'b.toks', vocab) 135 | dataset.size = #dataset.ltrees 136 | local id_file = torch.DiskFile(dir .. 'id.txt') 137 | local sim_file = torch.DiskFile(dir .. 'sim.txt') 138 | dataset.ids = torch.IntTensor(dataset.size) 139 | dataset.labels = torch.Tensor(dataset.size) 140 | for i = 1, dataset.size do 141 | dataset.ids[i] = id_file:readInt() 142 | dataset.labels[i] = 0.25 * (sim_file:readDouble() - 1) 143 | end 144 | id_file:close() 145 | sim_file:close() 146 | return dataset 147 | end 148 | 149 | --[[ 150 | 151 | Sentiment 152 | 153 | --]] 154 | 155 | function treelstm.read_sentiment_dataset(dir, vocab, fine_grained, dependency) 156 | local dataset = {} 157 | dataset.vocab = vocab 158 | dataset.fine_grained = fine_grained 159 | local trees 160 | if dependency then 161 | trees = treelstm.read_trees(dir .. 'dparents.txt', dir .. 'dlabels.txt') 162 | else 163 | trees = treelstm.read_trees(dir .. 'parents.txt', dir .. 'labels.txt') 164 | for _, tree in ipairs(trees) do 165 | set_spans(tree) 166 | end 167 | end 168 | 169 | local sents = treelstm.read_sentences(dir .. 
'sents.txt', vocab) 170 | if not fine_grained then 171 | dataset.trees = {} 172 | dataset.sents = {} 173 | for i = 1, #trees do 174 | if trees[i].gold_label ~= 0 then 175 | table.insert(dataset.trees, trees[i]) 176 | table.insert(dataset.sents, sents[i]) 177 | end 178 | end 179 | else 180 | dataset.trees = trees 181 | dataset.sents = sents 182 | end 183 | 184 | dataset.size = #dataset.trees 185 | dataset.labels = torch.Tensor(dataset.size) 186 | for i = 1, dataset.size do 187 | remap_labels(dataset.trees[i], fine_grained) 188 | dataset.labels[i] = dataset.trees[i].gold_label 189 | end 190 | return dataset 191 | end 192 | 193 | function set_spans(tree) 194 | if tree.num_children == 0 then 195 | tree.lo, tree.hi = tree.leaf_idx, tree.leaf_idx 196 | return 197 | end 198 | 199 | for i = 1, tree.num_children do 200 | set_spans(tree.children[i]) 201 | end 202 | 203 | tree.lo, tree.hi = tree.children[1].lo, tree.children[1].hi 204 | for i = 2, tree.num_children do 205 | tree.lo = math.min(tree.lo, tree.children[i].lo) 206 | tree.hi = math.max(tree.hi, tree.children[i].hi) 207 | end 208 | end 209 | 210 | function remap_labels(tree, fine_grained) 211 | if tree.gold_label ~= nil then 212 | if fine_grained then 213 | tree.gold_label = tree.gold_label + 3 214 | else 215 | if tree.gold_label < 0 then 216 | tree.gold_label = 1 217 | elseif tree.gold_label == 0 then 218 | tree.gold_label = 2 219 | else 220 | tree.gold_label = 3 221 | end 222 | end 223 | end 224 | 225 | for i = 1, tree.num_children do 226 | remap_labels(tree.children[i], fine_grained) 227 | end 228 | end 229 | -------------------------------------------------------------------------------- /sentiment/main.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Tree-LSTM training script for sentiment classication on the Stanford 4 | Sentiment Treebank 5 | 6 | --]] 7 | 8 | require('..') 9 | 10 | function accuracy(pred, gold) 11 | return torch.eq(pred, gold):sum() / pred:size(1) 12 | end 13 | 14 | -- read command line arguments 15 | local args = lapp [[ 16 | Training script for sentiment classification on the SST dataset. 17 | -m,--model (default constituency) Model architecture: [constituency, lstm, bilstm] 18 | -l,--layers (default 1) Number of layers (ignored for Tree-LSTM) 19 | -d,--dim (default 150) LSTM memory dimension 20 | -e,--epochs (default 10) Number of training epochs 21 | -b,--binary Train and evaluate on binary sub-task 22 | ]] 23 | 24 | local model_name, model_class, model_structure 25 | if args.model == 'constituency' then 26 | model_name = 'Constituency Tree LSTM' 27 | model_class = treelstm.TreeLSTMSentiment 28 | elseif args.model == 'dependency' then 29 | model_name = 'Dependency Tree LSTM' 30 | model_class = treelstm.TreeLSTMSentiment 31 | elseif args.model == 'lstm' then 32 | model_name = 'LSTM' 33 | model_class = treelstm.LSTMSentiment 34 | elseif args.model == 'bilstm' then 35 | model_name = 'Bidirectional LSTM' 36 | model_class = treelstm.LSTMSentiment 37 | end 38 | model_structure = args.model 39 | header(model_name .. ' for Sentiment Classification') 40 | 41 | -- binary or fine-grained subtask 42 | local fine_grained = not args.binary 43 | 44 | -- directory containing dataset files 45 | local data_dir = 'data/sst/' 46 | 47 | -- load vocab 48 | local vocab = treelstm.Vocab(data_dir .. 'vocab-cased.txt') 49 | 50 | -- load embeddings 51 | print('loading word embeddings') 52 | local emb_dir = 'data/glove/' 53 | local emb_prefix = emb_dir .. 
'glove.840B' 54 | local emb_vocab, emb_vecs = treelstm.read_embedding(emb_prefix .. '.vocab', emb_prefix .. '.300d.th') 55 | local emb_dim = emb_vecs:size(2) 56 | 57 | -- use only vectors in vocabulary (not necessary, but gives faster training) 58 | local num_unk = 0 59 | local vecs = torch.Tensor(vocab.size, emb_dim) 60 | for i = 1, vocab.size do 61 | local w = string.gsub(vocab:token(i), '\\', '') -- remove escape characters 62 | if emb_vocab:contains(w) then 63 | vecs[i] = emb_vecs[emb_vocab:index(w)] 64 | else 65 | num_unk = num_unk + 1 66 | vecs[i]:uniform(-0.05, 0.05) 67 | end 68 | end 69 | print('unk count = ' .. num_unk) 70 | emb_vocab = nil 71 | emb_vecs = nil 72 | collectgarbage() 73 | 74 | -- load datasets 75 | print('loading datasets') 76 | local train_dir = data_dir .. 'train/' 77 | local dev_dir = data_dir .. 'dev/' 78 | local test_dir = data_dir .. 'test/' 79 | local dependency = (args.model == 'dependency') 80 | local train_dataset = treelstm.read_sentiment_dataset(train_dir, vocab, fine_grained, dependency) 81 | local dev_dataset = treelstm.read_sentiment_dataset(dev_dir, vocab, fine_grained, dependency) 82 | local test_dataset = treelstm.read_sentiment_dataset(test_dir, vocab, fine_grained, dependency) 83 | 84 | printf('num train = %d\n', train_dataset.size) 85 | printf('num dev = %d\n', dev_dataset.size) 86 | printf('num test = %d\n', test_dataset.size) 87 | 88 | -- initialize model 89 | local model = model_class{ 90 | emb_vecs = vecs, 91 | structure = model_structure, 92 | fine_grained = fine_grained, 93 | num_layers = args.layers, 94 | mem_dim = args.dim, 95 | } 96 | 97 | -- number of epochs to train 98 | local num_epochs = args.epochs 99 | 100 | -- print information 101 | header('model configuration') 102 | printf('max epochs = %d\n', num_epochs) 103 | model:print_config() 104 | 105 | -- train 106 | local train_start = sys.clock() 107 | local best_dev_score = -1.0 108 | local best_dev_model = model 109 | header('Training model') 110 | for i = 1, num_epochs do 111 | local start = sys.clock() 112 | printf('-- epoch %d\n', i) 113 | model:train(train_dataset) 114 | printf('-- finished epoch in %.2fs\n', sys.clock() - start) 115 | 116 | -- uncomment to compute train scores 117 | --[[ 118 | local train_predictions = model:predict_dataset(train_dataset) 119 | local train_score = accuracy(train_predictions, train_dataset.labels) 120 | printf('-- train score: %.4f\n', train_score) 121 | --]] 122 | 123 | local dev_predictions = model:predict_dataset(dev_dataset) 124 | local dev_score = accuracy(dev_predictions, dev_dataset.labels) 125 | printf('-- dev score: %.4f\n', dev_score) 126 | 127 | if dev_score > best_dev_score then 128 | best_dev_score = dev_score 129 | best_dev_model = model_class{ 130 | emb_vecs = vecs, 131 | structure = model_structure, 132 | fine_grained = fine_grained, 133 | num_layers = args.layers, 134 | mem_dim = args.dim, 135 | } 136 | best_dev_model.params:copy(model.params) 137 | best_dev_model.emb.weight:copy(model.emb.weight) 138 | end 139 | end 140 | printf('finished training in %.2fs\n', sys.clock() - train_start) 141 | 142 | -- evaluate 143 | header('Evaluating on test set') 144 | printf('-- using model with dev score = %.4f\n', best_dev_score) 145 | local test_predictions = best_dev_model:predict_dataset(test_dataset) 146 | printf('-- test score: %.4f\n', accuracy(test_predictions, test_dataset.labels)) 147 | 148 | -- create predictions and models directories if necessary 149 | if lfs.attributes(treelstm.predictions_dir) == nil then 150 | 
lfs.mkdir(treelstm.predictions_dir) 151 | end 152 | 153 | if lfs.attributes(treelstm.models_dir) == nil then 154 | lfs.mkdir(treelstm.models_dir) 155 | end 156 | 157 | -- get paths 158 | local file_idx = 1 159 | local subtask = fine_grained and '5class' or '2class' 160 | local predictions_save_path, model_save_path 161 | while true do 162 | predictions_save_path = string.format( 163 | treelstm.predictions_dir .. '/sent-%s.%s.%dl.%dd.%d.pred', args.model, subtask, args.layers, args.dim, file_idx) 164 | model_save_path = string.format( 165 | treelstm.models_dir .. '/sent-%s.%s.%dl.%dd.%d.th', args.model, subtask, args.layers, args.dim, file_idx) 166 | if lfs.attributes(predictions_save_path) == nil and lfs.attributes(model_save_path) == nil then 167 | break 168 | end 169 | file_idx = file_idx + 1 170 | end 171 | 172 | -- write predictions to disk 173 | local predictions_file = torch.DiskFile(predictions_save_path, 'w') 174 | print('writing predictions to ' .. predictions_save_path) 175 | for i = 1, test_predictions:size(1) do 176 | predictions_file:writeInt(test_predictions[i]) 177 | end 178 | predictions_file:close() 179 | 180 | -- write model to disk 181 | print('writing model to ' .. model_save_path) 182 | best_dev_model:save(model_save_path) 183 | 184 | -- to load a saved model 185 | -- local loaded = model_class.load(model_save_path) 186 | -------------------------------------------------------------------------------- /models/BinaryTreeLSTM.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A Binary Tree-LSTM with input at the leaf nodes. 4 | 5 | --]] 6 | 7 | local BinaryTreeLSTM, parent = torch.class('treelstm.BinaryTreeLSTM', 'treelstm.TreeLSTM') 8 | 9 | function BinaryTreeLSTM:__init(config) 10 | parent.__init(self, config) 11 | self.gate_output = config.gate_output 12 | if self.gate_output == nil then self.gate_output = true end 13 | 14 | -- a function that instantiates an output module that takes the hidden state h as input 15 | self.output_module_fn = config.output_module_fn 16 | self.criterion = config.criterion 17 | 18 | -- leaf input module 19 | self.leaf_module = self:new_leaf_module() 20 | self.leaf_modules = {} 21 | 22 | -- composition module 23 | self.composer = self:new_composer() 24 | self.composers = {} 25 | 26 | -- output module 27 | self.output_module = self:new_output_module() 28 | self.output_modules = {} 29 | end 30 | 31 | function BinaryTreeLSTM:new_leaf_module() 32 | local input = nn.Identity()() 33 | local c = nn.Linear(self.in_dim, self.mem_dim)(input) 34 | local h 35 | if self.gate_output then 36 | local o = nn.Sigmoid()(nn.Linear(self.in_dim, self.mem_dim)(input)) 37 | h = nn.CMulTable(){o, nn.Tanh()(c)} 38 | else 39 | h = nn.Tanh()(c) 40 | end 41 | 42 | local leaf_module = nn.gModule({input}, {c, h}) 43 | if self.leaf_module ~= nil then 44 | share_params(leaf_module, self.leaf_module) 45 | end 46 | return leaf_module 47 | end 48 | 49 | function BinaryTreeLSTM:new_composer() 50 | local lc, lh = nn.Identity()(), nn.Identity()() 51 | local rc, rh = nn.Identity()(), nn.Identity()() 52 | local new_gate = function() 53 | return nn.CAddTable(){ 54 | nn.Linear(self.mem_dim, self.mem_dim)(lh), 55 | nn.Linear(self.mem_dim, self.mem_dim)(rh) 56 | } 57 | end 58 | 59 | local i = nn.Sigmoid()(new_gate()) -- input gate 60 | local lf = nn.Sigmoid()(new_gate()) -- left forget gate 61 | local rf = nn.Sigmoid()(new_gate()) -- right forget gate 62 | local update = nn.Tanh()(new_gate()) -- memory cell update vector 63 | local c 
= nn.CAddTable(){ -- memory cell 64 | nn.CMulTable(){i, update}, 65 | nn.CMulTable(){lf, lc}, 66 | nn.CMulTable(){rf, rc} 67 | } 68 | 69 | local h 70 | if self.gate_output then 71 | local o = nn.Sigmoid()(new_gate()) -- output gate 72 | h = nn.CMulTable(){o, nn.Tanh()(c)} 73 | else 74 | h = nn.Tanh()(c) 75 | end 76 | local composer = nn.gModule( 77 | {lc, lh, rc, rh}, 78 | {c, h}) 79 | 80 | if self.composer ~= nil then 81 | share_params(composer, self.composer) 82 | end 83 | return composer 84 | end 85 | 86 | function BinaryTreeLSTM:new_output_module() 87 | if self.output_module_fn == nil then return nil end 88 | local output_module = self.output_module_fn() 89 | if self.output_module ~= nil then 90 | share_params(output_module, self.output_module) 91 | end 92 | return output_module 93 | end 94 | 95 | function BinaryTreeLSTM:forward(tree, inputs) 96 | local lloss, rloss = 0, 0 97 | if tree.num_children == 0 then 98 | self:allocate_module(tree, 'leaf_module') 99 | tree.state = tree.leaf_module:forward(inputs[tree.leaf_idx]) 100 | else 101 | self:allocate_module(tree, 'composer') 102 | 103 | -- get child hidden states 104 | local lvecs, lloss = self:forward(tree.children[1], inputs) 105 | local rvecs, rloss = self:forward(tree.children[2], inputs) 106 | local lc, lh = self:unpack_state(lvecs) 107 | local rc, rh = self:unpack_state(rvecs) 108 | 109 | -- compute state and output 110 | tree.state = tree.composer:forward{lc, lh, rc, rh} 111 | end 112 | 113 | local loss 114 | if self.output_module ~= nil then 115 | self:allocate_module(tree, 'output_module') 116 | tree.output = tree.output_module:forward(tree.state[2]) 117 | if self.train then 118 | loss = self.criterion:forward(tree.output, tree.gold_label) + lloss + rloss 119 | end 120 | end 121 | 122 | return tree.state, loss 123 | end 124 | 125 | function BinaryTreeLSTM:backward(tree, inputs, grad) 126 | local grad_inputs = torch.Tensor(inputs:size()) 127 | self:_backward(tree, inputs, grad, grad_inputs) 128 | return grad_inputs 129 | end 130 | 131 | function BinaryTreeLSTM:_backward(tree, inputs, grad, grad_inputs) 132 | local output_grad = self.mem_zeros 133 | if tree.output ~= nil and tree.gold_label ~= nil then 134 | output_grad = tree.output_module:backward( 135 | tree.state[2], self.criterion:backward(tree.output, tree.gold_label)) 136 | end 137 | self:free_module(tree, 'output_module') 138 | 139 | if tree.num_children == 0 then 140 | grad_inputs[tree.leaf_idx] = tree.leaf_module:backward( 141 | inputs[tree.leaf_idx], 142 | {grad[1], grad[2] + output_grad}) 143 | self:free_module(tree, 'leaf_module') 144 | else 145 | local lc, lh, rc, rh = self:get_child_states(tree) 146 | local composer_grad = tree.composer:backward( 147 | {lc, lh, rc, rh}, 148 | {grad[1], grad[2] + output_grad}) 149 | self:free_module(tree, 'composer') 150 | 151 | -- backward propagate to children 152 | self:_backward(tree.children[1], inputs, {composer_grad[1], composer_grad[2]}, grad_inputs) 153 | self:_backward(tree.children[2], inputs, {composer_grad[3], composer_grad[4]}, grad_inputs) 154 | end 155 | tree.state = nil 156 | tree.output = nil 157 | end 158 | 159 | function BinaryTreeLSTM:parameters() 160 | local params, grad_params = {}, {} 161 | local cp, cg = self.composer:parameters() 162 | tablex.insertvalues(params, cp) 163 | tablex.insertvalues(grad_params, cg) 164 | local lp, lg = self.leaf_module:parameters() 165 | tablex.insertvalues(params, lp) 166 | tablex.insertvalues(grad_params, lg) 167 | if self.output_module ~= nil then 168 | local op, og = 
self.output_module:parameters() 169 | tablex.insertvalues(params, op) 170 | tablex.insertvalues(grad_params, og) 171 | end 172 | return params, grad_params 173 | end 174 | 175 | -- 176 | -- helper functions 177 | -- 178 | 179 | function BinaryTreeLSTM:unpack_state(state) 180 | local c, h 181 | if state == nil then 182 | c, h = self.mem_zeros, self.mem_zeros 183 | else 184 | c, h = unpack(state) 185 | end 186 | return c, h 187 | end 188 | 189 | function BinaryTreeLSTM:get_child_states(tree) 190 | local lc, lh, rc, rh 191 | if tree.children[1] ~= nil then 192 | lc, lh = self:unpack_state(tree.children[1].state) 193 | end 194 | 195 | if tree.children[2] ~= nil then 196 | rc, rh = self:unpack_state(tree.children[2].state) 197 | end 198 | return lc, lh, rc, rh 199 | end 200 | 201 | function BinaryTreeLSTM:clean(tree) 202 | tree.state = nil 203 | tree.output = nil 204 | self:free_module(tree, 'leaf_module') 205 | self:free_module(tree, 'composer') 206 | self:free_module(tree, 'output_module') 207 | for i = 1, tree.num_children do 208 | self:clean(tree.children[i]) 209 | end 210 | end 211 | -------------------------------------------------------------------------------- /models/LSTM.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Long Short-Term Memory. 4 | 5 | --]] 6 | 7 | local LSTM, parent = torch.class('treelstm.LSTM', 'nn.Module') 8 | 9 | function LSTM:__init(config) 10 | parent.__init(self) 11 | 12 | self.in_dim = config.in_dim 13 | self.mem_dim = config.mem_dim or 150 14 | self.num_layers = config.num_layers or 1 15 | self.gate_output = config.gate_output 16 | if self.gate_output == nil then self.gate_output = true end 17 | 18 | self.master_cell = self:new_cell() 19 | self.depth = 0 20 | self.cells = {} -- table of cells in a roll-out 21 | 22 | -- initial (t = 0) states for forward propagation and initial error signals 23 | -- for backpropagation 24 | local ctable_init, ctable_grad, htable_init, htable_grad 25 | if self.num_layers == 1 then 26 | ctable_init = torch.zeros(self.mem_dim) 27 | htable_init = torch.zeros(self.mem_dim) 28 | ctable_grad = torch.zeros(self.mem_dim) 29 | htable_grad = torch.zeros(self.mem_dim) 30 | else 31 | ctable_init, ctable_grad, htable_init, htable_grad = {}, {}, {}, {} 32 | for i = 1, self.num_layers do 33 | ctable_init[i] = torch.zeros(self.mem_dim) 34 | htable_init[i] = torch.zeros(self.mem_dim) 35 | ctable_grad[i] = torch.zeros(self.mem_dim) 36 | htable_grad[i] = torch.zeros(self.mem_dim) 37 | end 38 | end 39 | self.initial_values = {ctable_init, htable_init} 40 | self.gradInput = { 41 | torch.zeros(self.in_dim), 42 | ctable_grad, 43 | htable_grad 44 | } 45 | end 46 | 47 | -- Instantiate a new LSTM cell. 48 | -- Each cell shares the same parameters, but the activations of their constituent 49 | -- layers differ. 
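-- In a single forward step, each layer computes (matching the gate definitions
-- below; bias terms omitted):
--   i = sigmoid(W_i x + U_i h_prev)        -- input gate
--   f = sigmoid(W_f x + U_f h_prev)        -- forget gate
--   u = tanh(W_u x + U_u h_prev)           -- candidate update
--   c = f * c_prev + i * u                 -- new memory cell
--   h = o * tanh(c), o = sigmoid(W_o x + U_o h_prev)
--       (or h = tanh(c) when gate_output = false)
-- where x is the word vector for layer 1 and the hidden state of the layer
-- below for higher layers. Every cell returned here shares its parameters
-- with self.master_cell, so unrolling over time in forward() adds no new weights.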
50 | function LSTM:new_cell() 51 | local input = nn.Identity()() 52 | local ctable_p = nn.Identity()() 53 | local htable_p = nn.Identity()() 54 | 55 | -- multilayer LSTM 56 | local htable, ctable = {}, {} 57 | for layer = 1, self.num_layers do 58 | local h_p = (self.num_layers == 1) and htable_p or nn.SelectTable(layer)(htable_p) 59 | local c_p = (self.num_layers == 1) and ctable_p or nn.SelectTable(layer)(ctable_p) 60 | 61 | local new_gate = function() 62 | local in_module = (layer == 1) 63 | and nn.Linear(self.in_dim, self.mem_dim)(input) 64 | or nn.Linear(self.mem_dim, self.mem_dim)(htable[layer - 1]) 65 | return nn.CAddTable(){ 66 | in_module, 67 | nn.Linear(self.mem_dim, self.mem_dim)(h_p) 68 | } 69 | end 70 | 71 | -- input, forget, and output gates 72 | local i = nn.Sigmoid()(new_gate()) 73 | local f = nn.Sigmoid()(new_gate()) 74 | local update = nn.Tanh()(new_gate()) 75 | 76 | -- update the state of the LSTM cell 77 | ctable[layer] = nn.CAddTable(){ 78 | nn.CMulTable(){f, c_p}, 79 | nn.CMulTable(){i, update} 80 | } 81 | 82 | if self.gate_output then 83 | local o = nn.Sigmoid()(new_gate()) 84 | htable[layer] = nn.CMulTable(){o, nn.Tanh()(ctable[layer])} 85 | else 86 | htable[layer] = nn.Tanh()(ctable[layer]) 87 | end 88 | end 89 | 90 | -- if LSTM is single-layered, this makes htable/ctable Tensors (instead of tables). 91 | -- this avoids some quirks with nngraph involving tables of size 1. 92 | htable, ctable = nn.Identity()(htable), nn.Identity()(ctable) 93 | local cell = nn.gModule({input, ctable_p, htable_p}, {ctable, htable}) 94 | 95 | -- share parameters 96 | if self.master_cell then 97 | share_params(cell, self.master_cell) 98 | end 99 | return cell 100 | end 101 | 102 | -- Forward propagate. 103 | -- inputs: T x in_dim tensor, where T is the number of time steps. 104 | -- reverse: if true, read the input from right to left (useful for bidirectional LSTMs). 105 | -- Returns the final hidden state of the LSTM. 106 | function LSTM:forward(inputs, reverse) 107 | local size = inputs:size(1) 108 | for t = 1, size do 109 | local input = reverse and inputs[size - t + 1] or inputs[t] 110 | self.depth = self.depth + 1 111 | local cell = self.cells[self.depth] 112 | if cell == nil then 113 | cell = self:new_cell() 114 | self.cells[self.depth] = cell 115 | end 116 | local prev_output 117 | if self.depth > 1 then 118 | prev_output = self.cells[self.depth - 1].output 119 | else 120 | prev_output = self.initial_values 121 | end 122 | 123 | local outputs = cell:forward({input, prev_output[1], prev_output[2]}) 124 | local ctable, htable = unpack(outputs) 125 | if self.num_layers == 1 then 126 | self.output = htable 127 | else 128 | self.output = {} 129 | for i = 1, self.num_layers do 130 | self.output[i] = htable[i] 131 | end 132 | end 133 | end 134 | return self.output 135 | end 136 | 137 | -- Backpropagate. forward() must have been called previously on the same input. 138 | -- inputs: T x in_dim tensor, where T is the number of time steps. 139 | -- grad_outputs: T x num_layers x mem_dim tensor. 140 | -- reverse: if true, read the input from right to left. 141 | -- Returns the gradients with respect to the inputs (in the same order as the inputs). 
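-- A minimal usage sketch (dimensions and variable names are illustrative, not
-- part of the API): run a sequence forward, then push an error signal into the
-- final hidden state only, as the sentiment and relatedness models do.
--
--   local lstm = treelstm.LSTM{in_dim = 300, mem_dim = 150}
--   local inputs = torch.randn(10, 300)        -- T = 10 time steps
--   local h_T = lstm:forward(inputs)           -- final hidden state [150]
--   local grad_outputs = torch.zeros(10, 150)
--   grad_outputs[10] = torch.randn(150)        -- d(loss)/d(h_T)
--   local input_grads = lstm:backward(inputs, grad_outputs)  -- [10 x 300]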
142 | function LSTM:backward(inputs, grad_outputs, reverse) 143 | local size = inputs:size(1) 144 | if self.depth == 0 then 145 | error("No cells to backpropagate through") 146 | end 147 | 148 | local input_grads = torch.Tensor(inputs:size()) 149 | for t = size, 1, -1 do 150 | local input = reverse and inputs[size - t + 1] or inputs[t] 151 | local grad_output = reverse and grad_outputs[size - t + 1] or grad_outputs[t] 152 | local cell = self.cells[self.depth] 153 | local grads = {self.gradInput[2], self.gradInput[3]} 154 | if self.num_layers == 1 then 155 | grads[2]:add(grad_output) 156 | else 157 | for i = 1, self.num_layers do 158 | grads[2][i]:add(grad_output[i]) 159 | end 160 | end 161 | 162 | local prev_output = (self.depth > 1) and self.cells[self.depth - 1].output 163 | or self.initial_values 164 | self.gradInput = cell:backward({input, prev_output[1], prev_output[2]}, grads) 165 | if reverse then 166 | input_grads[size - t + 1] = self.gradInput[1] 167 | else 168 | input_grads[t] = self.gradInput[1] 169 | end 170 | self.depth = self.depth - 1 171 | end 172 | self:forget() -- important to clear out state 173 | return input_grads 174 | end 175 | 176 | function LSTM:share(lstm, ...) 177 | if self.in_dim ~= lstm.in_dim then error("LSTM input dimension mismatch") end 178 | if self.mem_dim ~= lstm.mem_dim then error("LSTM memory dimension mismatch") end 179 | if self.num_layers ~= lstm.num_layers then error("LSTM layer count mismatch") end 180 | if self.gate_output ~= lstm.gate_output then error("LSTM output gating mismatch") end 181 | share_params(self.master_cell, lstm.master_cell, ...) 182 | end 183 | 184 | function LSTM:zeroGradParameters() 185 | self.master_cell:zeroGradParameters() 186 | end 187 | 188 | function LSTM:parameters() 189 | return self.master_cell:parameters() 190 | end 191 | 192 | -- Clear saved gradients 193 | function LSTM:forget() 194 | self.depth = 0 195 | for i = 1, #self.gradInput do 196 | local gradInput = self.gradInput[i] 197 | if type(gradInput) == 'table' then 198 | for _, t in pairs(gradInput) do t:zero() end 199 | else 200 | self.gradInput[i]:zero() 201 | end 202 | end 203 | end 204 | -------------------------------------------------------------------------------- /relatedness/TreeLSTMSim.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Semantic relatedness prediction using Tree-LSTMs. 
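A single Tree-LSTM (Child-Sum for dependency parses, Binary for constituency
parses) encodes both sentences of a pair; the element-wise product and
absolute difference of the two sentence representations are fed to a small
classifier over five similarity classes (see new_sim_module) trained with a
KL-divergence criterion against a target rating distribution.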
4 | 5 | --]] 6 | 7 | local TreeLSTMSim = torch.class('treelstm.TreeLSTMSim') 8 | 9 | function TreeLSTMSim:__init(config) 10 | self.mem_dim = config.mem_dim or 150 11 | self.learning_rate = config.learning_rate or 0.05 12 | self.emb_learning_rate = config.emb_learning_rate or 0.0 13 | self.batch_size = config.batch_size or 25 14 | self.reg = config.reg or 1e-4 15 | self.structure = config.structure or 'dependency' -- {dependency, constituency} 16 | self.sim_nhidden = config.sim_nhidden or 50 17 | 18 | -- word embedding 19 | self.emb_dim = config.emb_vecs:size(2) 20 | self.emb = nn.LookupTable(config.emb_vecs:size(1), self.emb_dim) 21 | self.emb.weight:copy(config.emb_vecs) 22 | 23 | -- number of similarity rating classes 24 | self.num_classes = 5 25 | 26 | -- optimizer configuration 27 | self.optim_state = { learningRate = self.learning_rate } 28 | 29 | -- KL divergence optimization objective 30 | self.criterion = nn.DistKLDivCriterion() 31 | 32 | -- initialize tree-lstm model 33 | local treelstm_config = { 34 | in_dim = self.emb_dim, 35 | mem_dim = self.mem_dim, 36 | gate_output = false, 37 | } 38 | 39 | if self.structure == 'dependency' then 40 | self.treelstm = treelstm.ChildSumTreeLSTM(treelstm_config) 41 | elseif self.structure == 'constituency' then 42 | self.treelstm = treelstm.BinaryTreeLSTM(treelstm_config) 43 | else 44 | error('invalid parse tree type: ' .. self.structure) 45 | end 46 | 47 | -- similarity model 48 | self.sim_module = self:new_sim_module() 49 | local modules = nn.Parallel() 50 | :add(self.treelstm) 51 | :add(self.sim_module) 52 | self.params, self.grad_params = modules:getParameters() 53 | end 54 | 55 | function TreeLSTMSim:new_sim_module() 56 | local vecs_to_input 57 | local lvec = nn.Identity()() 58 | local rvec = nn.Identity()() 59 | local mult_dist = nn.CMulTable(){lvec, rvec} 60 | local add_dist = nn.Abs()(nn.CSubTable(){lvec, rvec}) 61 | local vec_dist_feats = nn.JoinTable(1){mult_dist, add_dist} 62 | vecs_to_input = nn.gModule({lvec, rvec}, {vec_dist_feats}) 63 | 64 | -- define similarity model architecture 65 | local sim_module = nn.Sequential() 66 | :add(vecs_to_input) 67 | :add(nn.Linear(2 * self.mem_dim, self.sim_nhidden)) 68 | :add(nn.Sigmoid()) -- does better than tanh 69 | :add(nn.Linear(self.sim_nhidden, self.num_classes)) 70 | :add(nn.LogSoftMax()) 71 | return sim_module 72 | end 73 | 74 | function TreeLSTMSim:train(dataset) 75 | self.treelstm:training() 76 | local indices = torch.randperm(dataset.size) 77 | local zeros = torch.zeros(self.mem_dim) 78 | for i = 1, dataset.size, self.batch_size do 79 | xlua.progress(i, dataset.size) 80 | local batch_size = math.min(i + self.batch_size - 1, dataset.size) - i + 1 81 | 82 | -- get target distributions for batch 83 | local targets = torch.zeros(batch_size, self.num_classes) 84 | for j = 1, batch_size do 85 | local sim = dataset.labels[indices[i + j - 1]] * (self.num_classes - 1) + 1 86 | local ceil, floor = math.ceil(sim), math.floor(sim) 87 | if ceil == floor then 88 | targets[{j, floor}] = 1 89 | else 90 | targets[{j, floor}] = ceil - sim 91 | targets[{j, ceil}] = sim - floor 92 | end 93 | end 94 | 95 | local feval = function(x) 96 | self.grad_params:zero() 97 | self.emb:zeroGradParameters() 98 | local loss = 0 99 | for j = 1, batch_size do 100 | local idx = indices[i + j - 1] 101 | local ltree, rtree = dataset.ltrees[idx], dataset.rtrees[idx] 102 | local lsent, rsent = dataset.lsents[idx], dataset.rsents[idx] 103 | self.emb:forward(lsent) 104 | local linputs = 
torch.Tensor(self.emb.output:size()):copy(self.emb.output) 105 | local rinputs = self.emb:forward(rsent) 106 | 107 | -- get sentence representations 108 | local lrep = self.treelstm:forward(ltree, linputs)[2] 109 | local rrep = self.treelstm:forward(rtree, rinputs)[2] 110 | 111 | -- compute relatedness 112 | local output = self.sim_module:forward{lrep, rrep} 113 | 114 | -- compute loss and backpropagate 115 | local example_loss = self.criterion:forward(output, targets[j]) 116 | loss = loss + example_loss 117 | local sim_grad = self.criterion:backward(output, targets[j]) 118 | local rep_grad = self.sim_module:backward({lrep, rrep}, sim_grad) 119 | local linput_grads = self.treelstm:backward(dataset.ltrees[idx], linputs, {zeros, rep_grad[1]}) 120 | local rinput_grads = self.treelstm:backward(dataset.rtrees[idx], rinputs, {zeros, rep_grad[2]}) 121 | self.emb:backward(lsent, linput_grads) 122 | self.emb:backward(rsent, rinput_grads) 123 | end 124 | 125 | loss = loss / batch_size 126 | self.grad_params:div(batch_size) 127 | self.emb.gradWeight:div(batch_size) 128 | self.emb:updateParameters(self.emb_learning_rate) 129 | 130 | -- regularization 131 | loss = loss + 0.5 * self.reg * self.params:norm() ^ 2 132 | self.grad_params:add(self.reg, self.params) 133 | return loss, self.grad_params 134 | end 135 | 136 | optim.adagrad(feval, self.params, self.optim_state) 137 | end 138 | xlua.progress(dataset.size, dataset.size) 139 | end 140 | 141 | -- Predict the similarity of a sentence pair. 142 | function TreeLSTMSim:predict(ltree, rtree, lsent, rsent) 143 | local linputs = self.emb:forward(lsent) 144 | local lrep = self.treelstm:forward(ltree, linputs)[2] 145 | local rinputs = self.emb:forward(rsent) 146 | local rrep = self.treelstm:forward(rtree, rinputs)[2] 147 | local output = self.sim_module:forward{lrep, rrep} 148 | self.treelstm:clean(ltree) 149 | self.treelstm:clean(rtree) 150 | return torch.range(1, 5):dot(output:exp()) 151 | end 152 | 153 | -- Produce similarity predictions for each sentence pair in the dataset. 
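-- predict() converts the class log-probabilities into a real-valued score by
-- taking the expected rating under the predicted distribution:
-- torch.range(1, 5):dot(output:exp()). For example, a predicted distribution
-- of {0, 0, 0.5, 0.5, 0} over the five classes gives 3 * 0.5 + 4 * 0.5 = 3.5.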
154 | function TreeLSTMSim:predict_dataset(dataset) 155 | self.treelstm:evaluate() 156 | local predictions = torch.Tensor(dataset.size) 157 | for i = 1, dataset.size do 158 | xlua.progress(i, dataset.size) 159 | local ltree, rtree = dataset.ltrees[i], dataset.rtrees[i] 160 | local lsent, rsent = dataset.lsents[i], dataset.rsents[i] 161 | predictions[i] = self:predict(ltree, rtree, lsent, rsent) 162 | end 163 | return predictions 164 | end 165 | 166 | function TreeLSTMSim:print_config() 167 | local num_params = self.params:size(1) 168 | local num_sim_params = self:new_sim_module():getParameters():size(1) 169 | printf('%-25s = %d\n', 'num params', num_params) 170 | printf('%-25s = %d\n', 'num compositional params', num_params - num_sim_params) 171 | printf('%-25s = %d\n', 'word vector dim', self.emb_dim) 172 | printf('%-25s = %d\n', 'Tree-LSTM memory dim', self.mem_dim) 173 | printf('%-25s = %.2e\n', 'regularization strength', self.reg) 174 | printf('%-25s = %d\n', 'minibatch size', self.batch_size) 175 | printf('%-25s = %.2e\n', 'learning rate', self.learning_rate) 176 | printf('%-25s = %.2e\n', 'word vector learning rate', self.emb_learning_rate) 177 | printf('%-25s = %s\n', 'parse tree type', self.structure) 178 | printf('%-25s = %d\n', 'sim module hidden dim', self.sim_nhidden) 179 | end 180 | 181 | -- 182 | -- Serialization 183 | -- 184 | 185 | function TreeLSTMSim:save(path) 186 | local config = { 187 | batch_size = self.batch_size, 188 | emb_vecs = self.emb.weight:float(), 189 | learning_rate = self.learning_rate, 190 | emb_learning_rate = self.emb_learning_rate, 191 | mem_dim = self.mem_dim, 192 | sim_nhidden = self.sim_nhidden, 193 | reg = self.reg, 194 | structure = self.structure, 195 | } 196 | 197 | torch.save(path, { 198 | params = self.params, 199 | config = config, 200 | }) 201 | end 202 | 203 | function TreeLSTMSim.load(path) 204 | local state = torch.load(path) 205 | local model = treelstm.TreeLSTMSim.new(state.config) 206 | model.params:copy(state.params) 207 | return model 208 | end 209 | -------------------------------------------------------------------------------- /lib/ConstituencyParse.java: -------------------------------------------------------------------------------- 1 | import edu.stanford.nlp.process.WordTokenFactory; 2 | import edu.stanford.nlp.ling.HasWord; 3 | import edu.stanford.nlp.ling.Word; 4 | import edu.stanford.nlp.ling.CoreLabel; 5 | import edu.stanford.nlp.process.PTBTokenizer; 6 | import edu.stanford.nlp.util.StringUtils; 7 | import edu.stanford.nlp.parser.lexparser.LexicalizedParser; 8 | import edu.stanford.nlp.parser.lexparser.TreeBinarizer; 9 | import edu.stanford.nlp.trees.GrammaticalStructure; 10 | import edu.stanford.nlp.trees.GrammaticalStructureFactory; 11 | import edu.stanford.nlp.trees.PennTreebankLanguagePack; 12 | import edu.stanford.nlp.trees.Tree; 13 | import edu.stanford.nlp.trees.Trees; 14 | import edu.stanford.nlp.trees.TreebankLanguagePack; 15 | import edu.stanford.nlp.trees.TypedDependency; 16 | 17 | import java.io.BufferedWriter; 18 | import java.io.FileWriter; 19 | import java.io.StringReader; 20 | import java.io.IOException; 21 | import java.util.ArrayList; 22 | import java.util.Collection; 23 | import java.util.List; 24 | import java.util.HashMap; 25 | import java.util.Properties; 26 | import java.util.Scanner; 27 | 28 | public class ConstituencyParse { 29 | 30 | private boolean tokenize; 31 | private BufferedWriter tokWriter, parentWriter; 32 | private LexicalizedParser parser; 33 | private TreeBinarizer binarizer; 34 | 
private CollapseUnaryTransformer transformer; 35 | private GrammaticalStructureFactory gsf; 36 | 37 | private static final String PCFG_PATH = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"; 38 | 39 | public ConstituencyParse(String tokPath, String parentPath, boolean tokenize) throws IOException { 40 | this.tokenize = tokenize; 41 | if (tokPath != null) { 42 | tokWriter = new BufferedWriter(new FileWriter(tokPath)); 43 | } 44 | parentWriter = new BufferedWriter(new FileWriter(parentPath)); 45 | parser = LexicalizedParser.loadModel(PCFG_PATH); 46 | binarizer = TreeBinarizer.simpleTreeBinarizer( 47 | parser.getTLPParams().headFinder(), parser.treebankLanguagePack()); 48 | transformer = new CollapseUnaryTransformer(); 49 | 50 | // set up to produce dependency representations from constituency trees 51 | TreebankLanguagePack tlp = new PennTreebankLanguagePack(); 52 | gsf = tlp.grammaticalStructureFactory(); 53 | } 54 | 55 | public List sentenceToTokens(String line) { 56 | List tokens = new ArrayList<>(); 57 | if (tokenize) { 58 | PTBTokenizer tokenizer = new PTBTokenizer(new StringReader(line), new WordTokenFactory(), ""); 59 | for (Word label; tokenizer.hasNext(); ) { 60 | tokens.add(tokenizer.next()); 61 | } 62 | } else { 63 | for (String word : line.split(" ")) { 64 | tokens.add(new Word(word)); 65 | } 66 | } 67 | 68 | return tokens; 69 | } 70 | 71 | public Tree parse(List tokens) { 72 | Tree tree = parser.apply(tokens); 73 | return tree; 74 | } 75 | 76 | public int[] constTreeParents(Tree tree) { 77 | Tree binarized = binarizer.transformTree(tree); 78 | Tree collapsedUnary = transformer.transformTree(binarized); 79 | Trees.convertToCoreLabels(collapsedUnary); 80 | collapsedUnary.indexSpans(); 81 | List leaves = collapsedUnary.getLeaves(); 82 | int size = collapsedUnary.size() - leaves.size(); 83 | int[] parents = new int[size]; 84 | HashMap index = new HashMap(); 85 | 86 | int idx = leaves.size(); 87 | int leafIdx = 0; 88 | for (Tree leaf : leaves) { 89 | Tree cur = leaf.parent(collapsedUnary); // go to preterminal 90 | int curIdx = leafIdx++; 91 | boolean done = false; 92 | while (!done) { 93 | Tree parent = cur.parent(collapsedUnary); 94 | if (parent == null) { 95 | parents[curIdx] = 0; 96 | break; 97 | } 98 | 99 | int parentIdx; 100 | int parentNumber = parent.nodeNumber(collapsedUnary); 101 | if (!index.containsKey(parentNumber)) { 102 | parentIdx = idx++; 103 | index.put(parentNumber, parentIdx); 104 | } else { 105 | parentIdx = index.get(parentNumber); 106 | done = true; 107 | } 108 | 109 | parents[curIdx] = parentIdx + 1; 110 | cur = parent; 111 | curIdx = parentIdx; 112 | } 113 | } 114 | 115 | return parents; 116 | } 117 | 118 | // convert constituency parse to a dependency representation and return the 119 | // parent pointer representation of the tree 120 | public int[] depTreeParents(Tree tree, List tokens) { 121 | GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); 122 | Collection tdl = gs.typedDependencies(); 123 | int len = tokens.size(); 124 | int[] parents = new int[len]; 125 | for (int i = 0; i < len; i++) { 126 | // if a node has a parent of -1 at the end of parsing, then the node 127 | // has no parent. 
128 | parents[i] = -1; 129 | } 130 | 131 | for (TypedDependency td : tdl) { 132 | // let root have index 0 133 | int child = td.dep().index(); 134 | int parent = td.gov().index(); 135 | parents[child - 1] = parent; 136 | } 137 | 138 | return parents; 139 | } 140 | 141 | public void printTokens(List tokens) throws IOException { 142 | int len = tokens.size(); 143 | StringBuilder sb = new StringBuilder(); 144 | for (int i = 0; i < len - 1; i++) { 145 | if (tokenize) { 146 | sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word())); 147 | } else { 148 | sb.append(tokens.get(i).word()); 149 | } 150 | sb.append(' '); 151 | } 152 | 153 | if (tokenize) { 154 | sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word())); 155 | } else { 156 | sb.append(tokens.get(len - 1).word()); 157 | } 158 | 159 | sb.append('\n'); 160 | tokWriter.write(sb.toString()); 161 | } 162 | 163 | public void printParents(int[] parents) throws IOException { 164 | StringBuilder sb = new StringBuilder(); 165 | int size = parents.length; 166 | for (int i = 0; i < size - 1; i++) { 167 | sb.append(parents[i]); 168 | sb.append(' '); 169 | } 170 | sb.append(parents[size - 1]); 171 | sb.append('\n'); 172 | parentWriter.write(sb.toString()); 173 | } 174 | 175 | public void close() throws IOException { 176 | if (tokWriter != null) tokWriter.close(); 177 | parentWriter.close(); 178 | } 179 | 180 | public static void main(String[] args) throws Exception { 181 | Properties props = StringUtils.argsToProperties(args); 182 | if (!props.containsKey("parentpath")) { 183 | System.err.println( 184 | "usage: java ConstituencyParse -deps - -tokenize - -tokpath -parentpath "); 185 | System.exit(1); 186 | } 187 | 188 | // whether to tokenize input sentences 189 | boolean tokenize = false; 190 | if (props.containsKey("tokenize")) { 191 | tokenize = true; 192 | } 193 | 194 | // whether to produce dependency trees from the constituency parse 195 | boolean deps = false; 196 | if (props.containsKey("deps")) { 197 | deps = true; 198 | } 199 | 200 | String tokPath = props.containsKey("tokpath") ? props.getProperty("tokpath") : null; 201 | String parentPath = props.getProperty("parentpath"); 202 | ConstituencyParse processor = new ConstituencyParse(tokPath, parentPath, tokenize); 203 | 204 | Scanner stdin = new Scanner(System.in); 205 | int count = 0; 206 | long start = System.currentTimeMillis(); 207 | while (stdin.hasNextLine()) { 208 | String line = stdin.nextLine(); 209 | List tokens = processor.sentenceToTokens(line); 210 | Tree parse = processor.parse(tokens); 211 | 212 | // produce parent pointer representation 213 | int[] parents = deps ? 
processor.depTreeParents(parse, tokens) 214 | : processor.constTreeParents(parse); 215 | 216 | // print 217 | if (tokPath != null) { 218 | processor.printTokens(tokens); 219 | } 220 | processor.printParents(parents); 221 | 222 | count++; 223 | if (count % 1000 == 0) { 224 | double elapsed = (System.currentTimeMillis() - start) / 1000.0; 225 | System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed); 226 | } 227 | } 228 | 229 | long totalTimeMillis = System.currentTimeMillis() - start; 230 | System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", 231 | count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count); 232 | processor.close(); 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /sentiment/LSTMSentiment.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Sentiment classification using LSTMs. 4 | 5 | --]] 6 | 7 | local LSTMSentiment = torch.class('treelstm.LSTMSentiment') 8 | 9 | function LSTMSentiment:__init(config) 10 | self.mem_dim = config.mem_dim or 150 11 | self.learning_rate = config.learning_rate or 0.05 12 | self.emb_learning_rate = config.emb_learning_rate or 0.1 13 | self.num_layers = config.num_layers or 1 14 | self.batch_size = config.batch_size or 5 15 | self.reg = config.reg or 1e-4 16 | self.structure = config.structure or 'lstm' -- {lstm, bilstm} 17 | self.fine_grained = (config.fine_grained == nil) and true or config.fine_grained 18 | self.dropout = (config.dropout == nil) and true or config.dropout 19 | self.train_subtrees = 4 -- number of subtrees to sample during training 20 | 21 | -- word embedding 22 | self.emb_dim = config.emb_vecs:size(2) 23 | self.emb = nn.LookupTable(config.emb_vecs:size(1), self.emb_dim) 24 | self.emb.weight:copy(config.emb_vecs) 25 | 26 | self.in_zeros = torch.zeros(self.emb_dim) 27 | self.num_classes = self.fine_grained and 5 or 3 28 | 29 | -- optimizer configuration 30 | self.optim_state = { learningRate = self.learning_rate } 31 | 32 | -- negative log likelihood optimization objective 33 | self.criterion = nn.ClassNLLCriterion() 34 | 35 | -- sentiment classification module 36 | self.sentiment_module = self:new_sentiment_module() 37 | 38 | -- initialize LSTM model 39 | local lstm_config = { 40 | in_dim = self.emb_dim, 41 | mem_dim = self.mem_dim, 42 | num_layers = self.num_layers, 43 | gate_output = true, 44 | } 45 | 46 | if self.structure == 'lstm' then 47 | self.lstm = treelstm.LSTM(lstm_config) 48 | elseif self.structure == 'bilstm' then 49 | self.lstm = treelstm.LSTM(lstm_config) 50 | self.lstm_b = treelstm.LSTM(lstm_config) 51 | else 52 | error('invalid LSTM type: ' .. 
self.structure) 53 | end 54 | 55 | local modules = nn.Parallel() 56 | :add(self.lstm) 57 | :add(self.sentiment_module) 58 | self.params, self.grad_params = modules:getParameters() 59 | 60 | -- share must only be called after getParameters, since this changes the 61 | -- location of the parameters 62 | if self.structure == 'bilstm' then 63 | share_params(self.lstm_b, self.lstm) 64 | end 65 | end 66 | 67 | function LSTMSentiment:new_sentiment_module() 68 | local input_dim = self.num_layers * self.mem_dim 69 | local inputs, vec 70 | if self.structure == 'lstm' then 71 | local rep = nn.Identity()() 72 | if self.num_layers == 1 then 73 | vec = {rep} 74 | else 75 | vec = nn.JoinTable(1)(rep) 76 | end 77 | inputs = {rep} 78 | elseif self.structure == 'bilstm' then 79 | local frep, brep = nn.Identity()(), nn.Identity()() 80 | input_dim = input_dim * 2 81 | if self.num_layers == 1 then 82 | vec = nn.JoinTable(1){frep, brep} 83 | else 84 | vec = nn.JoinTable(1){nn.JoinTable(1)(frep), nn.JoinTable(1)(brep)} 85 | end 86 | inputs = {frep, brep} 87 | end 88 | 89 | local logprobs 90 | if self.dropout then 91 | logprobs = nn.LogSoftMax()( 92 | nn.Linear(input_dim, self.num_classes)( 93 | nn.Dropout()(vec))) 94 | else 95 | logprobs = nn.LogSoftMax()( 96 | nn.Linear(input_dim, self.num_classes)(vec)) 97 | end 98 | 99 | return nn.gModule(inputs, {logprobs}) 100 | end 101 | 102 | function LSTMSentiment:train(dataset) 103 | self.lstm:training() 104 | self.sentiment_module:training() 105 | if self.structure == 'bilstm' then 106 | self.lstm_b:training() 107 | end 108 | 109 | local indices = torch.randperm(dataset.size) 110 | local zeros = torch.zeros(self.mem_dim) 111 | for i = 1, dataset.size, self.batch_size do 112 | xlua.progress(i, dataset.size) 113 | local batch_size = math.min(i + self.batch_size - 1, dataset.size) - i + 1 114 | 115 | local feval = function(x) 116 | self.grad_params:zero() 117 | self.emb:zeroGradParameters() 118 | 119 | local loss = 0 120 | for j = 1, batch_size do 121 | local idx = indices[i + j - 1] 122 | local tree = dataset.trees[idx] 123 | local sent = dataset.sents[idx] 124 | local subtrees = tree:depth_first_preorder() 125 | for k = 1, self.train_subtrees + 1 do 126 | local subtree = (k == 1) and tree or subtrees[math.ceil(torch.uniform(1, #subtrees))] 127 | local span = sent[{{subtree.lo, subtree.hi}}] 128 | local inputs = self.emb:forward(span) 129 | 130 | -- get sentence representations 131 | local rep 132 | if self.structure == 'lstm' then 133 | rep = self.lstm:forward(inputs) 134 | elseif self.structure == 'bilstm' then 135 | rep = { 136 | self.lstm:forward(inputs), 137 | self.lstm_b:forward(inputs, true), -- true => reverse 138 | } 139 | end 140 | 141 | -- compute class log probabilities 142 | local output = self.sentiment_module:forward(rep) 143 | 144 | -- compute loss and backpropagate 145 | local example_loss = self.criterion:forward(output, subtree.gold_label) 146 | loss = loss + example_loss 147 | local obj_grad = self.criterion:backward(output, subtree.gold_label) 148 | local rep_grad = self.sentiment_module:backward(rep, obj_grad) 149 | local input_grads 150 | if self.structure == 'lstm' then 151 | input_grads = self:LSTM_backward(sent, inputs, rep_grad) 152 | elseif self.structure == 'bilstm' then 153 | input_grads = self:BiLSTM_backward(sent, inputs, rep_grad) 154 | end 155 | self.emb:backward(span, input_grads) 156 | end 157 | end 158 | 159 | local batch_subtrees = batch_size * (self.train_subtrees + 1) 160 | loss = loss / batch_subtrees 161 | 
self.grad_params:div(batch_subtrees) 162 | self.emb.gradWeight:div(batch_subtrees) 163 | 164 | -- regularization 165 | loss = loss + 0.5 * self.reg * self.params:norm() ^ 2 166 | self.grad_params:add(self.reg, self.params) 167 | return loss, self.grad_params 168 | end 169 | 170 | optim.adagrad(feval, self.params, self.optim_state) 171 | self.emb:updateParameters(self.emb_learning_rate) 172 | end 173 | xlua.progress(dataset.size, dataset.size) 174 | end 175 | 176 | -- LSTM backward propagation 177 | function LSTMSentiment:LSTM_backward(sent, inputs, rep_grad) 178 | local grad 179 | if self.num_layers == 1 then 180 | grad = torch.zeros(sent:nElement(), self.mem_dim) 181 | grad[sent:nElement()] = rep_grad 182 | else 183 | grad = torch.zeros(sent:nElement(), self.num_layers, self.mem_dim) 184 | for l = 1, self.num_layers do 185 | grad[{sent:nElement(), l, {}}] = rep_grad[l] 186 | end 187 | end 188 | local input_grads = self.lstm:backward(inputs, grad) 189 | return input_grads 190 | end 191 | 192 | -- Bidirectional LSTM backward propagation 193 | function LSTMSentiment:BiLSTM_backward(sent, inputs, rep_grad) 194 | local grad, grad_b 195 | if self.num_layers == 1 then 196 | grad = torch.zeros(sent:nElement(), self.mem_dim) 197 | grad_b = torch.zeros(sent:nElement(), self.mem_dim) 198 | grad[sent:nElement()] = rep_grad[1] 199 | grad_b[1] = rep_grad[2] 200 | else 201 | grad = torch.zeros(sent:nElement(), self.num_layers, self.mem_dim) 202 | grad_b = torch.zeros(sent:nElement(), self.num_layers, self.mem_dim) 203 | for l = 1, self.num_layers do 204 | grad[{sent:nElement(), l, {}}] = rep_grad[1][l] 205 | grad_b[{1, l, {}}] = rep_grad[2][l] 206 | end 207 | end 208 | local input_grads = self.lstm:backward(inputs, grad) 209 | local input_grads_b = self.lstm_b:backward(inputs, grad_b, true) 210 | return input_grads + input_grads_b 211 | end 212 | 213 | -- Predict the sentiment of a sentence. 214 | function LSTMSentiment:predict(sent) 215 | self.lstm:evaluate() 216 | self.sentiment_module:evaluate() 217 | local inputs = self.emb:forward(sent) 218 | 219 | local rep 220 | if self.structure == 'lstm' then 221 | rep = self.lstm:forward(inputs) 222 | elseif self.structure == 'bilstm' then 223 | self.lstm_b:evaluate() 224 | rep = { 225 | self.lstm:forward(inputs), 226 | self.lstm_b:forward(inputs, true), 227 | } 228 | end 229 | local logprobs = self.sentiment_module:forward(rep) 230 | local prediction 231 | if self.fine_grained then 232 | prediction = argmax(logprobs) 233 | else 234 | prediction = (logprobs[1] > logprobs[3]) and 1 or 3 235 | end 236 | self.lstm:forget() 237 | if self.structure == 'bilstm' then 238 | self.lstm_b:forget() 239 | end 240 | return prediction 241 | end 242 | 243 | -- Produce sentiment predictions for each sentence in the dataset. 
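-- Note that training samples random subtree spans per sentence, while
-- prediction below scores only the full sentence. In the fine-grained setting
-- the prediction is the argmax over the five class log-probabilities; in the
-- binary setting classes 1 (negative) and 3 (positive) are compared directly
-- and the neutral class is ignored. Accuracy can then be computed as in
-- sentiment/main.lua, e.g. (names illustrative):
--
--   local preds = model:predict_dataset(dev_dataset)
--   local acc = torch.eq(preds, dev_dataset.labels):sum() / preds:size(1)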
244 | function LSTMSentiment:predict_dataset(dataset) 245 | local predictions = torch.Tensor(dataset.size) 246 | for i = 1, dataset.size do 247 | xlua.progress(i, dataset.size) 248 | predictions[i] = self:predict(dataset.sents[i]) 249 | end 250 | return predictions 251 | end 252 | 253 | function argmax(v) 254 | local idx = 1 255 | local max = v[1] 256 | for i = 2, v:size(1) do 257 | if v[i] > max then 258 | max = v[i] 259 | idx = i 260 | end 261 | end 262 | return idx 263 | end 264 | 265 | function LSTMSentiment:print_config() 266 | local num_params = self.params:size(1) 267 | local num_sentiment_params = self:new_sentiment_module():getParameters():size(1) 268 | printf('%-25s = %s\n', 'fine grained sentiment', tostring(self.fine_grained)) 269 | printf('%-25s = %d\n', 'num params', num_params) 270 | printf('%-25s = %d\n', 'num compositional params', num_params - num_sentiment_params) 271 | printf('%-25s = %d\n', 'word vector dim', self.emb_dim) 272 | printf('%-25s = %d\n', 'LSTM memory dim', self.mem_dim) 273 | printf('%-25s = %s\n', 'LSTM structure', self.structure) 274 | printf('%-25s = %d\n', 'LSTM layers', self.num_layers) 275 | printf('%-25s = %.2e\n', 'regularization strength', self.reg) 276 | printf('%-25s = %d\n', 'minibatch size', self.batch_size * (self.train_subtrees + 1)) 277 | printf('%-25s = %.2e\n', 'learning rate', self.learning_rate) 278 | printf('%-25s = %.2e\n', 'word vector learning rate', self.emb_learning_rate) 279 | printf('%-25s = %s\n', 'dropout', tostring(self.dropout)) 280 | end 281 | 282 | -- 283 | -- Serialization 284 | -- 285 | 286 | function LSTMSentiment:save(path) 287 | local config = { 288 | batch_size = self.batch_size, 289 | dropout = self.dropout, 290 | emb_learning_rate = self.emb_learning_rate, 291 | emb_vecs = self.emb.weight:float(), 292 | fine_grained = self.fine_grained, 293 | learning_rate = self.learning_rate, 294 | num_layers = self.num_layers, 295 | mem_dim = self.mem_dim, 296 | reg = self.reg, 297 | structure = self.structure, 298 | } 299 | 300 | torch.save(path, { 301 | params = self.params, 302 | config = config, 303 | }) 304 | end 305 | 306 | function LSTMSentiment.load(path) 307 | local state = torch.load(path) 308 | local model = treelstm.LSTMSentiment.new(state.config) 309 | model.params:copy(state.params) 310 | return model 311 | end 312 | -------------------------------------------------------------------------------- /relatedness/LSTMSim.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Semantic relatedness prediction using LSTMs. 
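Sentence pairs are encoded by left-to-right LSTMs (or bidirectional LSTMs)
whose weights are tied across the two sentences; the same element-wise product
and absolute-difference features and KL-divergence objective are used as in
the Tree-LSTM variant. Unlike TreeLSTMSim, the word vectors are read directly
from emb_vecs rather than through a LookupTable layer.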
4 | 5 | --]] 6 | 7 | local LSTMSim = torch.class('treelstm.LSTMSim') 8 | 9 | function LSTMSim:__init(config) 10 | self.mem_dim = config.mem_dim or 150 11 | self.learning_rate = config.learning_rate or 0.05 12 | self.batch_size = config.batch_size or 25 13 | self.num_layers = config.num_layers or 1 14 | self.reg = config.reg or 1e-4 15 | self.structure = config.structure or 'lstm' -- {lstm, bilstm} 16 | self.sim_nhidden = config.sim_nhidden or 50 17 | 18 | -- word embedding 19 | self.emb_vecs = config.emb_vecs 20 | self.emb_dim = config.emb_vecs:size(2) 21 | 22 | -- number of similarity rating classes 23 | self.num_classes = 5 24 | 25 | -- optimizer configuration 26 | self.optim_state = { learningRate = self.learning_rate } 27 | 28 | -- KL divergence optimization objective 29 | self.criterion = nn.DistKLDivCriterion() 30 | 31 | -- initialize LSTM model 32 | local lstm_config = { 33 | in_dim = self.emb_dim, 34 | mem_dim = self.mem_dim, 35 | num_layers = self.num_layers, 36 | gate_output = false, 37 | } 38 | 39 | if self.structure == 'lstm' then 40 | self.llstm = treelstm.LSTM(lstm_config) -- "left" LSTM 41 | self.rlstm = treelstm.LSTM(lstm_config) -- "right" LSTM 42 | elseif self.structure == 'bilstm' then 43 | self.llstm = treelstm.LSTM(lstm_config) 44 | self.llstm_b = treelstm.LSTM(lstm_config) -- backward "left" LSTM 45 | self.rlstm = treelstm.LSTM(lstm_config) 46 | self.rlstm_b = treelstm.LSTM(lstm_config) -- backward "right" LSTM 47 | else 48 | error('invalid LSTM type: ' .. self.structure) 49 | end 50 | 51 | -- similarity model 52 | self.sim_module = self:new_sim_module() 53 | local modules = nn.Parallel() 54 | :add(self.llstm) 55 | :add(self.sim_module) 56 | self.params, self.grad_params = modules:getParameters() 57 | 58 | -- share must only be called after getParameters, since this changes the 59 | -- location of the parameters 60 | share_params(self.rlstm, self.llstm) 61 | if self.structure == 'bilstm' then 62 | -- tying the forward and backward weights improves performance 63 | share_params(self.llstm_b, self.llstm) 64 | share_params(self.rlstm_b, self.llstm) 65 | end 66 | end 67 | 68 | function LSTMSim:new_sim_module() 69 | local lvec, rvec, inputs, input_dim 70 | if self.structure == 'lstm' then 71 | -- standard (left-to-right) LSTM 72 | input_dim = 2 * self.num_layers * self.mem_dim 73 | local linput, rinput = nn.Identity()(), nn.Identity()() 74 | if self.num_layers == 1 then 75 | lvec, rvec = linput, rinput 76 | else 77 | lvec, rvec = nn.JoinTable(1)(linput), nn.JoinTable(1)(rinput) 78 | end 79 | inputs = {linput, rinput} 80 | elseif self.structure == 'bilstm' then 81 | -- bidirectional LSTM 82 | input_dim = 4 * self.num_layers * self.mem_dim 83 | local lf, lb, rf, rb = nn.Identity()(), nn.Identity()(), nn.Identity()(), nn.Identity()() 84 | if self.num_layers == 1 then 85 | lvec = nn.JoinTable(1){lf, lb} 86 | rvec = nn.JoinTable(1){rf, rb} 87 | else 88 | -- in the multilayer case, each input is a table of hidden vectors (one for each layer) 89 | lvec = nn.JoinTable(1){nn.JoinTable(1)(lf), nn.JoinTable(1)(lb)} 90 | rvec = nn.JoinTable(1){nn.JoinTable(1)(rf), nn.JoinTable(1)(rb)} 91 | end 92 | inputs = {lf, lb, rf, rb} 93 | end 94 | local mult_dist = nn.CMulTable(){lvec, rvec} 95 | local add_dist = nn.Abs()(nn.CSubTable(){lvec, rvec}) 96 | local vec_dist_feats = nn.JoinTable(1){mult_dist, add_dist} 97 | local vecs_to_input = nn.gModule(inputs, {vec_dist_feats}) 98 | 99 | -- define similarity model architecture 100 | local sim_module = nn.Sequential() 101 | :add(vecs_to_input) 102 
| :add(nn.Linear(input_dim, self.sim_nhidden)) 103 | :add(nn.Sigmoid()) -- does better than tanh 104 | :add(nn.Linear(self.sim_nhidden, self.num_classes)) 105 | :add(nn.LogSoftMax()) 106 | return sim_module 107 | end 108 | 109 | function LSTMSim:train(dataset) 110 | self.llstm:training() 111 | self.rlstm:training() 112 | if self.structure == 'bilstm' then 113 | self.llstm_b:training() 114 | self.rlstm_b:training() 115 | end 116 | 117 | local indices = torch.randperm(dataset.size) 118 | local zeros = torch.zeros(self.mem_dim) 119 | for i = 1, dataset.size, self.batch_size do 120 | xlua.progress(i, dataset.size) 121 | local batch_size = math.min(i + self.batch_size - 1, dataset.size) - i + 1 122 | 123 | -- get target distributions for batch 124 | local targets = torch.zeros(batch_size, self.num_classes) 125 | for j = 1, batch_size do 126 | local sim = dataset.labels[indices[i + j - 1]] * (self.num_classes - 1) + 1 127 | local ceil, floor = math.ceil(sim), math.floor(sim) 128 | if ceil == floor then 129 | targets[{j, floor}] = 1 130 | else 131 | targets[{j, floor}] = ceil - sim 132 | targets[{j, ceil}] = sim - floor 133 | end 134 | end 135 | 136 | local feval = function(x) 137 | self.grad_params:zero() 138 | local loss = 0 139 | for j = 1, batch_size do 140 | local idx = indices[i + j - 1] 141 | local lsent, rsent = dataset.lsents[idx], dataset.rsents[idx] 142 | local linputs = self.emb_vecs:index(1, lsent:long()):double() 143 | local rinputs = self.emb_vecs:index(1, rsent:long()):double() 144 | 145 | -- get sentence representations 146 | local inputs 147 | if self.structure == 'lstm' then 148 | inputs = {self.llstm:forward(linputs), self.rlstm:forward(rinputs)} 149 | elseif self.structure == 'bilstm' then 150 | inputs = { 151 | self.llstm:forward(linputs), 152 | self.llstm_b:forward(linputs, true), -- true => reverse 153 | self.rlstm:forward(rinputs), 154 | self.rlstm_b:forward(rinputs, true) 155 | } 156 | end 157 | 158 | -- compute relatedness 159 | local output = self.sim_module:forward(inputs) 160 | 161 | -- compute loss and backpropagate 162 | local example_loss = self.criterion:forward(output, targets[j]) 163 | loss = loss + example_loss 164 | local sim_grad = self.criterion:backward(output, targets[j]) 165 | local rep_grad = self.sim_module:backward(inputs, sim_grad) 166 | if self.structure == 'lstm' then 167 | self:LSTM_backward(lsent, rsent, linputs, rinputs, rep_grad) 168 | elseif self.structure == 'bilstm' then 169 | self:BiLSTM_backward(lsent, rsent, linputs, rinputs, rep_grad) 170 | end 171 | end 172 | 173 | loss = loss / batch_size 174 | self.grad_params:div(batch_size) 175 | 176 | -- regularization 177 | loss = loss + 0.5 * self.reg * self.params:norm() ^ 2 178 | self.grad_params:add(self.reg, self.params) 179 | return loss, self.grad_params 180 | end 181 | 182 | optim.adagrad(feval, self.params, self.optim_state) 183 | end 184 | xlua.progress(dataset.size, dataset.size) 185 | end 186 | 187 | -- LSTM backward propagation 188 | function LSTMSim:LSTM_backward(lsent, rsent, linputs, rinputs, rep_grad) 189 | local lgrad, rgrad 190 | if self.num_layers == 1 then 191 | lgrad = torch.zeros(lsent:nElement(), self.mem_dim) 192 | rgrad = torch.zeros(rsent:nElement(), self.mem_dim) 193 | lgrad[lsent:nElement()] = rep_grad[1] 194 | rgrad[rsent:nElement()] = rep_grad[2] 195 | else 196 | lgrad = torch.zeros(lsent:nElement(), self.num_layers, self.mem_dim) 197 | rgrad = torch.zeros(rsent:nElement(), self.num_layers, self.mem_dim) 198 | for l = 1, self.num_layers do 199 | 
lgrad[{lsent:nElement(), l, {}}] = rep_grad[1][l] 200 | rgrad[{rsent:nElement(), l, {}}] = rep_grad[2][l] 201 | end 202 | end 203 | self.llstm:backward(linputs, lgrad) 204 | self.rlstm:backward(rinputs, rgrad) 205 | end 206 | 207 | -- Bidirectional LSTM backward propagation 208 | function LSTMSim:BiLSTM_backward(lsent, rsent, linputs, rinputs, rep_grad) 209 | local lgrad, lgrad_b, rgrad, rgrad_b 210 | if self.num_layers == 1 then 211 | lgrad = torch.zeros(lsent:nElement(), self.mem_dim) 212 | lgrad_b = torch.zeros(lsent:nElement(), self.mem_dim) 213 | rgrad = torch.zeros(rsent:nElement(), self.mem_dim) 214 | rgrad_b = torch.zeros(rsent:nElement(), self.mem_dim) 215 | lgrad[lsent:nElement()] = rep_grad[1] 216 | rgrad[rsent:nElement()] = rep_grad[3] 217 | lgrad_b[1] = rep_grad[2] 218 | rgrad_b[1] = rep_grad[4] 219 | else 220 | lgrad = torch.zeros(lsent:nElement(), self.num_layers, self.mem_dim) 221 | lgrad_b = torch.zeros(lsent:nElement(), self.num_layers, self.mem_dim) 222 | rgrad = torch.zeros(rsent:nElement(), self.num_layers, self.mem_dim) 223 | rgrad_b = torch.zeros(rsent:nElement(), self.num_layers, self.mem_dim) 224 | for l = 1, self.num_layers do 225 | lgrad[{lsent:nElement(), l, {}}] = rep_grad[1][l] 226 | rgrad[{rsent:nElement(), l, {}}] = rep_grad[3][l] 227 | lgrad_b[{1, l, {}}] = rep_grad[2][l] 228 | rgrad_b[{1, l, {}}] = rep_grad[4][l] 229 | end 230 | end 231 | self.llstm:backward(linputs, lgrad) 232 | self.llstm_b:backward(linputs, lgrad_b, true) 233 | self.rlstm:backward(rinputs, rgrad) 234 | self.rlstm_b:backward(rinputs, rgrad_b, true) 235 | end 236 | 237 | -- Predict the similarity of a sentence pair. 238 | function LSTMSim:predict(lsent, rsent) 239 | self.llstm:evaluate() 240 | self.rlstm:evaluate() 241 | local linputs = self.emb_vecs:index(1, lsent:long()):double() 242 | local rinputs = self.emb_vecs:index(1, rsent:long()):double() 243 | local inputs 244 | if self.structure == 'lstm' then 245 | inputs = {self.llstm:forward(linputs), self.rlstm:forward(rinputs)} 246 | elseif self.structure == 'bilstm' then 247 | self.llstm_b:evaluate() 248 | self.rlstm_b:evaluate() 249 | inputs = { 250 | self.llstm:forward(linputs), 251 | self.llstm_b:forward(linputs, true), 252 | self.rlstm:forward(rinputs), 253 | self.rlstm_b:forward(rinputs, true) 254 | } 255 | end 256 | local output = self.sim_module:forward(inputs) 257 | self.llstm:forget() 258 | self.rlstm:forget() 259 | if self.structure == 'bilstm' then 260 | self.llstm_b:forget() 261 | self.rlstm_b:forget() 262 | end 263 | return torch.range(1, 5):dot(output:exp()) 264 | end 265 | 266 | -- Produce similarity predictions for each sentence pair in the dataset. 
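-- The target distributions used in train() place a gold rating y in [1, 5] on
-- the two adjacent integer classes: p[floor(y)] = ceil(y) - y and
-- p[ceil(y)] = y - floor(y) (a single 1 when y is an integer). For example,
-- y = 3.6 gives p = {0, 0, 0.4, 0.6, 0}, and the expected-rating computation
-- in predict() above recovers 3.6 from that distribution.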
267 | function LSTMSim:predict_dataset(dataset) 268 | local predictions = torch.Tensor(dataset.size) 269 | for i = 1, dataset.size do 270 | xlua.progress(i, dataset.size) 271 | local lsent, rsent = dataset.lsents[i], dataset.rsents[i] 272 | predictions[i] = self:predict(lsent, rsent) 273 | end 274 | return predictions 275 | end 276 | 277 | function LSTMSim:print_config() 278 | local num_params = self.params:nElement() 279 | local num_sim_params = self:new_sim_module():getParameters():nElement() 280 | printf('%-25s = %d\n', 'num params', num_params) 281 | printf('%-25s = %d\n', 'num compositional params', num_params - num_sim_params) 282 | printf('%-25s = %d\n', 'word vector dim', self.emb_dim) 283 | printf('%-25s = %d\n', 'LSTM memory dim', self.mem_dim) 284 | printf('%-25s = %.2e\n', 'regularization strength', self.reg) 285 | printf('%-25s = %d\n', 'minibatch size', self.batch_size) 286 | printf('%-25s = %.2e\n', 'learning rate', self.learning_rate) 287 | printf('%-25s = %s\n', 'LSTM structure', self.structure) 288 | printf('%-25s = %d\n', 'LSTM layers', self.num_layers) 289 | printf('%-25s = %d\n', 'sim module hidden dim', self.sim_nhidden) 290 | end 291 | 292 | -- 293 | -- Serialization 294 | -- 295 | 296 | function LSTMSim:save(path) 297 | local config = { 298 | batch_size = self.batch_size, 299 | emb_vecs = self.emb_vecs:float(), 300 | learning_rate = self.learning_rate, 301 | num_layers = self.num_layers, 302 | mem_dim = self.mem_dim, 303 | sim_nhidden = self.sim_nhidden, 304 | reg = self.reg, 305 | structure = self.structure, 306 | } 307 | 308 | torch.save(path, { 309 | params = self.params, 310 | config = config, 311 | }) 312 | end 313 | 314 | function LSTMSim.load(path) 315 | local state = torch.load(path) 316 | local model = treelstm.LSTMSim.new(state.config) 317 | model.params:copy(state.params) 318 | return model 319 | end 320 | -------------------------------------------------------------------------------- /scripts/preprocess-sst.py: -------------------------------------------------------------------------------- 1 | """ 2 | Preprocessing script for Stanford Sentiment Treebank data. 
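Maps the SST phrase ratings in [0, 1] onto five integer labels
(-2, -1, 0, +1, +2, using the cutoffs in load_dictionary) and splits the
sentences and their parent-pointer trees into train/dev/test directories
(see split below), producing the sents.txt and parents.txt files read by
util/read_data.lua.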
3 | 4 | """ 5 | 6 | import os 7 | import glob 8 | 9 | # 10 | # Trees and tree loading 11 | # 12 | 13 | class ConstTree(object): 14 | def __init__(self): 15 | self.left = None 16 | self.right = None 17 | 18 | def size(self): 19 | self.size = 1 20 | if self.left is not None: 21 | self.size += self.left.size() 22 | if self.right is not None: 23 | self.size += self.right.size() 24 | return self.size 25 | 26 | def set_spans(self): 27 | if self.word is not None: 28 | self.span = self.word 29 | return self.span 30 | 31 | self.span = self.left.set_spans() 32 | if self.right is not None: 33 | self.span += ' ' + self.right.set_spans() 34 | return self.span 35 | 36 | def get_labels(self, spans, labels, dictionary): 37 | if self.span in dictionary: 38 | spans[self.idx] = self.span 39 | labels[self.idx] = dictionary[self.span] 40 | if self.left is not None: 41 | self.left.get_labels(spans, labels, dictionary) 42 | if self.right is not None: 43 | self.right.get_labels(spans, labels, dictionary) 44 | 45 | class DepTree(object): 46 | def __init__(self): 47 | self.children = [] 48 | self.lo, self.hi = None, None 49 | 50 | def size(self): 51 | self.size = 1 52 | for c in self.children: 53 | self.size += c.size() 54 | return self.size 55 | 56 | def set_spans(self, words): 57 | self.lo, self.hi = self.idx, self.idx + 1 58 | if len(self.children) == 0: 59 | self.span = words[self.idx] 60 | return 61 | for c in self.children: 62 | c.set_spans(words) 63 | self.lo = min(self.lo, c.lo) 64 | self.hi = max(self.hi, c.hi) 65 | self.span = ' '.join(words[self.lo : self.hi]) 66 | 67 | def get_labels(self, spans, labels, dictionary): 68 | if self.span in dictionary: 69 | spans[self.idx] = self.span 70 | labels[self.idx] = dictionary[self.span] 71 | for c in self.children: 72 | c.get_labels(spans, labels, dictionary) 73 | 74 | def load_trees(dirpath): 75 | const_trees, dep_trees, toks = [], [], [] 76 | with open(os.path.join(dirpath, 'parents.txt')) as parentsfile, \ 77 | open(os.path.join(dirpath, 'dparents.txt')) as dparentsfile, \ 78 | open(os.path.join(dirpath, 'sents.txt')) as toksfile: 79 | parents, dparents = [], [] 80 | for line in parentsfile: 81 | parents.append(map(int, line.split())) 82 | for line in dparentsfile: 83 | dparents.append(map(int, line.split())) 84 | for line in toksfile: 85 | toks.append(line.strip().split()) 86 | for i in xrange(len(toks)): 87 | const_trees.append(load_constituency_tree(parents[i], toks[i])) 88 | dep_trees.append(load_dependency_tree(dparents[i])) 89 | return const_trees, dep_trees, toks 90 | 91 | def load_constituency_tree(parents, words): 92 | trees = [] 93 | root = None 94 | size = len(parents) 95 | for i in xrange(size): 96 | trees.append(None) 97 | 98 | word_idx = 0 99 | for i in xrange(size): 100 | if not trees[i]: 101 | idx = i 102 | prev = None 103 | prev_idx = None 104 | word = words[word_idx] 105 | word_idx += 1 106 | while True: 107 | tree = ConstTree() 108 | parent = parents[idx] - 1 109 | tree.word, tree.parent, tree.idx = word, parent, idx 110 | word = None 111 | if prev is not None: 112 | if tree.left is None: 113 | tree.left = prev 114 | else: 115 | tree.right = prev 116 | trees[idx] = tree 117 | if parent >= 0 and trees[parent] is not None: 118 | if trees[parent].left is None: 119 | trees[parent].left = tree 120 | else: 121 | trees[parent].right = tree 122 | break 123 | elif parent == -1: 124 | root = tree 125 | break 126 | else: 127 | prev = tree 128 | prev_idx = idx 129 | idx = parent 130 | return root 131 | 132 | def load_dependency_tree(parents): 133 | 
trees = [] 134 | root = None 135 | size = len(parents) 136 | for i in xrange(size): 137 | trees.append(None) 138 | 139 | for i in xrange(size): 140 | if not trees[i]: 141 | idx = i 142 | prev = None 143 | prev_idx = None 144 | while True: 145 | tree = DepTree() 146 | parent = parents[idx] - 1 147 | 148 | # node is not in tree 149 | if parent == -2: 150 | break 151 | 152 | tree.parent, tree.idx = parent, idx 153 | if prev is not None: 154 | tree.children.append(prev) 155 | trees[idx] = tree 156 | if parent >= 0 and trees[parent] is not None: 157 | trees[parent].children.append(tree) 158 | break 159 | elif parent == -1: 160 | root = tree 161 | break 162 | else: 163 | prev = tree 164 | prev_idx = idx 165 | idx = parent 166 | return root 167 | 168 | # 169 | # Various utilities 170 | # 171 | 172 | def make_dirs(dirs): 173 | for d in dirs: 174 | if not os.path.exists(d): 175 | os.makedirs(d) 176 | 177 | def load_sents(dirpath): 178 | sents = [] 179 | with open(os.path.join(dirpath, 'SOStr.txt')) as sentsfile: 180 | for line in sentsfile: 181 | sent = ' '.join(line.split('|')) 182 | sents.append(sent.strip()) 183 | return sents 184 | 185 | def load_splits(dirpath): 186 | splits = [] 187 | with open(os.path.join(dirpath, 'datasetSplit.txt')) as splitfile: 188 | splitfile.readline() 189 | for line in splitfile: 190 | idx, split = line.split(',') 191 | splits.append(int(split)) 192 | return splits 193 | 194 | def load_parents(dirpath): 195 | parents = [] 196 | with open(os.path.join(dirpath, 'STree.txt')) as parentsfile: 197 | for line in parentsfile: 198 | p = ' '.join(line.split('|')) 199 | parents.append(p.strip()) 200 | return parents 201 | 202 | def load_dictionary(dirpath): 203 | labels = [] 204 | with open(os.path.join(dirpath, 'sentiment_labels.txt')) as labelsfile: 205 | labelsfile.readline() 206 | for line in labelsfile: 207 | idx, rating = line.split('|') 208 | idx = int(idx) 209 | rating = float(rating) 210 | if rating <= 0.2: 211 | label = -2 212 | elif rating <= 0.4: 213 | label = -1 214 | elif rating > 0.8: 215 | label = +2 216 | elif rating > 0.6: 217 | label = +1 218 | else: 219 | label = 0 220 | labels.append(label) 221 | 222 | d = {} 223 | with open(os.path.join(dirpath, 'dictionary.txt')) as dictionary: 224 | for line in dictionary: 225 | s, idx = line.split('|') 226 | d[s] = labels[int(idx)] 227 | return d 228 | 229 | def build_vocab(filepaths, dst_path, lowercase=True): 230 | vocab = set() 231 | for filepath in filepaths: 232 | with open(filepath) as f: 233 | for line in f: 234 | if lowercase: 235 | line = line.lower() 236 | vocab |= set(line.split()) 237 | with open(dst_path, 'w') as f: 238 | for w in sorted(vocab): 239 | f.write(w + '\n') 240 | 241 | def split(sst_dir, train_dir, dev_dir, test_dir): 242 | sents = load_sents(sst_dir) 243 | splits = load_splits(sst_dir) 244 | parents = load_parents(sst_dir) 245 | 246 | with open(os.path.join(train_dir, 'sents.txt'), 'w') as train, \ 247 | open(os.path.join(dev_dir, 'sents.txt'), 'w') as dev, \ 248 | open(os.path.join(test_dir, 'sents.txt'), 'w') as test, \ 249 | open(os.path.join(train_dir, 'parents.txt'), 'w') as trainparents, \ 250 | open(os.path.join(dev_dir, 'parents.txt'), 'w') as devparents, \ 251 | open(os.path.join(test_dir, 'parents.txt'), 'w') as testparents: 252 | 253 | for sent, split, p in zip(sents, splits, parents): 254 | if split == 1: 255 | train.write(sent) 256 | train.write('\n') 257 | trainparents.write(p) 258 | trainparents.write('\n') 259 | elif split == 2: 260 | test.write(sent) 261 | test.write('\n') 262 
| 263 | testparents.write(p) 264 | testparents.write('\n') 265 | else: 266 | dev.write(sent) 267 | dev.write('\n') 268 | devparents.write(p) 269 | devparents.write('\n') 270 | 271 | def get_labels(tree, dictionary): 272 | size = tree.size() 273 | spans, labels = [], [] 274 | for i in xrange(size): 275 | labels.append(None) 276 | spans.append(None) 277 | tree.get_labels(spans, labels, dictionary) 278 | return spans, labels 279 | 280 | def write_labels(dirpath, dictionary): 281 | print('Writing labels for trees in ' + dirpath) 282 | with open(os.path.join(dirpath, 'labels.txt'), 'w') as labels, \ 283 | open(os.path.join(dirpath, 'dlabels.txt'), 'w') as dlabels: 284 | # load constituency and dependency trees 285 | const_trees, dep_trees, toks = load_trees(dirpath) 286 | 287 | # write span labels 288 | for i in xrange(len(const_trees)): 289 | const_trees[i].set_spans() 290 | dep_trees[i].set_spans(toks[i]) 291 | 292 | # const tree labels 293 | s, l = [], [] 294 | for j in xrange(const_trees[i].size()): 295 | s.append(None) 296 | l.append(None) 297 | const_trees[i].get_labels(s, l, dictionary) 298 | labels.write(' '.join(map(str, l)) + '\n') 299 | 300 | # dep tree labels 301 | dep_trees[i].span = const_trees[i].span 302 | s, l = [], [] 303 | for j in xrange(len(toks[i])): 304 | s.append(None) 305 | l.append('#') 306 | dep_trees[i].get_labels(s, l, dictionary) 307 | dlabels.write(' '.join(map(str, l)) + '\n') 308 | 309 | def dependency_parse(filepath, cp='', tokenize=True): 310 | print('\nDependency parsing ' + filepath) 311 | dirpath = os.path.dirname(filepath) 312 | filepre = os.path.splitext(os.path.basename(filepath))[0] 313 | tokpath = os.path.join(dirpath, filepre + '.toks') 314 | parentpath = os.path.join(dirpath, 'dparents.txt') 315 | relpath = os.path.join(dirpath, 'rels.txt') 316 | tokenize_flag = '-tokenize - ' if tokenize else '' 317 | cmd = ('java -cp %s DependencyParse -tokpath %s -parentpath %s -relpath %s %s < %s' 318 | % (cp, tokpath, parentpath, relpath, tokenize_flag, filepath)) 319 | os.system(cmd) 320 | 321 | if __name__ == '__main__': 322 | print('=' * 80) 323 | print('Preprocessing Stanford Sentiment Treebank') 324 | print('=' * 80) 325 | 326 | base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 327 | data_dir = os.path.join(base_dir, 'data') 328 | lib_dir = os.path.join(base_dir, 'lib') 329 | sst_dir = os.path.join(data_dir, 'sst') 330 | train_dir = os.path.join(sst_dir, 'train') 331 | dev_dir = os.path.join(sst_dir, 'dev') 332 | test_dir = os.path.join(sst_dir, 'test') 333 | make_dirs([train_dir, dev_dir, test_dir]) 334 | 335 | # produce train/dev/test splits 336 | split(sst_dir, train_dir, dev_dir, test_dir) 337 | sent_paths = glob.glob(os.path.join(sst_dir, '*/sents.txt')) 338 | 339 | # produce dependency parses 340 | classpath = ':'.join([ 341 | lib_dir, 342 | os.path.join(lib_dir, 'stanford-parser/stanford-parser.jar'), 343 | os.path.join(lib_dir, 'stanford-parser/stanford-parser-3.5.1-models.jar')]) 344 | for filepath in sent_paths: 345 | dependency_parse(filepath, cp=classpath, tokenize=False) 346 | 347 | # get vocabulary 348 | build_vocab(sent_paths, os.path.join(sst_dir, 'vocab.txt')) 349 | build_vocab(sent_paths, os.path.join(sst_dir, 'vocab-cased.txt'), lowercase=False) 350 | 351 | # write sentiment labels for nodes in trees 352 | dictionary = load_dictionary(sst_dir) 353 | write_labels(train_dir, dictionary) 354 | write_labels(dev_dir, dictionary) 355 | write_labels(test_dir, dictionary) 356 | 
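The parents.txt and dparents.txt files consumed by the loaders above encode each tree as a flat array of 1-indexed parent pointers: entry i gives the parent of node i, a value of 0 marks the root, and (in the dependency files) a value of -1 marks a token that is not attached to the tree. A minimal, self-contained sketch of that convention, independent of the loaders above (children_from_parents is an illustrative helper, not part of this repository):

def children_from_parents(parents):
    """Build 0-based children lists from a 1-indexed parent-pointer array.

    parents[i] is the 1-based index of node (i+1)'s parent,
    0 marks the root, and -1 marks a node outside the tree
    (the dependency-parse convention used in dparents.txt).
    """
    n = len(parents)
    children = [[] for _ in range(n)]
    root = None
    for i, p in enumerate(parents):
        if p == 0:           # root node
            root = i
        elif p > 0:          # ordinary child; convert parent to 0-based
            children[p - 1].append(i)
        # p == -1: token not attached to the tree, skip it
    return root, children

# Example: "2 0 2" encodes a 3-node tree rooted at node 2 (1-based),
# with nodes 1 and 3 as its children.
root, children = children_from_parents([2, 0, 2])
assert root == 1 and children[1] == [0, 2]

load_constituency_tree and load_dependency_tree walk the same arrays bottom-up: each node is created the first time it is reached and linked to its parent once the parent node exists.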
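load_dictionary maps the real-valued sentiment scores in sentiment_labels.txt, which lie in [0, 1], onto the five classes {-2, -1, 0, +1, +2} with cut points at 0.2, 0.4, 0.6 and 0.8. The same bucketing written as a standalone function for reference (rating_to_label is an illustrative name only, not used by the scripts):

def rating_to_label(rating):
    """Map an SST sentiment score in [0, 1] to one of five classes,
    using the same thresholds as load_dictionary above."""
    if rating <= 0.2:
        return -2   # very negative
    elif rating <= 0.4:
        return -1   # negative
    elif rating <= 0.6:
        return 0    # neutral
    elif rating <= 0.8:
        return +1   # positive
    else:
        return +2   # very positive

assert [rating_to_label(r) for r in (0.1, 0.3, 0.5, 0.7, 0.9)] == [-2, -1, 0, 1, 2]

write_labels then emits one such label per tree node; dependency-tree nodes whose span does not appear in the phrase dictionary are written as '#'.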
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 19yy 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 309 | 310 | 311 | Also add information on how to contact you by electronic and paper mail. 312 | 313 | If the program is interactive, make it output a short notice like this 314 | when it starts in an interactive mode: 315 | 316 | Gnomovision version 69, Copyright (C) 19yy name of author 317 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 318 | This is free software, and you are welcome to redistribute it 319 | under certain conditions; type `show c' for details. 320 | 321 | The hypothetical commands `show w' and `show c' should show the appropriate 322 | parts of the General Public License. Of course, the commands you use may 323 | be called something other than `show w' and `show c'; they could even be 324 | mouse-clicks or menu items--whatever suits your program. 325 | 326 | You should also get your employer (if you work as a programmer) or your 327 | school, if any, to sign a "copyright disclaimer" for the program, if 328 | necessary. Here is a sample; alter the names: 329 | 330 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 331 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 332 | 333 | , 1 April 1989 334 | Ty Coon, President of Vice 335 | 336 | This General Public License does not permit incorporating your program into 337 | proprietary programs. If your program is a subroutine library, you may 338 | consider it more useful to permit linking proprietary applications with the 339 | library. If this is what you want to do, use the GNU Library General 340 | Public License instead of this License. 341 | --------------------------------------------------------------------------------
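A closing note on the similarity predictions produced by LSTMSim:predict earlier in this listing: the exp() call implies that sim_module outputs log-probabilities over the five similarity scores 1 through 5, so torch.range(1, 5):dot(output:exp()) is the expected score under that distribution, i.e. a real-valued prediction in [1, 5] rather than a hard class. A small NumPy illustration of that final step (the values below are made up, not real model output):

import numpy as np

# Suppose log_probs holds the log-probabilities over similarity
# scores 1..5 produced by the similarity module for one sentence pair.
log_probs = np.log(np.array([0.05, 0.10, 0.20, 0.40, 0.25]))

# Expected score, mirroring torch.range(1, 5):dot(output:exp())
scores = np.arange(1, 6)
prediction = scores.dot(np.exp(log_probs))
print(round(prediction, 2))  # 3.7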