├── .gitignore
├── fetch_and_preprocess.sh
├── layers
│   └── CRowAddTable.lua
├── scripts
│   ├── convert-wordvecs.lua
│   ├── preprocess-sick.py
│   ├── download.py
│   └── preprocess-sst.py
├── lib
│   ├── CollapseUnaryTransformer.java
│   ├── DependencyParse.java
│   └── ConstituencyParse.java
├── models
│   ├── TreeLSTM.lua
│   ├── ChildSumTreeLSTM.lua
│   ├── BinaryTreeLSTM.lua
│   └── LSTM.lua
├── util
│   ├── Tree.lua
│   ├── Vocab.lua
│   └── read_data.lua
├── init.lua
├── README.md
├── relatedness
│   ├── main.lua
│   ├── TreeLSTMSim.lua
│   └── LSTMSim.lua
├── sentiment
│   ├── TreeLSTMSentiment.lua
│   ├── main.lua
│   └── LSTMSentiment.lua
└── LICENSE.txt
/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | data 3 | predictions 4 | trained_models 5 | *~ 6 | #*# 7 | *.class 8 | lib/stanford-parser 9 | lib/stanford-tagger 10 | 11 | -------------------------------------------------------------------------------- /fetch_and_preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | python2.7 scripts/download.py 4 | 5 | CLASSPATH="lib:lib/stanford-parser/stanford-parser.jar:lib/stanford-parser/stanford-parser-3.5.1-models.jar" 6 | javac -cp $CLASSPATH lib/*.java 7 | python2.7 scripts/preprocess-sick.py 8 | python2.7 scripts/preprocess-sst.py 9 | 10 | glove_dir="data/glove" 11 | glove_pre="glove.840B" 12 | glove_dim="300d" 13 | if [ ! -f $glove_dir/$glove_pre.$glove_dim.th ]; then 14 | th scripts/convert-wordvecs.lua $glove_dir/$glove_pre.$glove_dim.txt \ 15 | $glove_dir/$glove_pre.vocab $glove_dir/$glove_pre.$glove_dim.th 16 | fi 17 | -------------------------------------------------------------------------------- /layers/CRowAddTable.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Add a vector to every row of a matrix. 4 | 5 | Input: { [n x m], [m] } 6 | 7 | Output: [n x m] 8 | 9 | --]] 10 | 11 | local CRowAddTable, parent = torch.class('treelstm.CRowAddTable', 'nn.Module') 12 | 13 | function CRowAddTable:__init() 14 | parent.__init(self) 15 | self.gradInput = {} 16 | end 17 | 18 | function CRowAddTable:updateOutput(input) 19 | self.output:resizeAs(input[1]):copy(input[1]) 20 | for i = 1, self.output:size(1) do 21 | self.output[i]:add(input[2]) 22 | end 23 | return self.output 24 | end 25 | 26 | function CRowAddTable:updateGradInput(input, gradOutput) 27 | self.gradInput[1] = self.gradInput[1] or input[1].new() 28 | self.gradInput[2] = self.gradInput[2] or input[2].new() 29 | self.gradInput[1]:resizeAs(input[1]) 30 | self.gradInput[2]:resizeAs(input[2]):zero() 31 | 32 | self.gradInput[1]:copy(gradOutput) 33 | for i = 1, gradOutput:size(1) do 34 | self.gradInput[2]:add(gradOutput[i]) 35 | end 36 | 37 | return self.gradInput 38 | end 39 | -------------------------------------------------------------------------------- /scripts/convert-wordvecs.lua: -------------------------------------------------------------------------------- 1 | require('torch') 2 | require('xlua') 3 | 4 | local path = arg[1] 5 | local vocabpath = arg[2] 6 | local vecpath = arg[3] 7 | local prefix_toks = stringx.split(path, '.') 8 | print('Converting ' .. path .. 
' to Torch serialized format') 9 | 10 | -- get dimension and number of lines 11 | local file = io.open(path, 'r') 12 | local line 13 | local count = 0 14 | local dim = 0 15 | while true do 16 | line = file:read() 17 | if not line then break end 18 | if count == 0 then 19 | dim = #stringx.split(line) - 1 20 | end 21 | count = count + 1 22 | end 23 | 24 | print('count = ' .. count) 25 | print('dim = ' .. dim) 26 | 27 | -- convert to torch-friendly format 28 | file:seek('set') 29 | local vocab = io.open(vocabpath, 'w') 30 | local vecs = torch.FloatTensor(count, dim) 31 | for i = 1, count do 32 | xlua.progress(i, count) 33 | local tokens = stringx.split(file:read()) 34 | local word = tokens[1] 35 | vocab:write(word .. '\n') 36 | for j = 1, dim do 37 | vecs[{i, j}] = tonumber(tokens[j + 1]) 38 | end 39 | end 40 | file:close() 41 | vocab:close() 42 | torch.save(vecpath, vecs) 43 | -------------------------------------------------------------------------------- /lib/CollapseUnaryTransformer.java: -------------------------------------------------------------------------------- 1 | import java.util.List; 2 | 3 | import edu.stanford.nlp.ling.Label; 4 | import edu.stanford.nlp.trees.Tree; 5 | import edu.stanford.nlp.trees.TreeTransformer; 6 | import edu.stanford.nlp.util.Generics; 7 | 8 | /** 9 | * This transformer collapses chains of unary nodes so that the top 10 | * node is the only node left. The Sentiment model does not handle 11 | * unary nodes, so this simplifies them to make a binary tree consist 12 | * entirely of binary nodes and preterminals. A new tree with new 13 | * nodes and labels is returned; the original tree is unchanged. 14 | * 15 | * @author John Bauer 16 | */ 17 | public class CollapseUnaryTransformer implements TreeTransformer { 18 | public Tree transformTree(Tree tree) { 19 | if (tree.isPreTerminal() || tree.isLeaf()) { 20 | return tree.deepCopy(); 21 | } 22 | 23 | Label label = tree.label().labelFactory().newLabel(tree.label()); 24 | Tree[] children = tree.children(); 25 | while (children.length == 1 && !children[0].isLeaf()) { 26 | children = children[0].children(); 27 | } 28 | List processedChildren = Generics.newArrayList(); 29 | for (Tree child : children) { 30 | processedChildren.add(transformTree(child)); 31 | } 32 | return tree.treeFactory().newTreeNode(label, processedChildren); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /models/TreeLSTM.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Tree-LSTM base class 4 | 5 | --]] 6 | 7 | local TreeLSTM, parent = torch.class('treelstm.TreeLSTM', 'nn.Module') 8 | 9 | function TreeLSTM:__init(config) 10 | parent.__init(self) 11 | self.in_dim = config.in_dim 12 | if self.in_dim == nil then error('input dimension must be specified') end 13 | self.mem_dim = config.mem_dim or 150 14 | self.mem_zeros = torch.zeros(self.mem_dim) 15 | self.train = false 16 | end 17 | 18 | function TreeLSTM:forward(tree, inputs) 19 | end 20 | 21 | function TreeLSTM:backward(tree, inputs, grad) 22 | end 23 | 24 | function TreeLSTM:training() 25 | self.train = true 26 | end 27 | 28 | function TreeLSTM:evaluate() 29 | self.train = false 30 | end 31 | 32 | function TreeLSTM:allocate_module(tree, module) 33 | local modules = module .. 's' 34 | local num_free = #self[modules] 35 | if num_free == 0 then 36 | tree[module] = self['new_' .. 
module](self) 37 | else 38 | tree[module] = self[modules][num_free] 39 | self[modules][num_free] = nil 40 | end 41 | 42 | -- necessary for dropout to behave properly 43 | if self.train then tree[module]:training() else tree[module]:evaluate() end 44 | end 45 | 46 | function TreeLSTM:free_module(tree, module) 47 | if tree[module] == nil then return end 48 | table.insert(self[module .. 's'], tree[module]) 49 | tree[module] = nil 50 | end 51 | -------------------------------------------------------------------------------- /util/Tree.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A basic tree structure. 4 | 5 | --]] 6 | 7 | local Tree = torch.class('treelstm.Tree') 8 | 9 | function Tree:__init() 10 | self.parent = nil 11 | self.num_children = 0 12 | self.children = {} 13 | end 14 | 15 | function Tree:add_child(c) 16 | c.parent = self 17 | self.num_children = self.num_children + 1 18 | self.children[self.num_children] = c 19 | end 20 | 21 | function Tree:size() 22 | if self._size ~= nil then return self._size end 23 | local size = 1 24 | for i = 1, self.num_children do 25 | size = size + self.children[i]:size() 26 | end 27 | self._size = size 28 | return size 29 | end 30 | 31 | function Tree:depth() 32 | local depth = 0 33 | if self.num_children > 0 then 34 | for i = 1, self.num_children do 35 | local child_depth = self.children[i]:depth() 36 | if child_depth > depth then 37 | depth = child_depth 38 | end 39 | end 40 | depth = depth + 1 41 | end 42 | return depth 43 | end 44 | 45 | local function depth_first_preorder(tree, nodes) 46 | if tree == nil then 47 | return 48 | end 49 | table.insert(nodes, tree) 50 | for i = 1, tree.num_children do 51 | depth_first_preorder(tree.children[i], nodes) 52 | end 53 | end 54 | 55 | function Tree:depth_first_preorder() 56 | local nodes = {} 57 | depth_first_preorder(self, nodes) 58 | return nodes 59 | end 60 | -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | require('torch') 2 | require('nn') 3 | require('nngraph') 4 | require('optim') 5 | require('xlua') 6 | require('sys') 7 | require('lfs') 8 | 9 | treelstm = {} 10 | 11 | include('util/read_data.lua') 12 | include('util/Tree.lua') 13 | include('util/Vocab.lua') 14 | include('layers/CRowAddTable.lua') 15 | include('models/LSTM.lua') 16 | include('models/TreeLSTM.lua') 17 | include('models/ChildSumTreeLSTM.lua') 18 | include('models/BinaryTreeLSTM.lua') 19 | include('relatedness/LSTMSim.lua') 20 | include('relatedness/TreeLSTMSim.lua') 21 | include('sentiment/LSTMSentiment.lua') 22 | include('sentiment/TreeLSTMSentiment.lua') 23 | 24 | printf = utils.printf 25 | 26 | -- global paths (modify if desired) 27 | treelstm.data_dir = 'data' 28 | treelstm.models_dir = 'trained_models' 29 | treelstm.predictions_dir = 'predictions' 30 | 31 | -- share module parameters 32 | function share_params(cell, src) 33 | if torch.type(cell) == 'nn.gModule' then 34 | for i = 1, #cell.forwardnodes do 35 | local node = cell.forwardnodes[i] 36 | if node.data.module then 37 | node.data.module:share(src.forwardnodes[i].data.module, 38 | 'weight', 'bias', 'gradWeight', 'gradBias') 39 | end 40 | end 41 | elseif torch.isTypeOf(cell, 'nn.Module') then 42 | cell:share(src, 'weight', 'bias', 'gradWeight', 'gradBias') 43 | else 44 | error('parameters cannot be shared for this input') 45 | end 46 | end 47 | 48 | function header(s) 49 | print(string.rep('-', 
80)) 50 | print(s) 51 | print(string.rep('-', 80)) 52 | end 53 | -------------------------------------------------------------------------------- /util/Vocab.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A vocabulary object. Initialized from a file with one vocabulary token per line. 4 | Maps between vocabulary tokens and indices. If an UNK token is defined in the 5 | vocabulary, returns the index to this token if queried for an out-of-vocabulary 6 | token. 7 | 8 | --]] 9 | 10 | local Vocab = torch.class('treelstm.Vocab') 11 | 12 | function Vocab:__init(path) 13 | self.size = 0 14 | self._index = {} 15 | self._tokens = {} 16 | 17 | local file = io.open(path) 18 | while true do 19 | local line = file:read() 20 | if line == nil then break end 21 | self.size = self.size + 1 22 | self._tokens[self.size] = line 23 | self._index[line] = self.size 24 | end 25 | file:close() 26 | 27 | local unks = {'<unk>', '<UNK>', 'UUUNKKK'} 28 | for _, tok in pairs(unks) do 29 | self.unk_index = self.unk_index or self._index[tok] 30 | if self.unk_index ~= nil then 31 | self.unk_token = tok 32 | break 33 | end 34 | end 35 | 36 | local starts = {'<s>', '<S>'} 37 | for _, tok in pairs(starts) do 38 | self.start_index = self.start_index or self._index[tok] 39 | if self.start_index ~= nil then 40 | self.start_token = tok 41 | break 42 | end 43 | end 44 | 45 | local ends = {'</s>', '</S>'} 46 | for _, tok in pairs(ends) do 47 | self.end_index = self.end_index or self._index[tok] 48 | if self.end_index ~= nil then 49 | self.end_token = tok 50 | break 51 | end 52 | end 53 | end 54 | 55 | function Vocab:contains(w) 56 | if not self._index[w] then return false end 57 | return true 58 | end 59 | 60 | function Vocab:add(w) 61 | if self._index[w] ~= nil then 62 | return self._index[w] 63 | end 64 | self.size = self.size + 1 65 | self._tokens[self.size] = w 66 | self._index[w] = self.size 67 | return self.size 68 | end 69 | 70 | function Vocab:index(w) 71 | local index = self._index[w] 72 | if index == nil then 73 | if self.unk_index == nil then 74 | error('Token not in vocabulary and no UNK token defined: ' .. w) 75 | end 76 | return self.unk_index 77 | end 78 | return index 79 | end 80 | 81 | function Vocab:token(i) 82 | if i < 1 or i > self.size then 83 | error('Index ' .. i .. ' out of bounds') 84 | end 85 | return self._tokens[i] 86 | end 87 | 88 | function Vocab:map(tokens) 89 | local len = #tokens 90 | local output = torch.IntTensor(len) 91 | for i = 1, len do 92 | output[i] = self:index(tokens[i]) 93 | end 94 | return output 95 | end 96 | 97 | function Vocab:add_unk_token() 98 | if self.unk_token ~= nil then return end 99 | self.unk_index = self:add('<unk>') 100 | end 101 | 102 | function Vocab:add_start_token() 103 | if self.start_token ~= nil then return end 104 | self.start_index = self:add('<s>') 105 | end 106 | 107 | function Vocab:add_end_token() 108 | if self.end_token ~= nil then return end 109 | self.end_index = self:add('</s>') 110 | end 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Tree-Structured Long Short-Term Memory Networks 2 | =============================================== 3 | 4 | An implementation of the Tree-LSTM architectures described in the paper 5 | [Improved Semantic Representations From Tree-Structured Long Short-Term Memory 6 | Networks](http://arxiv.org/abs/1503.00075) by Kai Sheng Tai, Richard Socher, and 7 | Christopher Manning. 
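For reference, the composition that the Child-Sum Tree-LSTM (`models/ChildSumTreeLSTM.lua`) applies at a node $j$ with input $x_j$ and children $C(j)$ can be summarized as follows; this is only a sketch of the equations from the paper, with $\odot$ denoting elementwise multiplication:

$$
\begin{aligned}
\tilde{h}_j &= \textstyle\sum_{k \in C(j)} h_k \\
i_j &= \sigma\big(W^{(i)} x_j + U^{(i)} \tilde{h}_j + b^{(i)}\big) \\
f_{jk} &= \sigma\big(W^{(f)} x_j + U^{(f)} h_k + b^{(f)}\big) \\
o_j &= \sigma\big(W^{(o)} x_j + U^{(o)} \tilde{h}_j + b^{(o)}\big) \\
u_j &= \tanh\big(W^{(u)} x_j + U^{(u)} \tilde{h}_j + b^{(u)}\big) \\
c_j &= i_j \odot u_j + \textstyle\sum_{k \in C(j)} f_{jk} \odot c_k \\
h_j &= o_j \odot \tanh(c_j)
\end{aligned}
$$

The Binary (Constituency) Tree-LSTM in `models/BinaryTreeLSTM.lua` uses the same gating structure, but with separate parameters for the left and right children and with word inputs only at the leaf nodes.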
8 | 9 | ## Requirements 10 | 11 | - [Torch7](https://github.com/torch/torch7) 12 | - [penlight](https://github.com/stevedonovan/Penlight) 13 | - [nn](https://github.com/torch/nn) 14 | - [nngraph](https://github.com/torch/nngraph) 15 | - [optim](https://github.com/torch/optim) 16 | - Java >= 8 (for Stanford CoreNLP utilities) 17 | - Python >= 2.7 18 | 19 | The Torch/Lua dependencies can be installed using [luarocks](http://luarocks.org). For example: 20 | 21 | ``` 22 | luarocks install nngraph 23 | ``` 24 | 25 | ## Usage 26 | 27 | First run the following script: 28 | 29 | ``` 30 | ./fetch_and_preprocess.sh 31 | ``` 32 | 33 | This downloads the following data: 34 | 35 | - [SICK dataset](http://alt.qcri.org/semeval2014/task1/index.php?id=data-and-tools) (semantic relatedness task) 36 | - [Stanford Sentiment Treebank](http://nlp.stanford.edu/sentiment/index.html) (sentiment classification task) 37 | - [GloVe word vectors](http://nlp.stanford.edu/projects/glove/) (Common Crawl 840B) -- **Warning:** this is a 2GB download! 38 | 39 | and the following libraries: 40 | 41 | - [Stanford Parser](http://nlp.stanford.edu/software/lex-parser.shtml) 42 | - [Stanford POS Tagger](http://nlp.stanford.edu/software/tagger.shtml) 43 | 44 | The preprocessing script generates dependency parses of the SICK dataset using the 45 | [Stanford Neural Network Dependency Parser](http://nlp.stanford.edu/software/nndep.shtml). 46 | 47 | Alternatively, the download and preprocessing scripts can be called individually. 48 | 49 | ### Semantic Relatedness 50 | 51 | The goal of this task is to predict similarity ratings for pairs of sentences. We train and evaluate our models on the [Sentences Involving Compositional Knowledge (SICK)](http://alt.qcri.org/semeval2014/task1/index.php?id=data-and-tools) dataset. 52 | 53 | To train models for the semantic relatedness prediction task on the SICK dataset, 54 | run: 55 | 56 | ``` 57 | th relatedness/main.lua --model <dependency|constituency|lstm|bilstm> --layers <num_layers> --dim <mem_dim> --epochs <num_epochs> 58 | ``` 59 | 60 | where: 61 | 62 | - `model`: the LSTM variant to train (default: dependency, i.e. the Dependency Tree-LSTM) 63 | - `layers`: the number of layers (default: 1, ignored for Tree-LSTMs) 64 | - `dim`: the LSTM memory dimension (default: 150) 65 | - `epochs`: the number of training epochs (default: 10) 66 | 67 | ### Sentiment Classification 68 | 69 | The goal of this task is to predict sentiment labels for sentences. For this task, we use the [Stanford Sentiment Treebank](http://nlp.stanford.edu/sentiment/index.html) dataset. Here, there are two sub-tasks: binary and fine-grained. In the binary sub-task, the sentences are labeled `positive` or `negative`. In the fine-grained sub-task, the sentences are labeled `very positive`, `positive`, `neutral`, `negative` or `very negative`. 70 | 71 | To train models for the sentiment classification task on the Stanford Sentiment Treebank, run: 72 | 73 | ``` 74 | th sentiment/main.lua --model <constituency|lstm|bilstm> --layers <num_layers> --dim <mem_dim> --epochs <num_epochs> 75 | ``` 76 | 77 | This trains a Constituency Tree-LSTM model for the "fine-grained" 5-class classification sub-task. 78 | 79 | For the binary classification sub-task, run with the `-b` or `--binary` flag, for example: 80 | 81 | ``` 82 | th sentiment/main.lua -m constituency -b 83 | ``` 84 | 85 | Predictions are written to the `predictions` directory and trained model parameters are saved to the `trained_models` directory. 86 | 87 | See the [paper](http://arxiv.org/abs/1503.00075) for more details on these experiments. 
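A trained sentiment model can be reloaded for prediction via the `load` constructor in `sentiment/TreeLSTMSentiment.lua`. A minimal sketch, assuming the `treelstm` package has already been loaded the same way the training scripts load it (the saved model filename below is illustrative; the actual name depends on the run index assigned by the training script):

```
-- Minimal sketch: reload a trained Constituency Tree-LSTM (5-class sub-task)
-- and score the SST test split. The model path is illustrative.
local vocab = treelstm.Vocab(treelstm.data_dir .. '/sst/vocab-cased.txt')
local test_dataset = treelstm.read_sentiment_dataset(
  treelstm.data_dir .. '/sst/test/', vocab, true, false)  -- fine_grained = true, dependency parses = false

local model = treelstm.TreeLSTMSentiment.load(
  treelstm.models_dir .. '/sent-constituency.5class.1l.150d.1.th')
local predictions = model:predict_dataset(test_dataset)
print(predictions[1])  -- predicted class index (1 = very negative ... 5 = very positive)
```

The relatedness models follow the same pattern through `model_class.load` and `predict_dataset`, as noted at the end of `relatedness/main.lua`.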
88 | 89 | ## Third-party Implementations 90 | 91 | - A Tensorflow Fold [re-implementation](https://github.com/tensorflow/fold/blob/master/tensorflow_fold/g3doc/sentiment.ipynb) of the Tree-LSTM for sentiment classification 92 | -------------------------------------------------------------------------------- /scripts/preprocess-sick.py: -------------------------------------------------------------------------------- 1 | """ 2 | Preprocessing script for SICK data. 3 | 4 | """ 5 | 6 | import os 7 | import glob 8 | 9 | def make_dirs(dirs): 10 | for d in dirs: 11 | if not os.path.exists(d): 12 | os.makedirs(d) 13 | 14 | def dependency_parse(filepath, cp='', tokenize=True): 15 | print('\nDependency parsing ' + filepath) 16 | dirpath = os.path.dirname(filepath) 17 | filepre = os.path.splitext(os.path.basename(filepath))[0] 18 | tokpath = os.path.join(dirpath, filepre + '.toks') 19 | parentpath = os.path.join(dirpath, filepre + '.parents') 20 | relpath = os.path.join(dirpath, filepre + '.rels') 21 | tokenize_flag = '-tokenize - ' if tokenize else '' 22 | cmd = ('java -cp %s DependencyParse -tokpath %s -parentpath %s -relpath %s %s < %s' 23 | % (cp, tokpath, parentpath, relpath, tokenize_flag, filepath)) 24 | os.system(cmd) 25 | 26 | def constituency_parse(filepath, cp='', tokenize=True): 27 | dirpath = os.path.dirname(filepath) 28 | filepre = os.path.splitext(os.path.basename(filepath))[0] 29 | tokpath = os.path.join(dirpath, filepre + '.toks') 30 | parentpath = os.path.join(dirpath, filepre + '.cparents') 31 | tokenize_flag = '-tokenize - ' if tokenize else '' 32 | cmd = ('java -cp %s ConstituencyParse -tokpath %s -parentpath %s %s < %s' 33 | % (cp, tokpath, parentpath, tokenize_flag, filepath)) 34 | os.system(cmd) 35 | 36 | def build_vocab(filepaths, dst_path, lowercase=True): 37 | vocab = set() 38 | for filepath in filepaths: 39 | with open(filepath) as f: 40 | for line in f: 41 | if lowercase: 42 | line = line.lower() 43 | vocab |= set(line.split()) 44 | with open(dst_path, 'w') as f: 45 | for w in sorted(vocab): 46 | f.write(w + '\n') 47 | 48 | def split(filepath, dst_dir): 49 | with open(filepath) as datafile, \ 50 | open(os.path.join(dst_dir, 'a.txt'), 'w') as afile, \ 51 | open(os.path.join(dst_dir, 'b.txt'), 'w') as bfile, \ 52 | open(os.path.join(dst_dir, 'id.txt'), 'w') as idfile, \ 53 | open(os.path.join(dst_dir, 'sim.txt'), 'w') as simfile: 54 | datafile.readline() 55 | for line in datafile: 56 | i, a, b, sim, ent = line.strip().split('\t') 57 | idfile.write(i + '\n') 58 | afile.write(a + '\n') 59 | bfile.write(b + '\n') 60 | simfile.write(sim + '\n') 61 | 62 | def parse(dirpath, cp=''): 63 | dependency_parse(os.path.join(dirpath, 'a.txt'), cp=cp, tokenize=True) 64 | dependency_parse(os.path.join(dirpath, 'b.txt'), cp=cp, tokenize=True) 65 | constituency_parse(os.path.join(dirpath, 'a.txt'), cp=cp, tokenize=True) 66 | constituency_parse(os.path.join(dirpath, 'b.txt'), cp=cp, tokenize=True) 67 | 68 | if __name__ == '__main__': 69 | print('=' * 80) 70 | print('Preprocessing SICK dataset') 71 | print('=' * 80) 72 | 73 | base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 74 | data_dir = os.path.join(base_dir, 'data') 75 | sick_dir = os.path.join(data_dir, 'sick') 76 | lib_dir = os.path.join(base_dir, 'lib') 77 | train_dir = os.path.join(sick_dir, 'train') 78 | dev_dir = os.path.join(sick_dir, 'dev') 79 | test_dir = os.path.join(sick_dir, 'test') 80 | make_dirs([train_dir, dev_dir, test_dir]) 81 | 82 | # java classpath for calling Stanford parser 83 | classpath = 
':'.join([ 84 | lib_dir, 85 | os.path.join(lib_dir, 'stanford-parser/stanford-parser.jar'), 86 | os.path.join(lib_dir, 'stanford-parser/stanford-parser-3.5.1-models.jar')]) 87 | 88 | # split into separate files 89 | split(os.path.join(sick_dir, 'SICK_train.txt'), train_dir) 90 | split(os.path.join(sick_dir, 'SICK_trial.txt'), dev_dir) 91 | split(os.path.join(sick_dir, 'SICK_test_annotated.txt'), test_dir) 92 | 93 | # parse sentences 94 | parse(train_dir, cp=classpath) 95 | parse(dev_dir, cp=classpath) 96 | parse(test_dir, cp=classpath) 97 | 98 | # get vocabulary 99 | build_vocab( 100 | glob.glob(os.path.join(sick_dir, '*/*.toks')), 101 | os.path.join(sick_dir, 'vocab.txt')) 102 | build_vocab( 103 | glob.glob(os.path.join(sick_dir, '*/*.toks')), 104 | os.path.join(sick_dir, 'vocab-cased.txt'), 105 | lowercase=False) 106 | -------------------------------------------------------------------------------- /scripts/download.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads the following: 3 | - Stanford parser 4 | - Stanford POS tagger 5 | - Glove vectors 6 | - SICK dataset (semantic relatedness task) 7 | - Stanford Sentiment Treebank (sentiment classification task) 8 | 9 | """ 10 | 11 | from __future__ import print_function 12 | import urllib2 13 | import sys 14 | import os 15 | import shutil 16 | import zipfile 17 | import gzip 18 | 19 | def download(url, dirpath): 20 | filename = url.split('/')[-1] 21 | filepath = os.path.join(dirpath, filename) 22 | try: 23 | u = urllib2.urlopen(url) 24 | except: 25 | print("URL %s failed to open" %url) 26 | raise Exception 27 | try: 28 | f = open(filepath, 'wb') 29 | except: 30 | print("Cannot write %s" %filepath) 31 | raise Exception 32 | try: 33 | filesize = int(u.info().getheaders("Content-Length")[0]) 34 | except: 35 | print("URL %s failed to report length" %url) 36 | raise Exception 37 | print("Downloading: %s Bytes: %s" % (filename, filesize)) 38 | 39 | downloaded = 0 40 | block_sz = 8192 41 | status_width = 70 42 | while True: 43 | buf = u.read(block_sz) 44 | if not buf: 45 | print('') 46 | break 47 | else: 48 | print('', end='\r') 49 | downloaded += len(buf) 50 | f.write(buf) 51 | status = (("[%-" + str(status_width + 1) + "s] %3.2f%%") % 52 | ('=' * int(float(downloaded) / filesize * status_width) + '>', downloaded * 100. 
/ filesize)) 53 | print(status, end='') 54 | sys.stdout.flush() 55 | f.close() 56 | return filepath 57 | 58 | def unzip(filepath): 59 | print("Extracting: " + filepath) 60 | dirpath = os.path.dirname(filepath) 61 | with zipfile.ZipFile(filepath) as zf: 62 | zf.extractall(dirpath) 63 | os.remove(filepath) 64 | 65 | def download_tagger(dirpath): 66 | tagger_dir = 'stanford-tagger' 67 | if os.path.exists(os.path.join(dirpath, tagger_dir)): 68 | print('Found Stanford POS Tagger - skip') 69 | return 70 | url = 'http://nlp.stanford.edu/software/stanford-postagger-2015-01-29.zip' 71 | filepath = download(url, dirpath) 72 | zip_dir = '' 73 | with zipfile.ZipFile(filepath) as zf: 74 | zip_dir = zf.namelist()[0] 75 | zf.extractall(dirpath) 76 | os.remove(filepath) 77 | os.rename(os.path.join(dirpath, zip_dir), os.path.join(dirpath, tagger_dir)) 78 | 79 | def download_parser(dirpath): 80 | parser_dir = 'stanford-parser' 81 | if os.path.exists(os.path.join(dirpath, parser_dir)): 82 | print('Found Stanford Parser - skip') 83 | return 84 | url = 'http://nlp.stanford.edu/software/stanford-parser-full-2015-01-29.zip' 85 | filepath = download(url, dirpath) 86 | zip_dir = '' 87 | with zipfile.ZipFile(filepath) as zf: 88 | zip_dir = zf.namelist()[0] 89 | zf.extractall(dirpath) 90 | os.remove(filepath) 91 | os.rename(os.path.join(dirpath, zip_dir), os.path.join(dirpath, parser_dir)) 92 | 93 | def download_wordvecs(dirpath): 94 | if os.path.exists(dirpath): 95 | print('Found Glove vectors - skip') 96 | return 97 | else: 98 | os.makedirs(dirpath) 99 | url = 'http://www-nlp.stanford.edu/data/glove.840B.300d.zip' 100 | unzip(download(url, dirpath)) 101 | 102 | def download_sick(dirpath): 103 | if os.path.exists(dirpath): 104 | print('Found SICK dataset - skip') 105 | return 106 | else: 107 | os.makedirs(dirpath) 108 | train_url = 'http://alt.qcri.org/semeval2014/task1/data/uploads/sick_train.zip' 109 | trial_url = 'http://alt.qcri.org/semeval2014/task1/data/uploads/sick_trial.zip' 110 | test_url = 'http://alt.qcri.org/semeval2014/task1/data/uploads/sick_test_annotated.zip' 111 | unzip(download(train_url, dirpath)) 112 | unzip(download(trial_url, dirpath)) 113 | unzip(download(test_url, dirpath)) 114 | 115 | def download_sst(dirpath): 116 | if os.path.exists(dirpath): 117 | print('Found SST dataset - skip') 118 | return 119 | url = 'http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip' 120 | parent_dir = os.path.dirname(dirpath) 121 | unzip(download(url, parent_dir)) 122 | os.rename( 123 | os.path.join(parent_dir, 'stanfordSentimentTreebank'), 124 | os.path.join(parent_dir, 'sst')) 125 | shutil.rmtree(os.path.join(parent_dir, '__MACOSX')) # remove extraneous dir 126 | 127 | if __name__ == '__main__': 128 | base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 129 | 130 | # data 131 | data_dir = os.path.join(base_dir, 'data') 132 | wordvec_dir = os.path.join(data_dir, 'glove') 133 | sick_dir = os.path.join(data_dir, 'sick') 134 | sst_dir = os.path.join(data_dir, 'sst') 135 | 136 | # libraries 137 | lib_dir = os.path.join(base_dir, 'lib') 138 | 139 | # download dependencies 140 | download_tagger(lib_dir) 141 | download_parser(lib_dir) 142 | download_wordvecs(wordvec_dir) 143 | download_sick(sick_dir) 144 | download_sst(sst_dir) 145 | -------------------------------------------------------------------------------- /lib/DependencyParse.java: -------------------------------------------------------------------------------- 1 | import edu.stanford.nlp.process.WordTokenFactory; 2 | import 
edu.stanford.nlp.ling.HasWord; 3 | import edu.stanford.nlp.ling.Word; 4 | import edu.stanford.nlp.ling.TaggedWord; 5 | import edu.stanford.nlp.parser.nndep.DependencyParser; 6 | import edu.stanford.nlp.process.PTBTokenizer; 7 | import edu.stanford.nlp.trees.TypedDependency; 8 | import edu.stanford.nlp.util.StringUtils; 9 | import edu.stanford.nlp.tagger.maxent.MaxentTagger; 10 | 11 | import java.io.BufferedWriter; 12 | import java.io.FileWriter; 13 | import java.io.StringReader; 14 | import java.util.ArrayList; 15 | import java.util.Collection; 16 | import java.util.List; 17 | import java.util.Properties; 18 | import java.util.Scanner; 19 | 20 | public class DependencyParse { 21 | 22 | public static final String TAGGER_MODEL = "stanford-tagger/models/english-left3words-distsim.tagger"; 23 | public static final String PARSER_MODEL = "edu/stanford/nlp/models/parser/nndep/english_SD.gz"; 24 | 25 | public static void main(String[] args) throws Exception { 26 | Properties props = StringUtils.argsToProperties(args); 27 | if (!props.containsKey("tokpath") || 28 | !props.containsKey("parentpath") || 29 | !props.containsKey("relpath")) { 30 | System.err.println( 31 | "usage: java DependencyParse -tokenize - -tokpath -parentpath -relpath "); 32 | System.exit(1); 33 | } 34 | 35 | boolean tokenize = false; 36 | if (props.containsKey("tokenize")) { 37 | tokenize = true; 38 | } 39 | 40 | String tokPath = props.getProperty("tokpath"); 41 | String parentPath = props.getProperty("parentpath"); 42 | String relPath = props.getProperty("relpath"); 43 | 44 | BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath)); 45 | BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath)); 46 | BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath)); 47 | 48 | MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL); 49 | DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL); 50 | Scanner stdin = new Scanner(System.in); 51 | int count = 0; 52 | long start = System.currentTimeMillis(); 53 | while (stdin.hasNextLine()) { 54 | String line = stdin.nextLine(); 55 | List tokens = new ArrayList<>(); 56 | if (tokenize) { 57 | PTBTokenizer tokenizer = new PTBTokenizer( 58 | new StringReader(line), new WordTokenFactory(), ""); 59 | for (Word label; tokenizer.hasNext(); ) { 60 | tokens.add(tokenizer.next()); 61 | } 62 | } else { 63 | for (String word : line.split(" ")) { 64 | tokens.add(new Word(word)); 65 | } 66 | } 67 | 68 | List tagged = tagger.tagSentence(tokens); 69 | 70 | int len = tagged.size(); 71 | Collection tdl = parser.predict(tagged).typedDependencies(); 72 | int[] parents = new int[len]; 73 | for (int i = 0; i < len; i++) { 74 | // if a node has a parent of -1 at the end of parsing, then the node 75 | // has no parent. 
76 | parents[i] = -1; 77 | } 78 | 79 | String[] relns = new String[len]; 80 | for (TypedDependency td : tdl) { 81 | // let root have index 0 82 | int child = td.dep().index(); 83 | int parent = td.gov().index(); 84 | relns[child - 1] = td.reln().toString(); 85 | parents[child - 1] = parent; 86 | } 87 | 88 | // print tokens 89 | StringBuilder sb = new StringBuilder(); 90 | for (int i = 0; i < len - 1; i++) { 91 | if (tokenize) { 92 | sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word())); 93 | } else { 94 | sb.append(tokens.get(i).word()); 95 | } 96 | sb.append(' '); 97 | } 98 | if (tokenize) { 99 | sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word())); 100 | } else { 101 | sb.append(tokens.get(len - 1).word()); 102 | } 103 | sb.append('\n'); 104 | tokWriter.write(sb.toString()); 105 | 106 | // print parent pointers 107 | sb = new StringBuilder(); 108 | for (int i = 0; i < len - 1; i++) { 109 | sb.append(parents[i]); 110 | sb.append(' '); 111 | } 112 | sb.append(parents[len - 1]); 113 | sb.append('\n'); 114 | parentWriter.write(sb.toString()); 115 | 116 | // print relations 117 | sb = new StringBuilder(); 118 | for (int i = 0; i < len - 1; i++) { 119 | sb.append(relns[i]); 120 | sb.append(' '); 121 | } 122 | sb.append(relns[len - 1]); 123 | sb.append('\n'); 124 | relWriter.write(sb.toString()); 125 | 126 | count++; 127 | if (count % 1000 == 0) { 128 | double elapsed = (System.currentTimeMillis() - start) / 1000.0; 129 | System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed); 130 | } 131 | } 132 | 133 | long totalTimeMillis = System.currentTimeMillis() - start; 134 | System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", 135 | count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count); 136 | tokWriter.close(); 137 | parentWriter.close(); 138 | relWriter.close(); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /models/ChildSumTreeLSTM.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A Child-Sum Tree-LSTM with input at each node. 
4 | 5 | --]] 6 | 7 | local ChildSumTreeLSTM, parent = torch.class('treelstm.ChildSumTreeLSTM', 'treelstm.TreeLSTM') 8 | 9 | function ChildSumTreeLSTM:__init(config) 10 | parent.__init(self, config) 11 | self.gate_output = config.gate_output 12 | if self.gate_output == nil then self.gate_output = true end 13 | 14 | -- a function that instantiates an output module that takes the hidden state h as input 15 | self.output_module_fn = config.output_module_fn 16 | self.criterion = config.criterion 17 | 18 | -- composition module 19 | self.composer = self:new_composer() 20 | self.composers = {} 21 | 22 | -- output module 23 | self.output_module = self:new_output_module() 24 | self.output_modules = {} 25 | end 26 | 27 | function ChildSumTreeLSTM:new_composer() 28 | local input = nn.Identity()() 29 | local child_c = nn.Identity()() 30 | local child_h = nn.Identity()() 31 | local child_h_sum = nn.Sum(1)(child_h) 32 | 33 | local i = nn.Sigmoid()( 34 | nn.CAddTable(){ 35 | nn.Linear(self.in_dim, self.mem_dim)(input), 36 | nn.Linear(self.mem_dim, self.mem_dim)(child_h_sum) 37 | }) 38 | local f = nn.Sigmoid()( 39 | treelstm.CRowAddTable(){ 40 | nn.TemporalConvolution(self.mem_dim, self.mem_dim, 1)(child_h), 41 | nn.Linear(self.in_dim, self.mem_dim)(input), 42 | }) 43 | local update = nn.Tanh()( 44 | nn.CAddTable(){ 45 | nn.Linear(self.in_dim, self.mem_dim)(input), 46 | nn.Linear(self.mem_dim, self.mem_dim)(child_h_sum) 47 | }) 48 | local c = nn.CAddTable(){ 49 | nn.CMulTable(){i, update}, 50 | nn.Sum(1)(nn.CMulTable(){f, child_c}) 51 | } 52 | 53 | local h 54 | if self.gate_output then 55 | local o = nn.Sigmoid()( 56 | nn.CAddTable(){ 57 | nn.Linear(self.in_dim, self.mem_dim)(input), 58 | nn.Linear(self.mem_dim, self.mem_dim)(child_h_sum) 59 | }) 60 | h = nn.CMulTable(){o, nn.Tanh()(c)} 61 | else 62 | h = nn.Tanh()(c) 63 | end 64 | 65 | local composer = nn.gModule({input, child_c, child_h}, {c, h}) 66 | if self.composer ~= nil then 67 | share_params(composer, self.composer) 68 | end 69 | return composer 70 | end 71 | 72 | function ChildSumTreeLSTM:new_output_module() 73 | if self.output_module_fn == nil then return nil end 74 | local output_module = self.output_module_fn() 75 | if self.output_module ~= nil then 76 | share_params(output_module, self.output_module) 77 | end 78 | return output_module 79 | end 80 | 81 | function ChildSumTreeLSTM:forward(tree, inputs) 82 | local loss = 0 83 | for i = 1, tree.num_children do 84 | local _, child_loss = self:forward(tree.children[i], inputs) 85 | loss = loss + child_loss 86 | end 87 | local child_c, child_h = self:get_child_states(tree) 88 | self:allocate_module(tree, 'composer') 89 | tree.state = tree.composer:forward{inputs[tree.idx], child_c, child_h} 90 | 91 | if self.output_module ~= nil then 92 | self:allocate_module(tree, 'output_module') 93 | tree.output = tree.output_module:forward(tree.state[2]) 94 | if self.train and tree.gold_label ~= nil then 95 | loss = loss + self.criterion:forward(tree.output, tree.gold_label) 96 | end 97 | end 98 | return tree.state, loss 99 | end 100 | 101 | function ChildSumTreeLSTM:backward(tree, inputs, grad) 102 | local grad_inputs = torch.Tensor(inputs:size()) 103 | self:_backward(tree, inputs, grad, grad_inputs) 104 | return grad_inputs 105 | end 106 | 107 | function ChildSumTreeLSTM:_backward(tree, inputs, grad, grad_inputs) 108 | local output_grad = self.mem_zeros 109 | if tree.output ~= nil and tree.gold_label ~= nil then 110 | output_grad = tree.output_module:backward( 111 | tree.state[2], 
self.criterion:backward(tree.output, tree.gold_label)) 112 | end 113 | self:free_module(tree, 'output_module') 114 | tree.output = nil 115 | 116 | local child_c, child_h = self:get_child_states(tree) 117 | local composer_grad = tree.composer:backward( 118 | {inputs[tree.idx], child_c, child_h}, 119 | {grad[1], grad[2] + output_grad}) 120 | self:free_module(tree, 'composer') 121 | tree.state = nil 122 | 123 | grad_inputs[tree.idx] = composer_grad[1] 124 | local child_c_grads, child_h_grads = composer_grad[2], composer_grad[3] 125 | for i = 1, tree.num_children do 126 | self:_backward(tree.children[i], inputs, {child_c_grads[i], child_h_grads[i]}, grad_inputs) 127 | end 128 | end 129 | 130 | function ChildSumTreeLSTM:clean(tree) 131 | self:free_module(tree, 'composer') 132 | self:free_module(tree, 'output_module') 133 | tree.state = nil 134 | tree.output = nil 135 | for i = 1, tree.num_children do 136 | self:clean(tree.children[i]) 137 | end 138 | end 139 | 140 | function ChildSumTreeLSTM:parameters() 141 | local params, grad_params = {}, {} 142 | local cp, cg = self.composer:parameters() 143 | tablex.insertvalues(params, cp) 144 | tablex.insertvalues(grad_params, cg) 145 | if self.output_module ~= nil then 146 | local op, og = self.output_module:parameters() 147 | tablex.insertvalues(params, op) 148 | tablex.insertvalues(grad_params, og) 149 | end 150 | return params, grad_params 151 | end 152 | 153 | function ChildSumTreeLSTM:get_child_states(tree) 154 | local child_c, child_h 155 | if tree.num_children == 0 then 156 | child_c = torch.zeros(1, self.mem_dim) 157 | child_h = torch.zeros(1, self.mem_dim) 158 | else 159 | child_c = torch.Tensor(tree.num_children, self.mem_dim) 160 | child_h = torch.Tensor(tree.num_children, self.mem_dim) 161 | for i = 1, tree.num_children do 162 | child_c[i], child_h[i] = unpack(tree.children[i].state) 163 | end 164 | end 165 | return child_c, child_h 166 | end 167 | -------------------------------------------------------------------------------- /relatedness/main.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Training script for semantic relatedness prediction on the SICK dataset. 4 | 5 | --]] 6 | 7 | require('..') 8 | 9 | -- Pearson correlation 10 | function pearson(x, y) 11 | x = x - x:mean() 12 | y = y - y:mean() 13 | return x:dot(y) / (x:norm() * y:norm()) 14 | end 15 | 16 | -- read command line arguments 17 | local args = lapp [[ 18 | Training script for semantic relatedness prediction on the SICK dataset. 19 | -m,--model (default dependency) Model architecture: [dependency, constituency, lstm, bilstm] 20 | -l,--layers (default 1) Number of layers (ignored for Tree-LSTM) 21 | -d,--dim (default 150) LSTM memory dimension 22 | -e,--epochs (default 10) Number of training epochs 23 | ]] 24 | 25 | local model_name, model_class 26 | if args.model == 'dependency' then 27 | model_name = 'Dependency Tree LSTM' 28 | model_class = treelstm.TreeLSTMSim 29 | elseif args.model == 'constituency' then 30 | model_name = 'Constituency Tree LSTM' 31 | model_class = treelstm.TreeLSTMSim 32 | elseif args.model == 'lstm' then 33 | model_name = 'LSTM' 34 | model_class = treelstm.LSTMSim 35 | elseif args.model == 'bilstm' then 36 | model_name = 'Bidirectional LSTM' 37 | model_class = treelstm.LSTMSim 38 | end 39 | local model_structure = args.model 40 | header(model_name .. 
' for Semantic Relatedness') 41 | 42 | -- directory containing dataset files 43 | local data_dir = 'data/sick/' 44 | 45 | -- load vocab 46 | local vocab = treelstm.Vocab(data_dir .. 'vocab-cased.txt') 47 | 48 | -- load embeddings 49 | print('loading word embeddings') 50 | local emb_dir = 'data/glove/' 51 | local emb_prefix = emb_dir .. 'glove.840B' 52 | local emb_vocab, emb_vecs = treelstm.read_embedding(emb_prefix .. '.vocab', emb_prefix .. '.300d.th') 53 | local emb_dim = emb_vecs:size(2) 54 | 55 | -- use only vectors in vocabulary (not necessary, but gives faster training) 56 | local num_unk = 0 57 | local vecs = torch.Tensor(vocab.size, emb_dim) 58 | for i = 1, vocab.size do 59 | local w = vocab:token(i) 60 | if emb_vocab:contains(w) then 61 | vecs[i] = emb_vecs[emb_vocab:index(w)] 62 | else 63 | num_unk = num_unk + 1 64 | vecs[i]:uniform(-0.05, 0.05) 65 | end 66 | end 67 | print('unk count = ' .. num_unk) 68 | emb_vocab = nil 69 | emb_vecs = nil 70 | collectgarbage() 71 | 72 | -- load datasets 73 | print('loading datasets') 74 | local train_dir = data_dir .. 'train/' 75 | local dev_dir = data_dir .. 'dev/' 76 | local test_dir = data_dir .. 'test/' 77 | local constituency = (args.model == 'constituency') 78 | local train_dataset = treelstm.read_relatedness_dataset(train_dir, vocab, constituency) 79 | local dev_dataset = treelstm.read_relatedness_dataset(dev_dir, vocab, constituency) 80 | local test_dataset = treelstm.read_relatedness_dataset(test_dir, vocab, constituency) 81 | printf('num train = %d\n', train_dataset.size) 82 | printf('num dev = %d\n', dev_dataset.size) 83 | printf('num test = %d\n', test_dataset.size) 84 | 85 | -- initialize model 86 | local model = model_class{ 87 | emb_vecs = vecs, 88 | structure = model_structure, 89 | num_layers = args.layers, 90 | mem_dim = args.dim, 91 | } 92 | 93 | -- number of epochs to train 94 | local num_epochs = args.epochs 95 | 96 | -- print information 97 | header('model configuration') 98 | printf('max epochs = %d\n', num_epochs) 99 | model:print_config() 100 | 101 | -- train 102 | local train_start = sys.clock() 103 | local best_dev_score = -1.0 104 | local best_dev_model = model 105 | header('Training model') 106 | for i = 1, num_epochs do 107 | local start = sys.clock() 108 | printf('-- epoch %d\n', i) 109 | model:train(train_dataset) 110 | printf('-- finished epoch in %.2fs\n', sys.clock() - start) 111 | 112 | -- uncomment to compute train scores 113 | --[[ 114 | local train_predictions = model:predict_dataset(train_dataset) 115 | local train_score = pearson(train_predictions, train_dataset.labels) 116 | printf('-- train score: %.4f\n', train_score) 117 | --]] 118 | 119 | local dev_predictions = model:predict_dataset(dev_dataset) 120 | local dev_score = pearson(dev_predictions, dev_dataset.labels) 121 | printf('-- dev score: %.4f\n', dev_score) 122 | 123 | if dev_score > best_dev_score then 124 | best_dev_score = dev_score 125 | best_dev_model = model_class{ 126 | emb_vecs = vecs, 127 | structure = model_structure, 128 | num_layers = args.layers, 129 | mem_dim = args.dim, 130 | } 131 | best_dev_model.params:copy(model.params) 132 | end 133 | end 134 | printf('finished training in %.2fs\n', sys.clock() - train_start) 135 | 136 | -- evaluate 137 | header('Evaluating on test set') 138 | printf('-- using model with dev score = %.4f\n', best_dev_score) 139 | local test_predictions = best_dev_model:predict_dataset(test_dataset) 140 | local test_score = pearson(test_predictions, test_dataset.labels) 141 | printf('-- test score: %.4f\n', 
test_score) 142 | 143 | -- create predictions and model directories if necessary 144 | if lfs.attributes(treelstm.predictions_dir) == nil then 145 | lfs.mkdir(treelstm.predictions_dir) 146 | end 147 | 148 | if lfs.attributes(treelstm.models_dir) == nil then 149 | lfs.mkdir(treelstm.models_dir) 150 | end 151 | 152 | -- get paths 153 | local file_idx = 1 154 | local predictions_save_path, model_save_path 155 | while true do 156 | predictions_save_path = string.format( 157 | treelstm.predictions_dir .. '/rel-%s.%dl.%dd.%d.pred', args.model, args.layers, args.dim, file_idx) 158 | model_save_path = string.format( 159 | treelstm.models_dir .. '/rel-%s.%dl.%dd.%d.th', args.model, args.layers, args.dim, file_idx) 160 | if lfs.attributes(predictions_save_path) == nil and lfs.attributes(model_save_path) == nil then 161 | break 162 | end 163 | file_idx = file_idx + 1 164 | end 165 | 166 | -- write predictions to disk 167 | local predictions_file = torch.DiskFile(predictions_save_path, 'w') 168 | print('writing predictions to ' .. predictions_save_path) 169 | for i = 1, test_predictions:size(1) do 170 | predictions_file:writeFloat(test_predictions[i]) 171 | end 172 | predictions_file:close() 173 | 174 | -- write models to disk 175 | print('writing model to ' .. model_save_path) 176 | best_dev_model:save(model_save_path) 177 | 178 | -- to load a saved model 179 | -- local loaded = model_class.load(model_save_path) 180 | -------------------------------------------------------------------------------- /sentiment/TreeLSTMSentiment.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Sentiment classification using a Binary Tree-LSTM. 4 | 5 | --]] 6 | 7 | local TreeLSTMSentiment = torch.class('treelstm.TreeLSTMSentiment') 8 | 9 | function TreeLSTMSentiment:__init(config) 10 | self.mem_dim = config.mem_dim or 150 11 | self.learning_rate = config.learning_rate or 0.05 12 | self.emb_learning_rate = config.emb_learning_rate or 0.1 13 | self.batch_size = config.batch_size or 25 14 | self.reg = config.reg or 1e-4 15 | self.structure = config.structure or 'constituency' 16 | self.fine_grained = (config.fine_grained == nil) and true or config.fine_grained 17 | self.dropout = (config.dropout == nil) and true or config.dropout 18 | 19 | -- word embedding 20 | self.emb_dim = config.emb_vecs:size(2) 21 | self.emb = nn.LookupTable(config.emb_vecs:size(1), self.emb_dim) 22 | self.emb.weight:copy(config.emb_vecs) 23 | 24 | self.in_zeros = torch.zeros(self.emb_dim) 25 | self.num_classes = self.fine_grained and 5 or 3 26 | 27 | -- optimizer configuration 28 | self.optim_state = { learningRate = self.learning_rate } 29 | 30 | -- negative log likelihood optimization objective 31 | self.criterion = nn.ClassNLLCriterion() 32 | 33 | local treelstm_config = { 34 | in_dim = self.emb_dim, 35 | mem_dim = self.mem_dim, 36 | output_module_fn = function() return self:new_sentiment_module() end, 37 | criterion = self.criterion, 38 | } 39 | 40 | if self.structure == 'dependency' then 41 | self.treelstm = treelstm.ChildSumTreeLSTM(treelstm_config) 42 | elseif self.structure == 'constituency' then 43 | self.treelstm = treelstm.BinaryTreeLSTM(treelstm_config) 44 | else 45 | error('invalid parse tree type: ' .. 
self.structure) 46 | end 47 | 48 | self.params, self.grad_params = self.treelstm:getParameters() 49 | end 50 | 51 | function TreeLSTMSentiment:new_sentiment_module() 52 | local sentiment_module = nn.Sequential() 53 | if self.dropout then 54 | sentiment_module:add(nn.Dropout()) 55 | end 56 | sentiment_module 57 | :add(nn.Linear(self.mem_dim, self.num_classes)) 58 | :add(nn.LogSoftMax()) 59 | return sentiment_module 60 | end 61 | 62 | function TreeLSTMSentiment:train(dataset) 63 | self.treelstm:training() 64 | local indices = torch.randperm(dataset.size) 65 | local zeros = torch.zeros(self.mem_dim) 66 | for i = 1, dataset.size, self.batch_size do 67 | xlua.progress(i, dataset.size) 68 | local batch_size = math.min(i + self.batch_size - 1, dataset.size) - i + 1 69 | 70 | local feval = function(x) 71 | self.grad_params:zero() 72 | self.emb:zeroGradParameters() 73 | 74 | local loss = 0 75 | for j = 1, batch_size do 76 | local idx = indices[i + j - 1] 77 | local sent = dataset.sents[idx] 78 | local tree = dataset.trees[idx] 79 | 80 | local inputs = self.emb:forward(sent) 81 | local _, tree_loss = self.treelstm:forward(tree, inputs) 82 | loss = loss + tree_loss 83 | local input_grad = self.treelstm:backward(tree, inputs, {zeros, zeros}) 84 | self.emb:backward(sent, input_grad) 85 | end 86 | 87 | loss = loss / batch_size 88 | self.grad_params:div(batch_size) 89 | self.emb.gradWeight:div(batch_size) 90 | 91 | -- regularization 92 | loss = loss + 0.5 * self.reg * self.params:norm() ^ 2 93 | self.grad_params:add(self.reg, self.params) 94 | return loss, self.grad_params 95 | end 96 | 97 | optim.adagrad(feval, self.params, self.optim_state) 98 | self.emb:updateParameters(self.emb_learning_rate) 99 | end 100 | xlua.progress(dataset.size, dataset.size) 101 | end 102 | 103 | function TreeLSTMSentiment:predict(tree, sent) 104 | self.treelstm:evaluate() 105 | local prediction 106 | local inputs = self.emb:forward(sent) 107 | self.treelstm:forward(tree, inputs) 108 | local output = tree.output 109 | if self.fine_grained then 110 | prediction = argmax(output) 111 | else 112 | prediction = (output[1] > output[3]) and 1 or 3 113 | end 114 | self.treelstm:clean(tree) 115 | return prediction 116 | end 117 | 118 | function TreeLSTMSentiment:predict_dataset(dataset) 119 | local predictions = torch.Tensor(dataset.size) 120 | for i = 1, dataset.size do 121 | xlua.progress(i, dataset.size) 122 | predictions[i] = self:predict(dataset.trees[i], dataset.sents[i]) 123 | end 124 | return predictions 125 | end 126 | 127 | function argmax(v) 128 | local idx = 1 129 | local max = v[1] 130 | for i = 2, v:size(1) do 131 | if v[i] > max then 132 | max = v[i] 133 | idx = i 134 | end 135 | end 136 | return idx 137 | end 138 | 139 | function TreeLSTMSentiment:print_config() 140 | local num_params = self.params:size(1) 141 | local num_sentiment_params = self:new_sentiment_module():getParameters():size(1) 142 | printf('%-25s = %s\n', 'fine grained sentiment', tostring(self.fine_grained)) 143 | printf('%-25s = %d\n', 'num params', num_params) 144 | printf('%-25s = %d\n', 'num compositional params', num_params - num_sentiment_params) 145 | printf('%-25s = %d\n', 'word vector dim', self.emb_dim) 146 | printf('%-25s = %d\n', 'Tree-LSTM memory dim', self.mem_dim) 147 | printf('%-25s = %.2e\n', 'regularization strength', self.reg) 148 | printf('%-25s = %d\n', 'minibatch size', self.batch_size) 149 | printf('%-25s = %.2e\n', 'learning rate', self.learning_rate) 150 | printf('%-25s = %.2e\n', 'word vector learning rate', 
self.emb_learning_rate) 151 | printf('%-25s = %s\n', 'dropout', tostring(self.dropout)) 152 | end 153 | 154 | function TreeLSTMSentiment:save(path) 155 | local config = { 156 | batch_size = self.batch_size, 157 | dropout = self.dropout, 158 | emb_learning_rate = self.emb_learning_rate, 159 | emb_vecs = self.emb.weight:float(), 160 | fine_grained = self.fine_grained, 161 | learning_rate = self.learning_rate, 162 | mem_dim = self.mem_dim, 163 | reg = self.reg, 164 | structure = self.structure, 165 | } 166 | 167 | torch.save(path, { 168 | params = self.params, 169 | config = config, 170 | }) 171 | end 172 | 173 | function TreeLSTMSentiment.load(path) 174 | local state = torch.load(path) 175 | local model = treelstm.TreeLSTMSentiment.new(state.config) 176 | model.params:copy(state.params) 177 | return model 178 | end 179 | -------------------------------------------------------------------------------- /util/read_data.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Functions for loading data from disk. 4 | 5 | --]] 6 | 7 | function treelstm.read_embedding(vocab_path, emb_path) 8 | local vocab = treelstm.Vocab(vocab_path) 9 | local embedding = torch.load(emb_path) 10 | return vocab, embedding 11 | end 12 | 13 | function treelstm.read_sentences(path, vocab) 14 | local sentences = {} 15 | local file = io.open(path, 'r') 16 | local line 17 | while true do 18 | line = file:read() 19 | if line == nil then break end 20 | local tokens = stringx.split(line) 21 | local len = #tokens 22 | local sent = torch.IntTensor(len) 23 | for i = 1, len do 24 | local token = tokens[i] 25 | sent[i] = vocab:index(token) 26 | end 27 | sentences[#sentences + 1] = sent 28 | end 29 | 30 | file:close() 31 | return sentences 32 | end 33 | 34 | function treelstm.read_trees(parent_path, label_path) 35 | local parent_file = io.open(parent_path, 'r') 36 | local label_file 37 | if label_path ~= nil then label_file = io.open(label_path, 'r') end 38 | local count = 0 39 | local trees = {} 40 | 41 | while true do 42 | local parents = parent_file:read() 43 | if parents == nil then break end 44 | parents = stringx.split(parents) 45 | for i, p in ipairs(parents) do 46 | parents[i] = tonumber(p) 47 | end 48 | 49 | local labels 50 | if label_file ~= nil then 51 | labels = stringx.split(label_file:read()) 52 | for i, l in ipairs(labels) do 53 | -- ignore unlabeled nodes 54 | if l == '#' then 55 | labels[i] = nil 56 | else 57 | labels[i] = tonumber(l) 58 | end 59 | end 60 | end 61 | 62 | count = count + 1 63 | trees[count] = treelstm.read_tree(parents, labels) 64 | end 65 | parent_file:close() 66 | return trees 67 | end 68 | 69 | function treelstm.read_tree(parents, labels) 70 | local size = #parents 71 | local trees = {} 72 | if labels == nil then labels = {} end 73 | local root 74 | for i = 1, size do 75 | if not trees[i] and parents[i] ~= -1 then 76 | local idx = i 77 | local prev = nil 78 | while true do 79 | local parent = parents[idx] 80 | if parent == -1 then 81 | break 82 | end 83 | 84 | local tree = treelstm.Tree() 85 | if prev ~= nil then 86 | tree:add_child(prev) 87 | end 88 | trees[idx] = tree 89 | tree.idx = idx 90 | tree.gold_label = labels[idx] 91 | if trees[parent] ~= nil then 92 | trees[parent]:add_child(tree) 93 | break 94 | elseif parent == 0 then 95 | root = tree 96 | break 97 | else 98 | prev = tree 99 | idx = parent 100 | end 101 | end 102 | end 103 | end 104 | 105 | -- index leaves (only meaningful for constituency trees) 106 | local leaf_idx = 1 107 | for i = 
1, size do 108 | local tree = trees[i] 109 | if tree ~= nil and tree.num_children == 0 then 110 | tree.leaf_idx = leaf_idx 111 | leaf_idx = leaf_idx + 1 112 | end 113 | end 114 | return root 115 | end 116 | 117 | --[[ 118 | 119 | Semantic Relatedness 120 | 121 | --]] 122 | 123 | function treelstm.read_relatedness_dataset(dir, vocab, constituency) 124 | local dataset = {} 125 | dataset.vocab = vocab 126 | if constituency then 127 | dataset.ltrees = treelstm.read_trees(dir .. 'a.cparents') 128 | dataset.rtrees = treelstm.read_trees(dir .. 'b.cparents') 129 | else 130 | dataset.ltrees = treelstm.read_trees(dir .. 'a.parents') 131 | dataset.rtrees = treelstm.read_trees(dir .. 'b.parents') 132 | end 133 | dataset.lsents = treelstm.read_sentences(dir .. 'a.toks', vocab) 134 | dataset.rsents = treelstm.read_sentences(dir .. 'b.toks', vocab) 135 | dataset.size = #dataset.ltrees 136 | local id_file = torch.DiskFile(dir .. 'id.txt') 137 | local sim_file = torch.DiskFile(dir .. 'sim.txt') 138 | dataset.ids = torch.IntTensor(dataset.size) 139 | dataset.labels = torch.Tensor(dataset.size) 140 | for i = 1, dataset.size do 141 | dataset.ids[i] = id_file:readInt() 142 | dataset.labels[i] = 0.25 * (sim_file:readDouble() - 1) 143 | end 144 | id_file:close() 145 | sim_file:close() 146 | return dataset 147 | end 148 | 149 | --[[ 150 | 151 | Sentiment 152 | 153 | --]] 154 | 155 | function treelstm.read_sentiment_dataset(dir, vocab, fine_grained, dependency) 156 | local dataset = {} 157 | dataset.vocab = vocab 158 | dataset.fine_grained = fine_grained 159 | local trees 160 | if dependency then 161 | trees = treelstm.read_trees(dir .. 'dparents.txt', dir .. 'dlabels.txt') 162 | else 163 | trees = treelstm.read_trees(dir .. 'parents.txt', dir .. 'labels.txt') 164 | for _, tree in ipairs(trees) do 165 | set_spans(tree) 166 | end 167 | end 168 | 169 | local sents = treelstm.read_sentences(dir .. 
'sents.txt', vocab) 170 | if not fine_grained then 171 | dataset.trees = {} 172 | dataset.sents = {} 173 | for i = 1, #trees do 174 | if trees[i].gold_label ~= 0 then 175 | table.insert(dataset.trees, trees[i]) 176 | table.insert(dataset.sents, sents[i]) 177 | end 178 | end 179 | else 180 | dataset.trees = trees 181 | dataset.sents = sents 182 | end 183 | 184 | dataset.size = #dataset.trees 185 | dataset.labels = torch.Tensor(dataset.size) 186 | for i = 1, dataset.size do 187 | remap_labels(dataset.trees[i], fine_grained) 188 | dataset.labels[i] = dataset.trees[i].gold_label 189 | end 190 | return dataset 191 | end 192 | 193 | function set_spans(tree) 194 | if tree.num_children == 0 then 195 | tree.lo, tree.hi = tree.leaf_idx, tree.leaf_idx 196 | return 197 | end 198 | 199 | for i = 1, tree.num_children do 200 | set_spans(tree.children[i]) 201 | end 202 | 203 | tree.lo, tree.hi = tree.children[1].lo, tree.children[1].hi 204 | for i = 2, tree.num_children do 205 | tree.lo = math.min(tree.lo, tree.children[i].lo) 206 | tree.hi = math.max(tree.hi, tree.children[i].hi) 207 | end 208 | end 209 | 210 | function remap_labels(tree, fine_grained) 211 | if tree.gold_label ~= nil then 212 | if fine_grained then 213 | tree.gold_label = tree.gold_label + 3 214 | else 215 | if tree.gold_label < 0 then 216 | tree.gold_label = 1 217 | elseif tree.gold_label == 0 then 218 | tree.gold_label = 2 219 | else 220 | tree.gold_label = 3 221 | end 222 | end 223 | end 224 | 225 | for i = 1, tree.num_children do 226 | remap_labels(tree.children[i], fine_grained) 227 | end 228 | end 229 | -------------------------------------------------------------------------------- /sentiment/main.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Tree-LSTM training script for sentiment classication on the Stanford 4 | Sentiment Treebank 5 | 6 | --]] 7 | 8 | require('..') 9 | 10 | function accuracy(pred, gold) 11 | return torch.eq(pred, gold):sum() / pred:size(1) 12 | end 13 | 14 | -- read command line arguments 15 | local args = lapp [[ 16 | Training script for sentiment classification on the SST dataset. 17 | -m,--model (default constituency) Model architecture: [constituency, lstm, bilstm] 18 | -l,--layers (default 1) Number of layers (ignored for Tree-LSTM) 19 | -d,--dim (default 150) LSTM memory dimension 20 | -e,--epochs (default 10) Number of training epochs 21 | -b,--binary Train and evaluate on binary sub-task 22 | ]] 23 | 24 | local model_name, model_class, model_structure 25 | if args.model == 'constituency' then 26 | model_name = 'Constituency Tree LSTM' 27 | model_class = treelstm.TreeLSTMSentiment 28 | elseif args.model == 'dependency' then 29 | model_name = 'Dependency Tree LSTM' 30 | model_class = treelstm.TreeLSTMSentiment 31 | elseif args.model == 'lstm' then 32 | model_name = 'LSTM' 33 | model_class = treelstm.LSTMSentiment 34 | elseif args.model == 'bilstm' then 35 | model_name = 'Bidirectional LSTM' 36 | model_class = treelstm.LSTMSentiment 37 | end 38 | model_structure = args.model 39 | header(model_name .. ' for Sentiment Classification') 40 | 41 | -- binary or fine-grained subtask 42 | local fine_grained = not args.binary 43 | 44 | -- directory containing dataset files 45 | local data_dir = 'data/sst/' 46 | 47 | -- load vocab 48 | local vocab = treelstm.Vocab(data_dir .. 'vocab-cased.txt') 49 | 50 | -- load embeddings 51 | print('loading word embeddings') 52 | local emb_dir = 'data/glove/' 53 | local emb_prefix = emb_dir .. 
'glove.840B' 54 | local emb_vocab, emb_vecs = treelstm.read_embedding(emb_prefix .. '.vocab', emb_prefix .. '.300d.th') 55 | local emb_dim = emb_vecs:size(2) 56 | 57 | -- use only vectors in vocabulary (not necessary, but gives faster training) 58 | local num_unk = 0 59 | local vecs = torch.Tensor(vocab.size, emb_dim) 60 | for i = 1, vocab.size do 61 | local w = string.gsub(vocab:token(i), '\\', '') -- remove escape characters 62 | if emb_vocab:contains(w) then 63 | vecs[i] = emb_vecs[emb_vocab:index(w)] 64 | else 65 | num_unk = num_unk + 1 66 | vecs[i]:uniform(-0.05, 0.05) 67 | end 68 | end 69 | print('unk count = ' .. num_unk) 70 | emb_vocab = nil 71 | emb_vecs = nil 72 | collectgarbage() 73 | 74 | -- load datasets 75 | print('loading datasets') 76 | local train_dir = data_dir .. 'train/' 77 | local dev_dir = data_dir .. 'dev/' 78 | local test_dir = data_dir .. 'test/' 79 | local dependency = (args.model == 'dependency') 80 | local train_dataset = treelstm.read_sentiment_dataset(train_dir, vocab, fine_grained, dependency) 81 | local dev_dataset = treelstm.read_sentiment_dataset(dev_dir, vocab, fine_grained, dependency) 82 | local test_dataset = treelstm.read_sentiment_dataset(test_dir, vocab, fine_grained, dependency) 83 | 84 | printf('num train = %d\n', train_dataset.size) 85 | printf('num dev = %d\n', dev_dataset.size) 86 | printf('num test = %d\n', test_dataset.size) 87 | 88 | -- initialize model 89 | local model = model_class{ 90 | emb_vecs = vecs, 91 | structure = model_structure, 92 | fine_grained = fine_grained, 93 | num_layers = args.layers, 94 | mem_dim = args.dim, 95 | } 96 | 97 | -- number of epochs to train 98 | local num_epochs = args.epochs 99 | 100 | -- print information 101 | header('model configuration') 102 | printf('max epochs = %d\n', num_epochs) 103 | model:print_config() 104 | 105 | -- train 106 | local train_start = sys.clock() 107 | local best_dev_score = -1.0 108 | local best_dev_model = model 109 | header('Training model') 110 | for i = 1, num_epochs do 111 | local start = sys.clock() 112 | printf('-- epoch %d\n', i) 113 | model:train(train_dataset) 114 | printf('-- finished epoch in %.2fs\n', sys.clock() - start) 115 | 116 | -- uncomment to compute train scores 117 | --[[ 118 | local train_predictions = model:predict_dataset(train_dataset) 119 | local train_score = accuracy(train_predictions, train_dataset.labels) 120 | printf('-- train score: %.4f\n', train_score) 121 | --]] 122 | 123 | local dev_predictions = model:predict_dataset(dev_dataset) 124 | local dev_score = accuracy(dev_predictions, dev_dataset.labels) 125 | printf('-- dev score: %.4f\n', dev_score) 126 | 127 | if dev_score > best_dev_score then 128 | best_dev_score = dev_score 129 | best_dev_model = model_class{ 130 | emb_vecs = vecs, 131 | structure = model_structure, 132 | fine_grained = fine_grained, 133 | num_layers = args.layers, 134 | mem_dim = args.dim, 135 | } 136 | best_dev_model.params:copy(model.params) 137 | best_dev_model.emb.weight:copy(model.emb.weight) 138 | end 139 | end 140 | printf('finished training in %.2fs\n', sys.clock() - train_start) 141 | 142 | -- evaluate 143 | header('Evaluating on test set') 144 | printf('-- using model with dev score = %.4f\n', best_dev_score) 145 | local test_predictions = best_dev_model:predict_dataset(test_dataset) 146 | printf('-- test score: %.4f\n', accuracy(test_predictions, test_dataset.labels)) 147 | 148 | -- create predictions and models directories if necessary 149 | if lfs.attributes(treelstm.predictions_dir) == nil then 150 | 
lfs.mkdir(treelstm.predictions_dir) 151 | end 152 | 153 | if lfs.attributes(treelstm.models_dir) == nil then 154 | lfs.mkdir(treelstm.models_dir) 155 | end 156 | 157 | -- get paths 158 | local file_idx = 1 159 | local subtask = fine_grained and '5class' or '2class' 160 | local predictions_save_path, model_save_path 161 | while true do 162 | predictions_save_path = string.format( 163 | treelstm.predictions_dir .. '/sent-%s.%s.%dl.%dd.%d.pred', args.model, subtask, args.layers, args.dim, file_idx) 164 | model_save_path = string.format( 165 | treelstm.models_dir .. '/sent-%s.%s.%dl.%dd.%d.th', args.model, subtask, args.layers, args.dim, file_idx) 166 | if lfs.attributes(predictions_save_path) == nil and lfs.attributes(model_save_path) == nil then 167 | break 168 | end 169 | file_idx = file_idx + 1 170 | end 171 | 172 | -- write predictions to disk 173 | local predictions_file = torch.DiskFile(predictions_save_path, 'w') 174 | print('writing predictions to ' .. predictions_save_path) 175 | for i = 1, test_predictions:size(1) do 176 | predictions_file:writeInt(test_predictions[i]) 177 | end 178 | predictions_file:close() 179 | 180 | -- write model to disk 181 | print('writing model to ' .. model_save_path) 182 | best_dev_model:save(model_save_path) 183 | 184 | -- to load a saved model 185 | -- local loaded = model_class.load(model_save_path) 186 | -------------------------------------------------------------------------------- /models/BinaryTreeLSTM.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A Binary Tree-LSTM with input at the leaf nodes. 4 | 5 | --]] 6 | 7 | local BinaryTreeLSTM, parent = torch.class('treelstm.BinaryTreeLSTM', 'treelstm.TreeLSTM') 8 | 9 | function BinaryTreeLSTM:__init(config) 10 | parent.__init(self, config) 11 | self.gate_output = config.gate_output 12 | if self.gate_output == nil then self.gate_output = true end 13 | 14 | -- a function that instantiates an output module that takes the hidden state h as input 15 | self.output_module_fn = config.output_module_fn 16 | self.criterion = config.criterion 17 | 18 | -- leaf input module 19 | self.leaf_module = self:new_leaf_module() 20 | self.leaf_modules = {} 21 | 22 | -- composition module 23 | self.composer = self:new_composer() 24 | self.composers = {} 25 | 26 | -- output module 27 | self.output_module = self:new_output_module() 28 | self.output_modules = {} 29 | end 30 | 31 | function BinaryTreeLSTM:new_leaf_module() 32 | local input = nn.Identity()() 33 | local c = nn.Linear(self.in_dim, self.mem_dim)(input) 34 | local h 35 | if self.gate_output then 36 | local o = nn.Sigmoid()(nn.Linear(self.in_dim, self.mem_dim)(input)) 37 | h = nn.CMulTable(){o, nn.Tanh()(c)} 38 | else 39 | h = nn.Tanh()(c) 40 | end 41 | 42 | local leaf_module = nn.gModule({input}, {c, h}) 43 | if self.leaf_module ~= nil then 44 | share_params(leaf_module, self.leaf_module) 45 | end 46 | return leaf_module 47 | end 48 | 49 | function BinaryTreeLSTM:new_composer() 50 | local lc, lh = nn.Identity()(), nn.Identity()() 51 | local rc, rh = nn.Identity()(), nn.Identity()() 52 | local new_gate = function() 53 | return nn.CAddTable(){ 54 | nn.Linear(self.mem_dim, self.mem_dim)(lh), 55 | nn.Linear(self.mem_dim, self.mem_dim)(rh) 56 | } 57 | end 58 | 59 | local i = nn.Sigmoid()(new_gate()) -- input gate 60 | local lf = nn.Sigmoid()(new_gate()) -- left forget gate 61 | local rf = nn.Sigmoid()(new_gate()) -- right forget gate 62 | local update = nn.Tanh()(new_gate()) -- memory cell update vector 63 | local c 
= nn.CAddTable(){ -- memory cell 64 | nn.CMulTable(){i, update}, 65 | nn.CMulTable(){lf, lc}, 66 | nn.CMulTable(){rf, rc} 67 | } 68 | 69 | local h 70 | if self.gate_output then 71 | local o = nn.Sigmoid()(new_gate()) -- output gate 72 | h = nn.CMulTable(){o, nn.Tanh()(c)} 73 | else 74 | h = nn.Tanh()(c) 75 | end 76 | local composer = nn.gModule( 77 | {lc, lh, rc, rh}, 78 | {c, h}) 79 | 80 | if self.composer ~= nil then 81 | share_params(composer, self.composer) 82 | end 83 | return composer 84 | end 85 | 86 | function BinaryTreeLSTM:new_output_module() 87 | if self.output_module_fn == nil then return nil end 88 | local output_module = self.output_module_fn() 89 | if self.output_module ~= nil then 90 | share_params(output_module, self.output_module) 91 | end 92 | return output_module 93 | end 94 | 95 | function BinaryTreeLSTM:forward(tree, inputs) 96 | local lloss, rloss = 0, 0 97 | if tree.num_children == 0 then 98 | self:allocate_module(tree, 'leaf_module') 99 | tree.state = tree.leaf_module:forward(inputs[tree.leaf_idx]) 100 | else 101 | self:allocate_module(tree, 'composer') 102 | 103 | -- get child hidden states 104 | local lvecs, lloss = self:forward(tree.children[1], inputs) 105 | local rvecs, rloss = self:forward(tree.children[2], inputs) 106 | local lc, lh = self:unpack_state(lvecs) 107 | local rc, rh = self:unpack_state(rvecs) 108 | 109 | -- compute state and output 110 | tree.state = tree.composer:forward{lc, lh, rc, rh} 111 | end 112 | 113 | local loss 114 | if self.output_module ~= nil then 115 | self:allocate_module(tree, 'output_module') 116 | tree.output = tree.output_module:forward(tree.state[2]) 117 | if self.train then 118 | loss = self.criterion:forward(tree.output, tree.gold_label) + lloss + rloss 119 | end 120 | end 121 | 122 | return tree.state, loss 123 | end 124 | 125 | function BinaryTreeLSTM:backward(tree, inputs, grad) 126 | local grad_inputs = torch.Tensor(inputs:size()) 127 | self:_backward(tree, inputs, grad, grad_inputs) 128 | return grad_inputs 129 | end 130 | 131 | function BinaryTreeLSTM:_backward(tree, inputs, grad, grad_inputs) 132 | local output_grad = self.mem_zeros 133 | if tree.output ~= nil and tree.gold_label ~= nil then 134 | output_grad = tree.output_module:backward( 135 | tree.state[2], self.criterion:backward(tree.output, tree.gold_label)) 136 | end 137 | self:free_module(tree, 'output_module') 138 | 139 | if tree.num_children == 0 then 140 | grad_inputs[tree.leaf_idx] = tree.leaf_module:backward( 141 | inputs[tree.leaf_idx], 142 | {grad[1], grad[2] + output_grad}) 143 | self:free_module(tree, 'leaf_module') 144 | else 145 | local lc, lh, rc, rh = self:get_child_states(tree) 146 | local composer_grad = tree.composer:backward( 147 | {lc, lh, rc, rh}, 148 | {grad[1], grad[2] + output_grad}) 149 | self:free_module(tree, 'composer') 150 | 151 | -- backward propagate to children 152 | self:_backward(tree.children[1], inputs, {composer_grad[1], composer_grad[2]}, grad_inputs) 153 | self:_backward(tree.children[2], inputs, {composer_grad[3], composer_grad[4]}, grad_inputs) 154 | end 155 | tree.state = nil 156 | tree.output = nil 157 | end 158 | 159 | function BinaryTreeLSTM:parameters() 160 | local params, grad_params = {}, {} 161 | local cp, cg = self.composer:parameters() 162 | tablex.insertvalues(params, cp) 163 | tablex.insertvalues(grad_params, cg) 164 | local lp, lg = self.leaf_module:parameters() 165 | tablex.insertvalues(params, lp) 166 | tablex.insertvalues(grad_params, lg) 167 | if self.output_module ~= nil then 168 | local op, og = 
self.output_module:parameters() 169 | tablex.insertvalues(params, op) 170 | tablex.insertvalues(grad_params, og) 171 | end 172 | return params, grad_params 173 | end 174 | 175 | -- 176 | -- helper functions 177 | -- 178 | 179 | function BinaryTreeLSTM:unpack_state(state) 180 | local c, h 181 | if state == nil then 182 | c, h = self.mem_zeros, self.mem_zeros 183 | else 184 | c, h = unpack(state) 185 | end 186 | return c, h 187 | end 188 | 189 | function BinaryTreeLSTM:get_child_states(tree) 190 | local lc, lh, rc, rh 191 | if tree.children[1] ~= nil then 192 | lc, lh = self:unpack_state(tree.children[1].state) 193 | end 194 | 195 | if tree.children[2] ~= nil then 196 | rc, rh = self:unpack_state(tree.children[2].state) 197 | end 198 | return lc, lh, rc, rh 199 | end 200 | 201 | function BinaryTreeLSTM:clean(tree) 202 | tree.state = nil 203 | tree.output = nil 204 | self:free_module(tree, 'leaf_module') 205 | self:free_module(tree, 'composer') 206 | self:free_module(tree, 'output_module') 207 | for i = 1, tree.num_children do 208 | self:clean(tree.children[i]) 209 | end 210 | end 211 | -------------------------------------------------------------------------------- /models/LSTM.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Long Short-Term Memory. 4 | 5 | --]] 6 | 7 | local LSTM, parent = torch.class('treelstm.LSTM', 'nn.Module') 8 | 9 | function LSTM:__init(config) 10 | parent.__init(self) 11 | 12 | self.in_dim = config.in_dim 13 | self.mem_dim = config.mem_dim or 150 14 | self.num_layers = config.num_layers or 1 15 | self.gate_output = config.gate_output 16 | if self.gate_output == nil then self.gate_output = true end 17 | 18 | self.master_cell = self:new_cell() 19 | self.depth = 0 20 | self.cells = {} -- table of cells in a roll-out 21 | 22 | -- initial (t = 0) states for forward propagation and initial error signals 23 | -- for backpropagation 24 | local ctable_init, ctable_grad, htable_init, htable_grad 25 | if self.num_layers == 1 then 26 | ctable_init = torch.zeros(self.mem_dim) 27 | htable_init = torch.zeros(self.mem_dim) 28 | ctable_grad = torch.zeros(self.mem_dim) 29 | htable_grad = torch.zeros(self.mem_dim) 30 | else 31 | ctable_init, ctable_grad, htable_init, htable_grad = {}, {}, {}, {} 32 | for i = 1, self.num_layers do 33 | ctable_init[i] = torch.zeros(self.mem_dim) 34 | htable_init[i] = torch.zeros(self.mem_dim) 35 | ctable_grad[i] = torch.zeros(self.mem_dim) 36 | htable_grad[i] = torch.zeros(self.mem_dim) 37 | end 38 | end 39 | self.initial_values = {ctable_init, htable_init} 40 | self.gradInput = { 41 | torch.zeros(self.in_dim), 42 | ctable_grad, 43 | htable_grad 44 | } 45 | end 46 | 47 | -- Instantiate a new LSTM cell. 48 | -- Each cell shares the same parameters, but the activations of their constituent 49 | -- layers differ. 
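-- In a single forward step, each layer computes (matching the gate definitions
-- below; bias terms omitted):
--   i = sigmoid(W_i x + U_i h_prev)        -- input gate
--   f = sigmoid(W_f x + U_f h_prev)        -- forget gate
--   u = tanh(W_u x + U_u h_prev)           -- candidate update
--   c = f * c_prev + i * u                 -- new memory cell
--   h = o * tanh(c), o = sigmoid(W_o x + U_o h_prev)
--       (or h = tanh(c) when gate_output = false)
-- where x is the word vector for layer 1 and the hidden state of the layer
-- below for higher layers. Every cell returned here shares its parameters
-- with self.master_cell, so unrolling over time in forward() adds no new weights.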
50 | function LSTM:new_cell() 51 | local input = nn.Identity()() 52 | local ctable_p = nn.Identity()() 53 | local htable_p = nn.Identity()() 54 | 55 | -- multilayer LSTM 56 | local htable, ctable = {}, {} 57 | for layer = 1, self.num_layers do 58 | local h_p = (self.num_layers == 1) and htable_p or nn.SelectTable(layer)(htable_p) 59 | local c_p = (self.num_layers == 1) and ctable_p or nn.SelectTable(layer)(ctable_p) 60 | 61 | local new_gate = function() 62 | local in_module = (layer == 1) 63 | and nn.Linear(self.in_dim, self.mem_dim)(input) 64 | or nn.Linear(self.mem_dim, self.mem_dim)(htable[layer - 1]) 65 | return nn.CAddTable(){ 66 | in_module, 67 | nn.Linear(self.mem_dim, self.mem_dim)(h_p) 68 | } 69 | end 70 | 71 | -- input, forget, and output gates 72 | local i = nn.Sigmoid()(new_gate()) 73 | local f = nn.Sigmoid()(new_gate()) 74 | local update = nn.Tanh()(new_gate()) 75 | 76 | -- update the state of the LSTM cell 77 | ctable[layer] = nn.CAddTable(){ 78 | nn.CMulTable(){f, c_p}, 79 | nn.CMulTable(){i, update} 80 | } 81 | 82 | if self.gate_output then 83 | local o = nn.Sigmoid()(new_gate()) 84 | htable[layer] = nn.CMulTable(){o, nn.Tanh()(ctable[layer])} 85 | else 86 | htable[layer] = nn.Tanh()(ctable[layer]) 87 | end 88 | end 89 | 90 | -- if LSTM is single-layered, this makes htable/ctable Tensors (instead of tables). 91 | -- this avoids some quirks with nngraph involving tables of size 1. 92 | htable, ctable = nn.Identity()(htable), nn.Identity()(ctable) 93 | local cell = nn.gModule({input, ctable_p, htable_p}, {ctable, htable}) 94 | 95 | -- share parameters 96 | if self.master_cell then 97 | share_params(cell, self.master_cell) 98 | end 99 | return cell 100 | end 101 | 102 | -- Forward propagate. 103 | -- inputs: T x in_dim tensor, where T is the number of time steps. 104 | -- reverse: if true, read the input from right to left (useful for bidirectional LSTMs). 105 | -- Returns the final hidden state of the LSTM. 106 | function LSTM:forward(inputs, reverse) 107 | local size = inputs:size(1) 108 | for t = 1, size do 109 | local input = reverse and inputs[size - t + 1] or inputs[t] 110 | self.depth = self.depth + 1 111 | local cell = self.cells[self.depth] 112 | if cell == nil then 113 | cell = self:new_cell() 114 | self.cells[self.depth] = cell 115 | end 116 | local prev_output 117 | if self.depth > 1 then 118 | prev_output = self.cells[self.depth - 1].output 119 | else 120 | prev_output = self.initial_values 121 | end 122 | 123 | local outputs = cell:forward({input, prev_output[1], prev_output[2]}) 124 | local ctable, htable = unpack(outputs) 125 | if self.num_layers == 1 then 126 | self.output = htable 127 | else 128 | self.output = {} 129 | for i = 1, self.num_layers do 130 | self.output[i] = htable[i] 131 | end 132 | end 133 | end 134 | return self.output 135 | end 136 | 137 | -- Backpropagate. forward() must have been called previously on the same input. 138 | -- inputs: T x in_dim tensor, where T is the number of time steps. 139 | -- grad_outputs: T x num_layers x mem_dim tensor. 140 | -- reverse: if true, read the input from right to left. 141 | -- Returns the gradients with respect to the inputs (in the same order as the inputs). 
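-- A minimal usage sketch (dimensions and variable names are illustrative, not
-- part of the API): run a sequence forward, then push an error signal into the
-- final hidden state only, as the sentiment and relatedness models do.
--
--   local lstm = treelstm.LSTM{in_dim = 300, mem_dim = 150}
--   local inputs = torch.randn(10, 300)        -- T = 10 time steps
--   local h_T = lstm:forward(inputs)           -- final hidden state [150]
--   local grad_outputs = torch.zeros(10, 150)
--   grad_outputs[10] = torch.randn(150)        -- d(loss)/d(h_T)
--   local input_grads = lstm:backward(inputs, grad_outputs)  -- [10 x 300]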
142 | function LSTM:backward(inputs, grad_outputs, reverse) 143 | local size = inputs:size(1) 144 | if self.depth == 0 then 145 | error("No cells to backpropagate through") 146 | end 147 | 148 | local input_grads = torch.Tensor(inputs:size()) 149 | for t = size, 1, -1 do 150 | local input = reverse and inputs[size - t + 1] or inputs[t] 151 | local grad_output = reverse and grad_outputs[size - t + 1] or grad_outputs[t] 152 | local cell = self.cells[self.depth] 153 | local grads = {self.gradInput[2], self.gradInput[3]} 154 | if self.num_layers == 1 then 155 | grads[2]:add(grad_output) 156 | else 157 | for i = 1, self.num_layers do 158 | grads[2][i]:add(grad_output[i]) 159 | end 160 | end 161 | 162 | local prev_output = (self.depth > 1) and self.cells[self.depth - 1].output 163 | or self.initial_values 164 | self.gradInput = cell:backward({input, prev_output[1], prev_output[2]}, grads) 165 | if reverse then 166 | input_grads[size - t + 1] = self.gradInput[1] 167 | else 168 | input_grads[t] = self.gradInput[1] 169 | end 170 | self.depth = self.depth - 1 171 | end 172 | self:forget() -- important to clear out state 173 | return input_grads 174 | end 175 | 176 | function LSTM:share(lstm, ...) 177 | if self.in_dim ~= lstm.in_dim then error("LSTM input dimension mismatch") end 178 | if self.mem_dim ~= lstm.mem_dim then error("LSTM memory dimension mismatch") end 179 | if self.num_layers ~= lstm.num_layers then error("LSTM layer count mismatch") end 180 | if self.gate_output ~= lstm.gate_output then error("LSTM output gating mismatch") end 181 | share_params(self.master_cell, lstm.master_cell, ...) 182 | end 183 | 184 | function LSTM:zeroGradParameters() 185 | self.master_cell:zeroGradParameters() 186 | end 187 | 188 | function LSTM:parameters() 189 | return self.master_cell:parameters() 190 | end 191 | 192 | -- Clear saved gradients 193 | function LSTM:forget() 194 | self.depth = 0 195 | for i = 1, #self.gradInput do 196 | local gradInput = self.gradInput[i] 197 | if type(gradInput) == 'table' then 198 | for _, t in pairs(gradInput) do t:zero() end 199 | else 200 | self.gradInput[i]:zero() 201 | end 202 | end 203 | end 204 | -------------------------------------------------------------------------------- /relatedness/TreeLSTMSim.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Semantic relatedness prediction using Tree-LSTMs. 
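A single Tree-LSTM (Child-Sum for dependency parses, Binary for constituency
parses) encodes both sentences of a pair; the element-wise product and
absolute difference of the two sentence representations are fed to a small
classifier over five similarity classes (see new_sim_module) trained with a
KL-divergence criterion against a target rating distribution.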
4 | 5 | --]] 6 | 7 | local TreeLSTMSim = torch.class('treelstm.TreeLSTMSim') 8 | 9 | function TreeLSTMSim:__init(config) 10 | self.mem_dim = config.mem_dim or 150 11 | self.learning_rate = config.learning_rate or 0.05 12 | self.emb_learning_rate = config.emb_learning_rate or 0.0 13 | self.batch_size = config.batch_size or 25 14 | self.reg = config.reg or 1e-4 15 | self.structure = config.structure or 'dependency' -- {dependency, constituency} 16 | self.sim_nhidden = config.sim_nhidden or 50 17 | 18 | -- word embedding 19 | self.emb_dim = config.emb_vecs:size(2) 20 | self.emb = nn.LookupTable(config.emb_vecs:size(1), self.emb_dim) 21 | self.emb.weight:copy(config.emb_vecs) 22 | 23 | -- number of similarity rating classes 24 | self.num_classes = 5 25 | 26 | -- optimizer configuration 27 | self.optim_state = { learningRate = self.learning_rate } 28 | 29 | -- KL divergence optimization objective 30 | self.criterion = nn.DistKLDivCriterion() 31 | 32 | -- initialize tree-lstm model 33 | local treelstm_config = { 34 | in_dim = self.emb_dim, 35 | mem_dim = self.mem_dim, 36 | gate_output = false, 37 | } 38 | 39 | if self.structure == 'dependency' then 40 | self.treelstm = treelstm.ChildSumTreeLSTM(treelstm_config) 41 | elseif self.structure == 'constituency' then 42 | self.treelstm = treelstm.BinaryTreeLSTM(treelstm_config) 43 | else 44 | error('invalid parse tree type: ' .. self.structure) 45 | end 46 | 47 | -- similarity model 48 | self.sim_module = self:new_sim_module() 49 | local modules = nn.Parallel() 50 | :add(self.treelstm) 51 | :add(self.sim_module) 52 | self.params, self.grad_params = modules:getParameters() 53 | end 54 | 55 | function TreeLSTMSim:new_sim_module() 56 | local vecs_to_input 57 | local lvec = nn.Identity()() 58 | local rvec = nn.Identity()() 59 | local mult_dist = nn.CMulTable(){lvec, rvec} 60 | local add_dist = nn.Abs()(nn.CSubTable(){lvec, rvec}) 61 | local vec_dist_feats = nn.JoinTable(1){mult_dist, add_dist} 62 | vecs_to_input = nn.gModule({lvec, rvec}, {vec_dist_feats}) 63 | 64 | -- define similarity model architecture 65 | local sim_module = nn.Sequential() 66 | :add(vecs_to_input) 67 | :add(nn.Linear(2 * self.mem_dim, self.sim_nhidden)) 68 | :add(nn.Sigmoid()) -- does better than tanh 69 | :add(nn.Linear(self.sim_nhidden, self.num_classes)) 70 | :add(nn.LogSoftMax()) 71 | return sim_module 72 | end 73 | 74 | function TreeLSTMSim:train(dataset) 75 | self.treelstm:training() 76 | local indices = torch.randperm(dataset.size) 77 | local zeros = torch.zeros(self.mem_dim) 78 | for i = 1, dataset.size, self.batch_size do 79 | xlua.progress(i, dataset.size) 80 | local batch_size = math.min(i + self.batch_size - 1, dataset.size) - i + 1 81 | 82 | -- get target distributions for batch 83 | local targets = torch.zeros(batch_size, self.num_classes) 84 | for j = 1, batch_size do 85 | local sim = dataset.labels[indices[i + j - 1]] * (self.num_classes - 1) + 1 86 | local ceil, floor = math.ceil(sim), math.floor(sim) 87 | if ceil == floor then 88 | targets[{j, floor}] = 1 89 | else 90 | targets[{j, floor}] = ceil - sim 91 | targets[{j, ceil}] = sim - floor 92 | end 93 | end 94 | 95 | local feval = function(x) 96 | self.grad_params:zero() 97 | self.emb:zeroGradParameters() 98 | local loss = 0 99 | for j = 1, batch_size do 100 | local idx = indices[i + j - 1] 101 | local ltree, rtree = dataset.ltrees[idx], dataset.rtrees[idx] 102 | local lsent, rsent = dataset.lsents[idx], dataset.rsents[idx] 103 | self.emb:forward(lsent) 104 | local linputs = 
torch.Tensor(self.emb.output:size()):copy(self.emb.output) 105 | local rinputs = self.emb:forward(rsent) 106 | 107 | -- get sentence representations 108 | local lrep = self.treelstm:forward(ltree, linputs)[2] 109 | local rrep = self.treelstm:forward(rtree, rinputs)[2] 110 | 111 | -- compute relatedness 112 | local output = self.sim_module:forward{lrep, rrep} 113 | 114 | -- compute loss and backpropagate 115 | local example_loss = self.criterion:forward(output, targets[j]) 116 | loss = loss + example_loss 117 | local sim_grad = self.criterion:backward(output, targets[j]) 118 | local rep_grad = self.sim_module:backward({lrep, rrep}, sim_grad) 119 | local linput_grads = self.treelstm:backward(dataset.ltrees[idx], linputs, {zeros, rep_grad[1]}) 120 | local rinput_grads = self.treelstm:backward(dataset.rtrees[idx], rinputs, {zeros, rep_grad[2]}) 121 | self.emb:backward(lsent, linput_grads) 122 | self.emb:backward(rsent, rinput_grads) 123 | end 124 | 125 | loss = loss / batch_size 126 | self.grad_params:div(batch_size) 127 | self.emb.gradWeight:div(batch_size) 128 | self.emb:updateParameters(self.emb_learning_rate) 129 | 130 | -- regularization 131 | loss = loss + 0.5 * self.reg * self.params:norm() ^ 2 132 | self.grad_params:add(self.reg, self.params) 133 | return loss, self.grad_params 134 | end 135 | 136 | optim.adagrad(feval, self.params, self.optim_state) 137 | end 138 | xlua.progress(dataset.size, dataset.size) 139 | end 140 | 141 | -- Predict the similarity of a sentence pair. 142 | function TreeLSTMSim:predict(ltree, rtree, lsent, rsent) 143 | local linputs = self.emb:forward(lsent) 144 | local lrep = self.treelstm:forward(ltree, linputs)[2] 145 | local rinputs = self.emb:forward(rsent) 146 | local rrep = self.treelstm:forward(rtree, rinputs)[2] 147 | local output = self.sim_module:forward{lrep, rrep} 148 | self.treelstm:clean(ltree) 149 | self.treelstm:clean(rtree) 150 | return torch.range(1, 5):dot(output:exp()) 151 | end 152 | 153 | -- Produce similarity predictions for each sentence pair in the dataset. 
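-- predict() converts the class log-probabilities into a real-valued score by
-- taking the expected rating under the predicted distribution:
-- torch.range(1, 5):dot(output:exp()). For example, a predicted distribution
-- of {0, 0, 0.5, 0.5, 0} over the five classes gives 3 * 0.5 + 4 * 0.5 = 3.5.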
154 | function TreeLSTMSim:predict_dataset(dataset) 155 | self.treelstm:evaluate() 156 | local predictions = torch.Tensor(dataset.size) 157 | for i = 1, dataset.size do 158 | xlua.progress(i, dataset.size) 159 | local ltree, rtree = dataset.ltrees[i], dataset.rtrees[i] 160 | local lsent, rsent = dataset.lsents[i], dataset.rsents[i] 161 | predictions[i] = self:predict(ltree, rtree, lsent, rsent) 162 | end 163 | return predictions 164 | end 165 | 166 | function TreeLSTMSim:print_config() 167 | local num_params = self.params:size(1) 168 | local num_sim_params = self:new_sim_module():getParameters():size(1) 169 | printf('%-25s = %d\n', 'num params', num_params) 170 | printf('%-25s = %d\n', 'num compositional params', num_params - num_sim_params) 171 | printf('%-25s = %d\n', 'word vector dim', self.emb_dim) 172 | printf('%-25s = %d\n', 'Tree-LSTM memory dim', self.mem_dim) 173 | printf('%-25s = %.2e\n', 'regularization strength', self.reg) 174 | printf('%-25s = %d\n', 'minibatch size', self.batch_size) 175 | printf('%-25s = %.2e\n', 'learning rate', self.learning_rate) 176 | printf('%-25s = %.2e\n', 'word vector learning rate', self.emb_learning_rate) 177 | printf('%-25s = %s\n', 'parse tree type', self.structure) 178 | printf('%-25s = %d\n', 'sim module hidden dim', self.sim_nhidden) 179 | end 180 | 181 | -- 182 | -- Serialization 183 | -- 184 | 185 | function TreeLSTMSim:save(path) 186 | local config = { 187 | batch_size = self.batch_size, 188 | emb_vecs = self.emb.weight:float(), 189 | learning_rate = self.learning_rate, 190 | emb_learning_rate = self.emb_learning_rate, 191 | mem_dim = self.mem_dim, 192 | sim_nhidden = self.sim_nhidden, 193 | reg = self.reg, 194 | structure = self.structure, 195 | } 196 | 197 | torch.save(path, { 198 | params = self.params, 199 | config = config, 200 | }) 201 | end 202 | 203 | function TreeLSTMSim.load(path) 204 | local state = torch.load(path) 205 | local model = treelstm.TreeLSTMSim.new(state.config) 206 | model.params:copy(state.params) 207 | return model 208 | end 209 | -------------------------------------------------------------------------------- /lib/ConstituencyParse.java: -------------------------------------------------------------------------------- 1 | import edu.stanford.nlp.process.WordTokenFactory; 2 | import edu.stanford.nlp.ling.HasWord; 3 | import edu.stanford.nlp.ling.Word; 4 | import edu.stanford.nlp.ling.CoreLabel; 5 | import edu.stanford.nlp.process.PTBTokenizer; 6 | import edu.stanford.nlp.util.StringUtils; 7 | import edu.stanford.nlp.parser.lexparser.LexicalizedParser; 8 | import edu.stanford.nlp.parser.lexparser.TreeBinarizer; 9 | import edu.stanford.nlp.trees.GrammaticalStructure; 10 | import edu.stanford.nlp.trees.GrammaticalStructureFactory; 11 | import edu.stanford.nlp.trees.PennTreebankLanguagePack; 12 | import edu.stanford.nlp.trees.Tree; 13 | import edu.stanford.nlp.trees.Trees; 14 | import edu.stanford.nlp.trees.TreebankLanguagePack; 15 | import edu.stanford.nlp.trees.TypedDependency; 16 | 17 | import java.io.BufferedWriter; 18 | import java.io.FileWriter; 19 | import java.io.StringReader; 20 | import java.io.IOException; 21 | import java.util.ArrayList; 22 | import java.util.Collection; 23 | import java.util.List; 24 | import java.util.HashMap; 25 | import java.util.Properties; 26 | import java.util.Scanner; 27 | 28 | public class ConstituencyParse { 29 | 30 | private boolean tokenize; 31 | private BufferedWriter tokWriter, parentWriter; 32 | private LexicalizedParser parser; 33 | private TreeBinarizer binarizer; 34 | 
private CollapseUnaryTransformer transformer; 35 | private GrammaticalStructureFactory gsf; 36 | 37 | private static final String PCFG_PATH = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"; 38 | 39 | public ConstituencyParse(String tokPath, String parentPath, boolean tokenize) throws IOException { 40 | this.tokenize = tokenize; 41 | if (tokPath != null) { 42 | tokWriter = new BufferedWriter(new FileWriter(tokPath)); 43 | } 44 | parentWriter = new BufferedWriter(new FileWriter(parentPath)); 45 | parser = LexicalizedParser.loadModel(PCFG_PATH); 46 | binarizer = TreeBinarizer.simpleTreeBinarizer( 47 | parser.getTLPParams().headFinder(), parser.treebankLanguagePack()); 48 | transformer = new CollapseUnaryTransformer(); 49 | 50 | // set up to produce dependency representations from constituency trees 51 | TreebankLanguagePack tlp = new PennTreebankLanguagePack(); 52 | gsf = tlp.grammaticalStructureFactory(); 53 | } 54 | 55 | public List sentenceToTokens(String line) { 56 | List tokens = new ArrayList<>(); 57 | if (tokenize) { 58 | PTBTokenizer tokenizer = new PTBTokenizer(new StringReader(line), new WordTokenFactory(), ""); 59 | for (Word label; tokenizer.hasNext(); ) { 60 | tokens.add(tokenizer.next()); 61 | } 62 | } else { 63 | for (String word : line.split(" ")) { 64 | tokens.add(new Word(word)); 65 | } 66 | } 67 | 68 | return tokens; 69 | } 70 | 71 | public Tree parse(List tokens) { 72 | Tree tree = parser.apply(tokens); 73 | return tree; 74 | } 75 | 76 | public int[] constTreeParents(Tree tree) { 77 | Tree binarized = binarizer.transformTree(tree); 78 | Tree collapsedUnary = transformer.transformTree(binarized); 79 | Trees.convertToCoreLabels(collapsedUnary); 80 | collapsedUnary.indexSpans(); 81 | List leaves = collapsedUnary.getLeaves(); 82 | int size = collapsedUnary.size() - leaves.size(); 83 | int[] parents = new int[size]; 84 | HashMap index = new HashMap(); 85 | 86 | int idx = leaves.size(); 87 | int leafIdx = 0; 88 | for (Tree leaf : leaves) { 89 | Tree cur = leaf.parent(collapsedUnary); // go to preterminal 90 | int curIdx = leafIdx++; 91 | boolean done = false; 92 | while (!done) { 93 | Tree parent = cur.parent(collapsedUnary); 94 | if (parent == null) { 95 | parents[curIdx] = 0; 96 | break; 97 | } 98 | 99 | int parentIdx; 100 | int parentNumber = parent.nodeNumber(collapsedUnary); 101 | if (!index.containsKey(parentNumber)) { 102 | parentIdx = idx++; 103 | index.put(parentNumber, parentIdx); 104 | } else { 105 | parentIdx = index.get(parentNumber); 106 | done = true; 107 | } 108 | 109 | parents[curIdx] = parentIdx + 1; 110 | cur = parent; 111 | curIdx = parentIdx; 112 | } 113 | } 114 | 115 | return parents; 116 | } 117 | 118 | // convert constituency parse to a dependency representation and return the 119 | // parent pointer representation of the tree 120 | public int[] depTreeParents(Tree tree, List tokens) { 121 | GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); 122 | Collection tdl = gs.typedDependencies(); 123 | int len = tokens.size(); 124 | int[] parents = new int[len]; 125 | for (int i = 0; i < len; i++) { 126 | // if a node has a parent of -1 at the end of parsing, then the node 127 | // has no parent. 
128 | parents[i] = -1; 129 | } 130 | 131 | for (TypedDependency td : tdl) { 132 | // let root have index 0 133 | int child = td.dep().index(); 134 | int parent = td.gov().index(); 135 | parents[child - 1] = parent; 136 | } 137 | 138 | return parents; 139 | } 140 | 141 | public void printTokens(List tokens) throws IOException { 142 | int len = tokens.size(); 143 | StringBuilder sb = new StringBuilder(); 144 | for (int i = 0; i < len - 1; i++) { 145 | if (tokenize) { 146 | sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word())); 147 | } else { 148 | sb.append(tokens.get(i).word()); 149 | } 150 | sb.append(' '); 151 | } 152 | 153 | if (tokenize) { 154 | sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word())); 155 | } else { 156 | sb.append(tokens.get(len - 1).word()); 157 | } 158 | 159 | sb.append('\n'); 160 | tokWriter.write(sb.toString()); 161 | } 162 | 163 | public void printParents(int[] parents) throws IOException { 164 | StringBuilder sb = new StringBuilder(); 165 | int size = parents.length; 166 | for (int i = 0; i < size - 1; i++) { 167 | sb.append(parents[i]); 168 | sb.append(' '); 169 | } 170 | sb.append(parents[size - 1]); 171 | sb.append('\n'); 172 | parentWriter.write(sb.toString()); 173 | } 174 | 175 | public void close() throws IOException { 176 | if (tokWriter != null) tokWriter.close(); 177 | parentWriter.close(); 178 | } 179 | 180 | public static void main(String[] args) throws Exception { 181 | Properties props = StringUtils.argsToProperties(args); 182 | if (!props.containsKey("parentpath")) { 183 | System.err.println( 184 | "usage: java ConstituencyParse -deps - -tokenize - -tokpath -parentpath "); 185 | System.exit(1); 186 | } 187 | 188 | // whether to tokenize input sentences 189 | boolean tokenize = false; 190 | if (props.containsKey("tokenize")) { 191 | tokenize = true; 192 | } 193 | 194 | // whether to produce dependency trees from the constituency parse 195 | boolean deps = false; 196 | if (props.containsKey("deps")) { 197 | deps = true; 198 | } 199 | 200 | String tokPath = props.containsKey("tokpath") ? props.getProperty("tokpath") : null; 201 | String parentPath = props.getProperty("parentpath"); 202 | ConstituencyParse processor = new ConstituencyParse(tokPath, parentPath, tokenize); 203 | 204 | Scanner stdin = new Scanner(System.in); 205 | int count = 0; 206 | long start = System.currentTimeMillis(); 207 | while (stdin.hasNextLine()) { 208 | String line = stdin.nextLine(); 209 | List tokens = processor.sentenceToTokens(line); 210 | Tree parse = processor.parse(tokens); 211 | 212 | // produce parent pointer representation 213 | int[] parents = deps ? 
processor.depTreeParents(parse, tokens) 214 | : processor.constTreeParents(parse); 215 | 216 | // print 217 | if (tokPath != null) { 218 | processor.printTokens(tokens); 219 | } 220 | processor.printParents(parents); 221 | 222 | count++; 223 | if (count % 1000 == 0) { 224 | double elapsed = (System.currentTimeMillis() - start) / 1000.0; 225 | System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed); 226 | } 227 | } 228 | 229 | long totalTimeMillis = System.currentTimeMillis() - start; 230 | System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", 231 | count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count); 232 | processor.close(); 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /sentiment/LSTMSentiment.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Sentiment classification using LSTMs. 4 | 5 | --]] 6 | 7 | local LSTMSentiment = torch.class('treelstm.LSTMSentiment') 8 | 9 | function LSTMSentiment:__init(config) 10 | self.mem_dim = config.mem_dim or 150 11 | self.learning_rate = config.learning_rate or 0.05 12 | self.emb_learning_rate = config.emb_learning_rate or 0.1 13 | self.num_layers = config.num_layers or 1 14 | self.batch_size = config.batch_size or 5 15 | self.reg = config.reg or 1e-4 16 | self.structure = config.structure or 'lstm' -- {lstm, bilstm} 17 | self.fine_grained = (config.fine_grained == nil) and true or config.fine_grained 18 | self.dropout = (config.dropout == nil) and true or config.dropout 19 | self.train_subtrees = 4 -- number of subtrees to sample during training 20 | 21 | -- word embedding 22 | self.emb_dim = config.emb_vecs:size(2) 23 | self.emb = nn.LookupTable(config.emb_vecs:size(1), self.emb_dim) 24 | self.emb.weight:copy(config.emb_vecs) 25 | 26 | self.in_zeros = torch.zeros(self.emb_dim) 27 | self.num_classes = self.fine_grained and 5 or 3 28 | 29 | -- optimizer configuration 30 | self.optim_state = { learningRate = self.learning_rate } 31 | 32 | -- negative log likelihood optimization objective 33 | self.criterion = nn.ClassNLLCriterion() 34 | 35 | -- sentiment classification module 36 | self.sentiment_module = self:new_sentiment_module() 37 | 38 | -- initialize LSTM model 39 | local lstm_config = { 40 | in_dim = self.emb_dim, 41 | mem_dim = self.mem_dim, 42 | num_layers = self.num_layers, 43 | gate_output = true, 44 | } 45 | 46 | if self.structure == 'lstm' then 47 | self.lstm = treelstm.LSTM(lstm_config) 48 | elseif self.structure == 'bilstm' then 49 | self.lstm = treelstm.LSTM(lstm_config) 50 | self.lstm_b = treelstm.LSTM(lstm_config) 51 | else 52 | error('invalid LSTM type: ' .. 
self.structure) 53 | end 54 | 55 | local modules = nn.Parallel() 56 | :add(self.lstm) 57 | :add(self.sentiment_module) 58 | self.params, self.grad_params = modules:getParameters() 59 | 60 | -- share must only be called after getParameters, since this changes the 61 | -- location of the parameters 62 | if self.structure == 'bilstm' then 63 | share_params(self.lstm_b, self.lstm) 64 | end 65 | end 66 | 67 | function LSTMSentiment:new_sentiment_module() 68 | local input_dim = self.num_layers * self.mem_dim 69 | local inputs, vec 70 | if self.structure == 'lstm' then 71 | local rep = nn.Identity()() 72 | if self.num_layers == 1 then 73 | vec = {rep} 74 | else 75 | vec = nn.JoinTable(1)(rep) 76 | end 77 | inputs = {rep} 78 | elseif self.structure == 'bilstm' then 79 | local frep, brep = nn.Identity()(), nn.Identity()() 80 | input_dim = input_dim * 2 81 | if self.num_layers == 1 then 82 | vec = nn.JoinTable(1){frep, brep} 83 | else 84 | vec = nn.JoinTable(1){nn.JoinTable(1)(frep), nn.JoinTable(1)(brep)} 85 | end 86 | inputs = {frep, brep} 87 | end 88 | 89 | local logprobs 90 | if self.dropout then 91 | logprobs = nn.LogSoftMax()( 92 | nn.Linear(input_dim, self.num_classes)( 93 | nn.Dropout()(vec))) 94 | else 95 | logprobs = nn.LogSoftMax()( 96 | nn.Linear(input_dim, self.num_classes)(vec)) 97 | end 98 | 99 | return nn.gModule(inputs, {logprobs}) 100 | end 101 | 102 | function LSTMSentiment:train(dataset) 103 | self.lstm:training() 104 | self.sentiment_module:training() 105 | if self.structure == 'bilstm' then 106 | self.lstm_b:training() 107 | end 108 | 109 | local indices = torch.randperm(dataset.size) 110 | local zeros = torch.zeros(self.mem_dim) 111 | for i = 1, dataset.size, self.batch_size do 112 | xlua.progress(i, dataset.size) 113 | local batch_size = math.min(i + self.batch_size - 1, dataset.size) - i + 1 114 | 115 | local feval = function(x) 116 | self.grad_params:zero() 117 | self.emb:zeroGradParameters() 118 | 119 | local loss = 0 120 | for j = 1, batch_size do 121 | local idx = indices[i + j - 1] 122 | local tree = dataset.trees[idx] 123 | local sent = dataset.sents[idx] 124 | local subtrees = tree:depth_first_preorder() 125 | for k = 1, self.train_subtrees + 1 do 126 | local subtree = (k == 1) and tree or subtrees[math.ceil(torch.uniform(1, #subtrees))] 127 | local span = sent[{{subtree.lo, subtree.hi}}] 128 | local inputs = self.emb:forward(span) 129 | 130 | -- get sentence representations 131 | local rep 132 | if self.structure == 'lstm' then 133 | rep = self.lstm:forward(inputs) 134 | elseif self.structure == 'bilstm' then 135 | rep = { 136 | self.lstm:forward(inputs), 137 | self.lstm_b:forward(inputs, true), -- true => reverse 138 | } 139 | end 140 | 141 | -- compute class log probabilities 142 | local output = self.sentiment_module:forward(rep) 143 | 144 | -- compute loss and backpropagate 145 | local example_loss = self.criterion:forward(output, subtree.gold_label) 146 | loss = loss + example_loss 147 | local obj_grad = self.criterion:backward(output, subtree.gold_label) 148 | local rep_grad = self.sentiment_module:backward(rep, obj_grad) 149 | local input_grads 150 | if self.structure == 'lstm' then 151 | input_grads = self:LSTM_backward(sent, inputs, rep_grad) 152 | elseif self.structure == 'bilstm' then 153 | input_grads = self:BiLSTM_backward(sent, inputs, rep_grad) 154 | end 155 | self.emb:backward(span, input_grads) 156 | end 157 | end 158 | 159 | local batch_subtrees = batch_size * (self.train_subtrees + 1) 160 | loss = loss / batch_subtrees 161 | 
self.grad_params:div(batch_subtrees) 162 | self.emb.gradWeight:div(batch_subtrees) 163 | 164 | -- regularization 165 | loss = loss + 0.5 * self.reg * self.params:norm() ^ 2 166 | self.grad_params:add(self.reg, self.params) 167 | return loss, self.grad_params 168 | end 169 | 170 | optim.adagrad(feval, self.params, self.optim_state) 171 | self.emb:updateParameters(self.emb_learning_rate) 172 | end 173 | xlua.progress(dataset.size, dataset.size) 174 | end 175 | 176 | -- LSTM backward propagation 177 | function LSTMSentiment:LSTM_backward(sent, inputs, rep_grad) 178 | local grad 179 | if self.num_layers == 1 then 180 | grad = torch.zeros(sent:nElement(), self.mem_dim) 181 | grad[sent:nElement()] = rep_grad 182 | else 183 | grad = torch.zeros(sent:nElement(), self.num_layers, self.mem_dim) 184 | for l = 1, self.num_layers do 185 | grad[{sent:nElement(), l, {}}] = rep_grad[l] 186 | end 187 | end 188 | local input_grads = self.lstm:backward(inputs, grad) 189 | return input_grads 190 | end 191 | 192 | -- Bidirectional LSTM backward propagation 193 | function LSTMSentiment:BiLSTM_backward(sent, inputs, rep_grad) 194 | local grad, grad_b 195 | if self.num_layers == 1 then 196 | grad = torch.zeros(sent:nElement(), self.mem_dim) 197 | grad_b = torch.zeros(sent:nElement(), self.mem_dim) 198 | grad[sent:nElement()] = rep_grad[1] 199 | grad_b[1] = rep_grad[2] 200 | else 201 | grad = torch.zeros(sent:nElement(), self.num_layers, self.mem_dim) 202 | grad_b = torch.zeros(sent:nElement(), self.num_layers, self.mem_dim) 203 | for l = 1, self.num_layers do 204 | grad[{sent:nElement(), l, {}}] = rep_grad[1][l] 205 | grad_b[{1, l, {}}] = rep_grad[2][l] 206 | end 207 | end 208 | local input_grads = self.lstm:backward(inputs, grad) 209 | local input_grads_b = self.lstm_b:backward(inputs, grad_b, true) 210 | return input_grads + input_grads_b 211 | end 212 | 213 | -- Predict the sentiment of a sentence. 214 | function LSTMSentiment:predict(sent) 215 | self.lstm:evaluate() 216 | self.sentiment_module:evaluate() 217 | local inputs = self.emb:forward(sent) 218 | 219 | local rep 220 | if self.structure == 'lstm' then 221 | rep = self.lstm:forward(inputs) 222 | elseif self.structure == 'bilstm' then 223 | self.lstm_b:evaluate() 224 | rep = { 225 | self.lstm:forward(inputs), 226 | self.lstm_b:forward(inputs, true), 227 | } 228 | end 229 | local logprobs = self.sentiment_module:forward(rep) 230 | local prediction 231 | if self.fine_grained then 232 | prediction = argmax(logprobs) 233 | else 234 | prediction = (logprobs[1] > logprobs[3]) and 1 or 3 235 | end 236 | self.lstm:forget() 237 | if self.structure == 'bilstm' then 238 | self.lstm_b:forget() 239 | end 240 | return prediction 241 | end 242 | 243 | -- Produce sentiment predictions for each sentence in the dataset. 
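-- Note that training samples random subtree spans per sentence, while
-- prediction below scores only the full sentence. In the fine-grained setting
-- the prediction is the argmax over the five class log-probabilities; in the
-- binary setting classes 1 (negative) and 3 (positive) are compared directly
-- and the neutral class is ignored. Accuracy can then be computed as in
-- sentiment/main.lua, e.g. (names illustrative):
--
--   local preds = model:predict_dataset(dev_dataset)
--   local acc = torch.eq(preds, dev_dataset.labels):sum() / preds:size(1)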
244 | function LSTMSentiment:predict_dataset(dataset) 245 | local predictions = torch.Tensor(dataset.size) 246 | for i = 1, dataset.size do 247 | xlua.progress(i, dataset.size) 248 | predictions[i] = self:predict(dataset.sents[i]) 249 | end 250 | return predictions 251 | end 252 | 253 | function argmax(v) 254 | local idx = 1 255 | local max = v[1] 256 | for i = 2, v:size(1) do 257 | if v[i] > max then 258 | max = v[i] 259 | idx = i 260 | end 261 | end 262 | return idx 263 | end 264 | 265 | function LSTMSentiment:print_config() 266 | local num_params = self.params:size(1) 267 | local num_sentiment_params = self:new_sentiment_module():getParameters():size(1) 268 | printf('%-25s = %s\n', 'fine grained sentiment', tostring(self.fine_grained)) 269 | printf('%-25s = %d\n', 'num params', num_params) 270 | printf('%-25s = %d\n', 'num compositional params', num_params - num_sentiment_params) 271 | printf('%-25s = %d\n', 'word vector dim', self.emb_dim) 272 | printf('%-25s = %d\n', 'LSTM memory dim', self.mem_dim) 273 | printf('%-25s = %s\n', 'LSTM structure', self.structure) 274 | printf('%-25s = %d\n', 'LSTM layers', self.num_layers) 275 | printf('%-25s = %.2e\n', 'regularization strength', self.reg) 276 | printf('%-25s = %d\n', 'minibatch size', self.batch_size * (self.train_subtrees + 1)) 277 | printf('%-25s = %.2e\n', 'learning rate', self.learning_rate) 278 | printf('%-25s = %.2e\n', 'word vector learning rate', self.emb_learning_rate) 279 | printf('%-25s = %s\n', 'dropout', tostring(self.dropout)) 280 | end 281 | 282 | -- 283 | -- Serialization 284 | -- 285 | 286 | function LSTMSentiment:save(path) 287 | local config = { 288 | batch_size = self.batch_size, 289 | dropout = self.dropout, 290 | emb_learning_rate = self.emb_learning_rate, 291 | emb_vecs = self.emb.weight:float(), 292 | fine_grained = self.fine_grained, 293 | learning_rate = self.learning_rate, 294 | num_layers = self.num_layers, 295 | mem_dim = self.mem_dim, 296 | reg = self.reg, 297 | structure = self.structure, 298 | } 299 | 300 | torch.save(path, { 301 | params = self.params, 302 | config = config, 303 | }) 304 | end 305 | 306 | function LSTMSentiment.load(path) 307 | local state = torch.load(path) 308 | local model = treelstm.LSTMSentiment.new(state.config) 309 | model.params:copy(state.params) 310 | return model 311 | end 312 | -------------------------------------------------------------------------------- /relatedness/LSTMSim.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Semantic relatedness prediction using LSTMs. 
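Sentence pairs are encoded by left-to-right LSTMs (or bidirectional LSTMs)
whose weights are tied across the two sentences; the same element-wise product
and absolute-difference features and KL-divergence objective are used as in
the Tree-LSTM variant. Unlike TreeLSTMSim, the word vectors are read directly
from emb_vecs rather than through a LookupTable layer.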
4 | 5 | --]] 6 | 7 | local LSTMSim = torch.class('treelstm.LSTMSim') 8 | 9 | function LSTMSim:__init(config) 10 | self.mem_dim = config.mem_dim or 150 11 | self.learning_rate = config.learning_rate or 0.05 12 | self.batch_size = config.batch_size or 25 13 | self.num_layers = config.num_layers or 1 14 | self.reg = config.reg or 1e-4 15 | self.structure = config.structure or 'lstm' -- {lstm, bilstm} 16 | self.sim_nhidden = config.sim_nhidden or 50 17 | 18 | -- word embedding 19 | self.emb_vecs = config.emb_vecs 20 | self.emb_dim = config.emb_vecs:size(2) 21 | 22 | -- number of similarity rating classes 23 | self.num_classes = 5 24 | 25 | -- optimizer configuration 26 | self.optim_state = { learningRate = self.learning_rate } 27 | 28 | -- KL divergence optimization objective 29 | self.criterion = nn.DistKLDivCriterion() 30 | 31 | -- initialize LSTM model 32 | local lstm_config = { 33 | in_dim = self.emb_dim, 34 | mem_dim = self.mem_dim, 35 | num_layers = self.num_layers, 36 | gate_output = false, 37 | } 38 | 39 | if self.structure == 'lstm' then 40 | self.llstm = treelstm.LSTM(lstm_config) -- "left" LSTM 41 | self.rlstm = treelstm.LSTM(lstm_config) -- "right" LSTM 42 | elseif self.structure == 'bilstm' then 43 | self.llstm = treelstm.LSTM(lstm_config) 44 | self.llstm_b = treelstm.LSTM(lstm_config) -- backward "left" LSTM 45 | self.rlstm = treelstm.LSTM(lstm_config) 46 | self.rlstm_b = treelstm.LSTM(lstm_config) -- backward "right" LSTM 47 | else 48 | error('invalid LSTM type: ' .. self.structure) 49 | end 50 | 51 | -- similarity model 52 | self.sim_module = self:new_sim_module() 53 | local modules = nn.Parallel() 54 | :add(self.llstm) 55 | :add(self.sim_module) 56 | self.params, self.grad_params = modules:getParameters() 57 | 58 | -- share must only be called after getParameters, since this changes the 59 | -- location of the parameters 60 | share_params(self.rlstm, self.llstm) 61 | if self.structure == 'bilstm' then 62 | -- tying the forward and backward weights improves performance 63 | share_params(self.llstm_b, self.llstm) 64 | share_params(self.rlstm_b, self.llstm) 65 | end 66 | end 67 | 68 | function LSTMSim:new_sim_module() 69 | local lvec, rvec, inputs, input_dim 70 | if self.structure == 'lstm' then 71 | -- standard (left-to-right) LSTM 72 | input_dim = 2 * self.num_layers * self.mem_dim 73 | local linput, rinput = nn.Identity()(), nn.Identity()() 74 | if self.num_layers == 1 then 75 | lvec, rvec = linput, rinput 76 | else 77 | lvec, rvec = nn.JoinTable(1)(linput), nn.JoinTable(1)(rinput) 78 | end 79 | inputs = {linput, rinput} 80 | elseif self.structure == 'bilstm' then 81 | -- bidirectional LSTM 82 | input_dim = 4 * self.num_layers * self.mem_dim 83 | local lf, lb, rf, rb = nn.Identity()(), nn.Identity()(), nn.Identity()(), nn.Identity()() 84 | if self.num_layers == 1 then 85 | lvec = nn.JoinTable(1){lf, lb} 86 | rvec = nn.JoinTable(1){rf, rb} 87 | else 88 | -- in the multilayer case, each input is a table of hidden vectors (one for each layer) 89 | lvec = nn.JoinTable(1){nn.JoinTable(1)(lf), nn.JoinTable(1)(lb)} 90 | rvec = nn.JoinTable(1){nn.JoinTable(1)(rf), nn.JoinTable(1)(rb)} 91 | end 92 | inputs = {lf, lb, rf, rb} 93 | end 94 | local mult_dist = nn.CMulTable(){lvec, rvec} 95 | local add_dist = nn.Abs()(nn.CSubTable(){lvec, rvec}) 96 | local vec_dist_feats = nn.JoinTable(1){mult_dist, add_dist} 97 | local vecs_to_input = nn.gModule(inputs, {vec_dist_feats}) 98 | 99 | -- define similarity model architecture 100 | local sim_module = nn.Sequential() 101 | :add(vecs_to_input) 102 
| :add(nn.Linear(input_dim, self.sim_nhidden)) 103 | :add(nn.Sigmoid()) -- does better than tanh 104 | :add(nn.Linear(self.sim_nhidden, self.num_classes)) 105 | :add(nn.LogSoftMax()) 106 | return sim_module 107 | end 108 | 109 | function LSTMSim:train(dataset) 110 | self.llstm:training() 111 | self.rlstm:training() 112 | if self.structure == 'bilstm' then 113 | self.llstm_b:training() 114 | self.rlstm_b:training() 115 | end 116 | 117 | local indices = torch.randperm(dataset.size) 118 | local zeros = torch.zeros(self.mem_dim) 119 | for i = 1, dataset.size, self.batch_size do 120 | xlua.progress(i, dataset.size) 121 | local batch_size = math.min(i + self.batch_size - 1, dataset.size) - i + 1 122 | 123 | -- get target distributions for batch 124 | local targets = torch.zeros(batch_size, self.num_classes) 125 | for j = 1, batch_size do 126 | local sim = dataset.labels[indices[i + j - 1]] * (self.num_classes - 1) + 1 127 | local ceil, floor = math.ceil(sim), math.floor(sim) 128 | if ceil == floor then 129 | targets[{j, floor}] = 1 130 | else 131 | targets[{j, floor}] = ceil - sim 132 | targets[{j, ceil}] = sim - floor 133 | end 134 | end 135 | 136 | local feval = function(x) 137 | self.grad_params:zero() 138 | local loss = 0 139 | for j = 1, batch_size do 140 | local idx = indices[i + j - 1] 141 | local lsent, rsent = dataset.lsents[idx], dataset.rsents[idx] 142 | local linputs = self.emb_vecs:index(1, lsent:long()):double() 143 | local rinputs = self.emb_vecs:index(1, rsent:long()):double() 144 | 145 | -- get sentence representations 146 | local inputs 147 | if self.structure == 'lstm' then 148 | inputs = {self.llstm:forward(linputs), self.rlstm:forward(rinputs)} 149 | elseif self.structure == 'bilstm' then 150 | inputs = { 151 | self.llstm:forward(linputs), 152 | self.llstm_b:forward(linputs, true), -- true => reverse 153 | self.rlstm:forward(rinputs), 154 | self.rlstm_b:forward(rinputs, true) 155 | } 156 | end 157 | 158 | -- compute relatedness 159 | local output = self.sim_module:forward(inputs) 160 | 161 | -- compute loss and backpropagate 162 | local example_loss = self.criterion:forward(output, targets[j]) 163 | loss = loss + example_loss 164 | local sim_grad = self.criterion:backward(output, targets[j]) 165 | local rep_grad = self.sim_module:backward(inputs, sim_grad) 166 | if self.structure == 'lstm' then 167 | self:LSTM_backward(lsent, rsent, linputs, rinputs, rep_grad) 168 | elseif self.structure == 'bilstm' then 169 | self:BiLSTM_backward(lsent, rsent, linputs, rinputs, rep_grad) 170 | end 171 | end 172 | 173 | loss = loss / batch_size 174 | self.grad_params:div(batch_size) 175 | 176 | -- regularization 177 | loss = loss + 0.5 * self.reg * self.params:norm() ^ 2 178 | self.grad_params:add(self.reg, self.params) 179 | return loss, self.grad_params 180 | end 181 | 182 | optim.adagrad(feval, self.params, self.optim_state) 183 | end 184 | xlua.progress(dataset.size, dataset.size) 185 | end 186 | 187 | -- LSTM backward propagation 188 | function LSTMSim:LSTM_backward(lsent, rsent, linputs, rinputs, rep_grad) 189 | local lgrad, rgrad 190 | if self.num_layers == 1 then 191 | lgrad = torch.zeros(lsent:nElement(), self.mem_dim) 192 | rgrad = torch.zeros(rsent:nElement(), self.mem_dim) 193 | lgrad[lsent:nElement()] = rep_grad[1] 194 | rgrad[rsent:nElement()] = rep_grad[2] 195 | else 196 | lgrad = torch.zeros(lsent:nElement(), self.num_layers, self.mem_dim) 197 | rgrad = torch.zeros(rsent:nElement(), self.num_layers, self.mem_dim) 198 | for l = 1, self.num_layers do 199 | 
lgrad[{lsent:nElement(), l, {}}] = rep_grad[1][l] 200 | rgrad[{rsent:nElement(), l, {}}] = rep_grad[2][l] 201 | end 202 | end 203 | self.llstm:backward(linputs, lgrad) 204 | self.rlstm:backward(rinputs, rgrad) 205 | end 206 | 207 | -- Bidirectional LSTM backward propagation 208 | function LSTMSim:BiLSTM_backward(lsent, rsent, linputs, rinputs, rep_grad) 209 | local lgrad, lgrad_b, rgrad, rgrad_b 210 | if self.num_layers == 1 then 211 | lgrad = torch.zeros(lsent:nElement(), self.mem_dim) 212 | lgrad_b = torch.zeros(lsent:nElement(), self.mem_dim) 213 | rgrad = torch.zeros(rsent:nElement(), self.mem_dim) 214 | rgrad_b = torch.zeros(rsent:nElement(), self.mem_dim) 215 | lgrad[lsent:nElement()] = rep_grad[1] 216 | rgrad[rsent:nElement()] = rep_grad[3] 217 | lgrad_b[1] = rep_grad[2] 218 | rgrad_b[1] = rep_grad[4] 219 | else 220 | lgrad = torch.zeros(lsent:nElement(), self.num_layers, self.mem_dim) 221 | lgrad_b = torch.zeros(lsent:nElement(), self.num_layers, self.mem_dim) 222 | rgrad = torch.zeros(rsent:nElement(), self.num_layers, self.mem_dim) 223 | rgrad_b = torch.zeros(rsent:nElement(), self.num_layers, self.mem_dim) 224 | for l = 1, self.num_layers do 225 | lgrad[{lsent:nElement(), l, {}}] = rep_grad[1][l] 226 | rgrad[{rsent:nElement(), l, {}}] = rep_grad[3][l] 227 | lgrad_b[{1, l, {}}] = rep_grad[2][l] 228 | rgrad_b[{1, l, {}}] = rep_grad[4][l] 229 | end 230 | end 231 | self.llstm:backward(linputs, lgrad) 232 | self.llstm_b:backward(linputs, lgrad_b, true) 233 | self.rlstm:backward(rinputs, rgrad) 234 | self.rlstm_b:backward(rinputs, rgrad_b, true) 235 | end 236 | 237 | -- Predict the similarity of a sentence pair. 238 | function LSTMSim:predict(lsent, rsent) 239 | self.llstm:evaluate() 240 | self.rlstm:evaluate() 241 | local linputs = self.emb_vecs:index(1, lsent:long()):double() 242 | local rinputs = self.emb_vecs:index(1, rsent:long()):double() 243 | local inputs 244 | if self.structure == 'lstm' then 245 | inputs = {self.llstm:forward(linputs), self.rlstm:forward(rinputs)} 246 | elseif self.structure == 'bilstm' then 247 | self.llstm_b:evaluate() 248 | self.rlstm_b:evaluate() 249 | inputs = { 250 | self.llstm:forward(linputs), 251 | self.llstm_b:forward(linputs, true), 252 | self.rlstm:forward(rinputs), 253 | self.rlstm_b:forward(rinputs, true) 254 | } 255 | end 256 | local output = self.sim_module:forward(inputs) 257 | self.llstm:forget() 258 | self.rlstm:forget() 259 | if self.structure == 'bilstm' then 260 | self.llstm_b:forget() 261 | self.rlstm_b:forget() 262 | end 263 | return torch.range(1, 5):dot(output:exp()) 264 | end 265 | 266 | -- Produce similarity predictions for each sentence pair in the dataset. 
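-- The target distributions used in train() place a gold rating y in [1, 5] on
-- the two adjacent integer classes: p[floor(y)] = ceil(y) - y and
-- p[ceil(y)] = y - floor(y) (a single 1 when y is an integer). For example,
-- y = 3.6 gives p = {0, 0, 0.4, 0.6, 0}, and the expected-rating computation
-- in predict() above recovers 3.6 from that distribution.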
267 | function LSTMSim:predict_dataset(dataset) 268 | local predictions = torch.Tensor(dataset.size) 269 | for i = 1, dataset.size do 270 | xlua.progress(i, dataset.size) 271 | local lsent, rsent = dataset.lsents[i], dataset.rsents[i] 272 | predictions[i] = self:predict(lsent, rsent) 273 | end 274 | return predictions 275 | end 276 | 277 | function LSTMSim:print_config() 278 | local num_params = self.params:nElement() 279 | local num_sim_params = self:new_sim_module():getParameters():nElement() 280 | printf('%-25s = %d\n', 'num params', num_params) 281 | printf('%-25s = %d\n', 'num compositional params', num_params - num_sim_params) 282 | printf('%-25s = %d\n', 'word vector dim', self.emb_dim) 283 | printf('%-25s = %d\n', 'LSTM memory dim', self.mem_dim) 284 | printf('%-25s = %.2e\n', 'regularization strength', self.reg) 285 | printf('%-25s = %d\n', 'minibatch size', self.batch_size) 286 | printf('%-25s = %.2e\n', 'learning rate', self.learning_rate) 287 | printf('%-25s = %s\n', 'LSTM structure', self.structure) 288 | printf('%-25s = %d\n', 'LSTM layers', self.num_layers) 289 | printf('%-25s = %d\n', 'sim module hidden dim', self.sim_nhidden) 290 | end 291 | 292 | -- 293 | -- Serialization 294 | -- 295 | 296 | function LSTMSim:save(path) 297 | local config = { 298 | batch_size = self.batch_size, 299 | emb_vecs = self.emb_vecs:float(), 300 | learning_rate = self.learning_rate, 301 | num_layers = self.num_layers, 302 | mem_dim = self.mem_dim, 303 | sim_nhidden = self.sim_nhidden, 304 | reg = self.reg, 305 | structure = self.structure, 306 | } 307 | 308 | torch.save(path, { 309 | params = self.params, 310 | config = config, 311 | }) 312 | end 313 | 314 | function LSTMSim.load(path) 315 | local state = torch.load(path) 316 | local model = treelstm.LSTMSim.new(state.config) 317 | model.params:copy(state.params) 318 | return model 319 | end 320 | -------------------------------------------------------------------------------- /scripts/preprocess-sst.py: -------------------------------------------------------------------------------- 1 | """ 2 | Preprocessing script for Stanford Sentiment Treebank data. 
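Maps the SST phrase ratings in [0, 1] onto five integer labels
(-2, -1, 0, +1, +2, using the cutoffs in load_dictionary) and splits the
sentences and their parent-pointer trees into train/dev/test directories
(see split below), producing the sents.txt and parents.txt files read by
util/read_data.lua.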
3 | 4 | """ 5 | 6 | import os 7 | import glob 8 | 9 | # 10 | # Trees and tree loading 11 | # 12 | 13 | class ConstTree(object): 14 | def __init__(self): 15 | self.left = None 16 | self.right = None 17 | 18 | def size(self): 19 | self.size = 1 20 | if self.left is not None: 21 | self.size += self.left.size() 22 | if self.right is not None: 23 | self.size += self.right.size() 24 | return self.size 25 | 26 | def set_spans(self): 27 | if self.word is not None: 28 | self.span = self.word 29 | return self.span 30 | 31 | self.span = self.left.set_spans() 32 | if self.right is not None: 33 | self.span += ' ' + self.right.set_spans() 34 | return self.span 35 | 36 | def get_labels(self, spans, labels, dictionary): 37 | if self.span in dictionary: 38 | spans[self.idx] = self.span 39 | labels[self.idx] = dictionary[self.span] 40 | if self.left is not None: 41 | self.left.get_labels(spans, labels, dictionary) 42 | if self.right is not None: 43 | self.right.get_labels(spans, labels, dictionary) 44 | 45 | class DepTree(object): 46 | def __init__(self): 47 | self.children = [] 48 | self.lo, self.hi = None, None 49 | 50 | def size(self): 51 | self.size = 1 52 | for c in self.children: 53 | self.size += c.size() 54 | return self.size 55 | 56 | def set_spans(self, words): 57 | self.lo, self.hi = self.idx, self.idx + 1 58 | if len(self.children) == 0: 59 | self.span = words[self.idx] 60 | return 61 | for c in self.children: 62 | c.set_spans(words) 63 | self.lo = min(self.lo, c.lo) 64 | self.hi = max(self.hi, c.hi) 65 | self.span = ' '.join(words[self.lo : self.hi]) 66 | 67 | def get_labels(self, spans, labels, dictionary): 68 | if self.span in dictionary: 69 | spans[self.idx] = self.span 70 | labels[self.idx] = dictionary[self.span] 71 | for c in self.children: 72 | c.get_labels(spans, labels, dictionary) 73 | 74 | def load_trees(dirpath): 75 | const_trees, dep_trees, toks = [], [], [] 76 | with open(os.path.join(dirpath, 'parents.txt')) as parentsfile, \ 77 | open(os.path.join(dirpath, 'dparents.txt')) as dparentsfile, \ 78 | open(os.path.join(dirpath, 'sents.txt')) as toksfile: 79 | parents, dparents = [], [] 80 | for line in parentsfile: 81 | parents.append(map(int, line.split())) 82 | for line in dparentsfile: 83 | dparents.append(map(int, line.split())) 84 | for line in toksfile: 85 | toks.append(line.strip().split()) 86 | for i in xrange(len(toks)): 87 | const_trees.append(load_constituency_tree(parents[i], toks[i])) 88 | dep_trees.append(load_dependency_tree(dparents[i])) 89 | return const_trees, dep_trees, toks 90 | 91 | def load_constituency_tree(parents, words): 92 | trees = [] 93 | root = None 94 | size = len(parents) 95 | for i in xrange(size): 96 | trees.append(None) 97 | 98 | word_idx = 0 99 | for i in xrange(size): 100 | if not trees[i]: 101 | idx = i 102 | prev = None 103 | prev_idx = None 104 | word = words[word_idx] 105 | word_idx += 1 106 | while True: 107 | tree = ConstTree() 108 | parent = parents[idx] - 1 109 | tree.word, tree.parent, tree.idx = word, parent, idx 110 | word = None 111 | if prev is not None: 112 | if tree.left is None: 113 | tree.left = prev 114 | else: 115 | tree.right = prev 116 | trees[idx] = tree 117 | if parent >= 0 and trees[parent] is not None: 118 | if trees[parent].left is None: 119 | trees[parent].left = tree 120 | else: 121 | trees[parent].right = tree 122 | break 123 | elif parent == -1: 124 | root = tree 125 | break 126 | else: 127 | prev = tree 128 | prev_idx = idx 129 | idx = parent 130 | return root 131 | 132 | def load_dependency_tree(parents): 133 | 
trees = [] 134 | root = None 135 | size = len(parents) 136 | for i in xrange(size): 137 | trees.append(None) 138 | 139 | for i in xrange(size): 140 | if not trees[i]: 141 | idx = i 142 | prev = None 143 | prev_idx = None 144 | while True: 145 | tree = DepTree() 146 | parent = parents[idx] - 1 147 | 148 | # node is not in tree 149 | if parent == -2: 150 | break 151 | 152 | tree.parent, tree.idx = parent, idx 153 | if prev is not None: 154 | tree.children.append(prev) 155 | trees[idx] = tree 156 | if parent >= 0 and trees[parent] is not None: 157 | trees[parent].children.append(tree) 158 | break 159 | elif parent == -1: 160 | root = tree 161 | break 162 | else: 163 | prev = tree 164 | prev_idx = idx 165 | idx = parent 166 | return root 167 | 168 | # 169 | # Various utilities 170 | # 171 | 172 | def make_dirs(dirs): 173 | for d in dirs: 174 | if not os.path.exists(d): 175 | os.makedirs(d) 176 | 177 | def load_sents(dirpath): 178 | sents = [] 179 | with open(os.path.join(dirpath, 'SOStr.txt')) as sentsfile: 180 | for line in sentsfile: 181 | sent = ' '.join(line.split('|')) 182 | sents.append(sent.strip()) 183 | return sents 184 | 185 | def load_splits(dirpath): 186 | splits = [] 187 | with open(os.path.join(dirpath, 'datasetSplit.txt')) as splitfile: 188 | splitfile.readline() 189 | for line in splitfile: 190 | idx, split = line.split(',') 191 | splits.append(int(split)) 192 | return splits 193 | 194 | def load_parents(dirpath): 195 | parents = [] 196 | with open(os.path.join(dirpath, 'STree.txt')) as parentsfile: 197 | for line in parentsfile: 198 | p = ' '.join(line.split('|')) 199 | parents.append(p.strip()) 200 | return parents 201 | 202 | def load_dictionary(dirpath): 203 | labels = [] 204 | with open(os.path.join(dirpath, 'sentiment_labels.txt')) as labelsfile: 205 | labelsfile.readline() 206 | for line in labelsfile: 207 | idx, rating = line.split('|') 208 | idx = int(idx) 209 | rating = float(rating) 210 | if rating <= 0.2: 211 | label = -2 212 | elif rating <= 0.4: 213 | label = -1 214 | elif rating > 0.8: 215 | label = +2 216 | elif rating > 0.6: 217 | label = +1 218 | else: 219 | label = 0 220 | labels.append(label) 221 | 222 | d = {} 223 | with open(os.path.join(dirpath, 'dictionary.txt')) as dictionary: 224 | for line in dictionary: 225 | s, idx = line.split('|') 226 | d[s] = labels[int(idx)] 227 | return d 228 | 229 | def build_vocab(filepaths, dst_path, lowercase=True): 230 | vocab = set() 231 | for filepath in filepaths: 232 | with open(filepath) as f: 233 | for line in f: 234 | if lowercase: 235 | line = line.lower() 236 | vocab |= set(line.split()) 237 | with open(dst_path, 'w') as f: 238 | for w in sorted(vocab): 239 | f.write(w + '\n') 240 | 241 | def split(sst_dir, train_dir, dev_dir, test_dir): 242 | sents = load_sents(sst_dir) 243 | splits = load_splits(sst_dir) 244 | parents = load_parents(sst_dir) 245 | 246 | with open(os.path.join(train_dir, 'sents.txt'), 'w') as train, \ 247 | open(os.path.join(dev_dir, 'sents.txt'), 'w') as dev, \ 248 | open(os.path.join(test_dir, 'sents.txt'), 'w') as test, \ 249 | open(os.path.join(train_dir, 'parents.txt'), 'w') as trainparents, \ 250 | open(os.path.join(dev_dir, 'parents.txt'), 'w') as devparents, \ 251 | open(os.path.join(test_dir, 'parents.txt'), 'w') as testparents: 252 | 253 | for sent, split, p in zip(sents, splits, parents): 254 | if split == 1: 255 | train.write(sent) 256 | train.write('\n') 257 | trainparents.write(p) 258 | trainparents.write('\n') 259 | elif split == 2: 260 | test.write(sent) 261 | test.write('\n') 262 
| 263 | testparents.write(p) 264 | testparents.write('\n') 265 | else: 266 | dev.write(sent) 267 | dev.write('\n') 268 | devparents.write(p) 269 | devparents.write('\n') 270 | 271 | def get_labels(tree, dictionary): 272 | size = tree.size() 273 | spans, labels = [], [] 274 | for i in xrange(size): 275 | labels.append(None) 276 | spans.append(None) 277 | tree.get_labels(spans, labels, dictionary) 278 | return spans, labels 279 | 280 | def write_labels(dirpath, dictionary): 281 | print('Writing labels for trees in ' + dirpath) 282 | with open(os.path.join(dirpath, 'labels.txt'), 'w') as labels, \ 283 | open(os.path.join(dirpath, 'dlabels.txt'), 'w') as dlabels: 284 | # load constituency and dependency trees 285 | const_trees, dep_trees, toks = load_trees(dirpath) 286 | 287 | # write span labels 288 | for i in xrange(len(const_trees)): 289 | const_trees[i].set_spans() 290 | dep_trees[i].set_spans(toks[i]) 291 | 292 | # const tree labels 293 | s, l = [], [] 294 | for j in xrange(const_trees[i].size()): 295 | s.append(None) 296 | l.append(None) 297 | const_trees[i].get_labels(s, l, dictionary) 298 | labels.write(' '.join(map(str, l)) + '\n') 299 | 300 | # dep tree labels 301 | dep_trees[i].span = const_trees[i].span 302 | s, l = [], [] 303 | for j in xrange(len(toks[i])): 304 | s.append(None) 305 | l.append('#') 306 | dep_trees[i].get_labels(s, l, dictionary) 307 | dlabels.write(' '.join(map(str, l)) + '\n') 308 | 309 | def dependency_parse(filepath, cp='', tokenize=True): 310 | print('\nDependency parsing ' + filepath) 311 | dirpath = os.path.dirname(filepath) 312 | filepre = os.path.splitext(os.path.basename(filepath))[0] 313 | tokpath = os.path.join(dirpath, filepre + '.toks') 314 | parentpath = os.path.join(dirpath, 'dparents.txt') 315 | relpath = os.path.join(dirpath, 'rels.txt') 316 | tokenize_flag = '-tokenize - ' if tokenize else '' 317 | cmd = ('java -cp %s DependencyParse -tokpath %s -parentpath %s -relpath %s %s < %s' 318 | % (cp, tokpath, parentpath, relpath, tokenize_flag, filepath)) 319 | os.system(cmd) 320 | 321 | if __name__ == '__main__': 322 | print('=' * 80) 323 | print('Preprocessing Stanford Sentiment Treebank') 324 | print('=' * 80) 325 | 326 | base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 327 | data_dir = os.path.join(base_dir, 'data') 328 | lib_dir = os.path.join(base_dir, 'lib') 329 | sst_dir = os.path.join(data_dir, 'sst') 330 | train_dir = os.path.join(sst_dir, 'train') 331 | dev_dir = os.path.join(sst_dir, 'dev') 332 | test_dir = os.path.join(sst_dir, 'test') 333 | make_dirs([train_dir, dev_dir, test_dir]) 334 | 335 | # produce train/dev/test splits 336 | split(sst_dir, train_dir, dev_dir, test_dir) 337 | sent_paths = glob.glob(os.path.join(sst_dir, '*/sents.txt')) 338 | 339 | # produce dependency parses 340 | classpath = ':'.join([ 341 | lib_dir, 342 | os.path.join(lib_dir, 'stanford-parser/stanford-parser.jar'), 343 | os.path.join(lib_dir, 'stanford-parser/stanford-parser-3.5.1-models.jar')]) 344 | for filepath in sent_paths: 345 | dependency_parse(filepath, cp=classpath, tokenize=False) 346 | 347 | # get vocabulary 348 | build_vocab(sent_paths, os.path.join(sst_dir, 'vocab.txt')) 349 | build_vocab(sent_paths, os.path.join(sst_dir, 'vocab-cased.txt'), lowercase=False) 350 | 351 | # write sentiment labels for nodes in trees 352 | dictionary = load_dictionary(sst_dir) 353 | write_labels(train_dir, dictionary) 354 | write_labels(dev_dir, dictionary) 355 | write_labels(test_dir, dictionary) 356 | 
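The parents.txt and dparents.txt files consumed by the loaders above encode each tree as a flat array of 1-indexed parent pointers: entry i gives the parent of node i, a value of 0 marks the root, and (in the dependency files) a value of -1 marks a token that is not attached to the tree. A minimal, self-contained sketch of that convention, independent of the loaders above (children_from_parents is an illustrative helper, not part of this repository):

def children_from_parents(parents):
    """Build 0-based children lists from a 1-indexed parent-pointer array.

    parents[i] is the 1-based index of node (i+1)'s parent,
    0 marks the root, and -1 marks a node outside the tree
    (the dependency-parse convention used in dparents.txt).
    """
    n = len(parents)
    children = [[] for _ in range(n)]
    root = None
    for i, p in enumerate(parents):
        if p == 0:           # root node
            root = i
        elif p > 0:          # ordinary child; convert parent to 0-based
            children[p - 1].append(i)
        # p == -1: token not attached to the tree, skip it
    return root, children

# Example: "2 0 2" encodes a 3-node tree rooted at node 2 (1-based),
# with nodes 1 and 3 as its children.
root, children = children_from_parents([2, 0, 2])
assert root == 1 and children[1] == [0, 2]

load_constituency_tree and load_dependency_tree walk the same arrays bottom-up: each node is created the first time it is reached and linked to its parent once the parent node exists.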
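load_dictionary maps the real-valued sentiment scores in sentiment_labels.txt, which lie in [0, 1], onto the five classes {-2, -1, 0, +1, +2} with cut points at 0.2, 0.4, 0.6 and 0.8. The same bucketing written as a standalone function for reference (rating_to_label is an illustrative name only, not used by the scripts):

def rating_to_label(rating):
    """Map an SST sentiment score in [0, 1] to one of five classes,
    using the same thresholds as load_dictionary above."""
    if rating <= 0.2:
        return -2   # very negative
    elif rating <= 0.4:
        return -1   # negative
    elif rating <= 0.6:
        return 0    # neutral
    elif rating <= 0.8:
        return +1   # positive
    else:
        return +2   # very positive

assert [rating_to_label(r) for r in (0.1, 0.3, 0.5, 0.7, 0.9)] == [-2, -1, 0, 1, 2]

write_labels then emits one such label per tree node; dependency-tree nodes whose span does not appear in the phrase dictionary are written as '#'.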
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 19yy 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 309 | 310 | 311 | Also add information on how to contact you by electronic and paper mail. 312 | 313 | If the program is interactive, make it output a short notice like this 314 | when it starts in an interactive mode: 315 | 316 | Gnomovision version 69, Copyright (C) 19yy name of author 317 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 318 | This is free software, and you are welcome to redistribute it 319 | under certain conditions; type `show c' for details. 320 | 321 | The hypothetical commands `show w' and `show c' should show the appropriate 322 | parts of the General Public License. Of course, the commands you use may 323 | be called something other than `show w' and `show c'; they could even be 324 | mouse-clicks or menu items--whatever suits your program. 325 | 326 | You should also get your employer (if you work as a programmer) or your 327 | school, if any, to sign a "copyright disclaimer" for the program, if 328 | necessary. Here is a sample; alter the names: 329 | 330 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 331 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 332 | 333 | , 1 April 1989 334 | Ty Coon, President of Vice 335 | 336 | This General Public License does not permit incorporating your program into 337 | proprietary programs. If your program is a subroutine library, you may 338 | consider it more useful to permit linking proprietary applications with the 339 | library. If this is what you want to do, use the GNU Library General 340 | Public License instead of this License. 341 | --------------------------------------------------------------------------------
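A closing note on the similarity predictions produced by LSTMSim:predict earlier in this listing: the exp() call implies that sim_module outputs log-probabilities over the five similarity scores 1 through 5, so torch.range(1, 5):dot(output:exp()) is the expected score under that distribution, i.e. a real-valued prediction in [1, 5] rather than a hard class. A small NumPy illustration of that final step (the values below are made up, not real model output):

import numpy as np

# Suppose log_probs holds the log-probabilities over similarity
# scores 1..5 produced by the similarity module for one sentence pair.
log_probs = np.log(np.array([0.05, 0.10, 0.20, 0.40, 0.25]))

# Expected score, mirroring torch.range(1, 5):dot(output:exp())
scores = np.arange(1, 6)
prediction = scores.dot(np.exp(log_probs))
print(round(prediction, 2))  # 3.7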