import re

# Token grammar for s-expressions: runs of spaces, single parens, or runs
# of non-space/non-paren characters.  Compiled once at module load instead
# of on every call.
_TOKEN_RE = re.compile(r" +|[()]|[^ ()]+")


def _tokenize_sexpr(s):
    """Split an s-expression string into tokens: "(", ")", and atoms."""
    return [t for t in (m.group(0) for m in _TOKEN_RE.finditer(s)) if t[0] != " "]


def _within_bracket(toks):
    """Consume tokens after an opening "(" and build the corresponding subtree.

    `toks` is a shared iterator; recursive calls advance it past nested
    brackets.  Raises AssertionError if the expression is unbalanced
    (token stream ends before the matching ")").
    """
    label = next(toks)
    children = []
    for tok in toks:
        if tok == "(":
            children.append(_within_bracket(toks))
        elif tok == ")":
            return Tree(label, children)
        else:
            children.append(Tree(tok, None))
    assert False, list(toks)  # ran out of tokens before the closing ")"


class Tree(object):
    """An n-ary tree parsed from an s-expression.

    Leaves are marked by ``children is None``; internal nodes hold a list
    of child ``Tree`` objects.
    """

    def __init__(self, label, children=None):
        self.label = label
        self.children = children  # None marks a leaf; a list marks an internal node

    @staticmethod
    def from_sexpr(string):
        """Parse a bracketed s-expression such as "(S (NP dog) barks)"."""
        toks = iter(_tokenize_sexpr(string))
        assert next(toks) == "("
        return _within_bracket(toks)

    def __str__(self):
        if self.children is None:
            return self.label
        return "[%s %s]" % (self.label, " ".join(str(c) for c in self.children))

    def isleaf(self):
        # Identity check (`is None`) instead of `== None`: an internal node
        # with an empty child list must still count as a non-leaf.
        return self.children is None

    def leaves_iter(self):
        """Yield leaf nodes left-to-right."""
        if self.isleaf():
            yield self
        else:
            for c in self.children:
                for leaf in c.leaves_iter():
                    yield leaf

    def leaves(self):
        """Return all leaf nodes as a list."""
        return list(self.leaves_iter())

    def nonterms_iter(self):
        """Yield internal (non-leaf) nodes in pre-order."""
        if not self.isleaf():
            yield self
            for c in self.children:
                for node in c.nonterms_iter():
                    yield node

    def nonterms(self):
        """Return all internal nodes as a list (pre-order)."""
        return list(self.nonterms_iter())
# tree-class.ipynb — Tai-style Tree-LSTM sentiment classifier with one
# backward/update per sentence.  Relies on names bound in the notebook's
# first cell: dy (dynet), np (numpy), model, trainer, w2i, l2i, ntags,
# train, dev, and the stdlib random/time modules.

class TreeLSTMBuilder(object):
    """Binary Tree LSTM (Tai et al.) computing a vector per Tree node."""

    def __init__(self, model, word_vocab, wdim, hdim):
        # Word-embedding -> gate weights for the unary (pre-terminal) case: i, o, u.
        self.WS = [model.add_parameters((hdim, wdim)) for _ in "iou"]
        # Concatenated-children -> gate weights for the binary case: i, o, u.
        self.US = [model.add_parameters((hdim, 2 * hdim)) for _ in "iou"]
        # One forget-gate matrix per child.
        self.UFS = [model.add_parameters((hdim, hdim)) for _ in "ff"]
        # Gate biases: i, o, u, f (the f bias is shared by both children).
        self.BS = [model.add_parameters(hdim) for _ in "iouf"]
        self.E = model.add_lookup_parameters((len(word_vocab), wdim))
        self.w2i = word_vocab

    def expr_for_tree(self, tree):
        """Recursively build and return the hidden-state expression for `tree`."""
        if tree.isleaf():
            # Unknown words fall back to index 0 (_UNK_).
            return self.E[self.w2i.get(tree.label, 0)]
        if len(tree.children) == 1:
            # Unary chain above a leaf: gated transform of the word embedding.
            assert tree.children[0].isleaf()
            emb = self.expr_for_tree(tree.children[0])
            Wi, Wo, Wu = [dy.parameter(w) for w in self.WS]
            bi, bo, bu, _ = [dy.parameter(b) for b in self.BS]
            gate_i = dy.logistic(Wi * emb + bi)
            gate_o = dy.logistic(Wo * emb + bo)
            cand = dy.tanh(Wu * emb + bu)
            cell = dy.cmult(gate_i, cand)
            return dy.cmult(gate_o, dy.tanh(cell))
        # Binary case: combine the two child states.
        assert len(tree.children) == 2, tree.children[0]
        e1 = self.expr_for_tree(tree.children[0])
        e2 = self.expr_for_tree(tree.children[1])
        Ui, Uo, Uu = [dy.parameter(u) for u in self.US]
        Uf1, Uf2 = [dy.parameter(u) for u in self.UFS]
        bi, bo, bu, bf = [dy.parameter(b) for b in self.BS]
        both = dy.concatenate([e1, e2])
        gate_i = dy.logistic(Ui * both + bi)
        gate_o = dy.logistic(Uo * both + bo)
        forget1 = dy.logistic(Uf1 * e1 + bf)
        forget2 = dy.logistic(Uf2 * e2 + bf)
        cand = dy.tanh(Uu * both + bu)
        cell = dy.cmult(gate_i, cand) + dy.cmult(forget1, e1) + dy.cmult(forget2, e2)
        return dy.cmult(gate_o, dy.tanh(cell))


# Define the model.
EMB_SIZE = 128
HID_SIZE = 128
# NOTE(review): __init__ expects (wdim, hdim) but receives (HID_SIZE,
# EMB_SIZE) — the roles are swapped; harmless only while both equal 128.
builder = TreeLSTMBuilder(model, w2i, HID_SIZE, EMB_SIZE)
W_sm = model.add_parameters((ntags, HID_SIZE))  # softmax weights
b_sm = model.add_parameters((ntags))            # softmax bias


def calc_scores(tree):
    """Return unnormalized tag scores for the root label of `tree`."""
    emb = builder.expr_for_tree(tree)
    W_sm_exp = dy.parameter(W_sm)
    b_sm_exp = dy.parameter(b_sm)
    return W_sm_exp * emb + b_sm_exp


for ITER in range(100):
    # Training pass: build one graph per sentence, update immediately.
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()
    for tree in train:
        my_loss = dy.pickneglogsoftmax(calc_scores(tree), l2i[tree.label])
        train_loss += my_loss.value()
        my_loss.backward()
        trainer.update()
        dy.renew_cg()
    print("iter %r: train loss/sent=%.4f, time=%.2fs" % (ITER, train_loss / len(train), time.time() - start))
    # Evaluation pass: accuracy of the argmax tag on the dev set.
    test_correct = 0.0
    for tree in dev:
        scores = calc_scores(tree).npvalue()
        predict = np.argmax(scores)
        if predict == l2i[tree.label]:
            test_correct += 1
        dy.renew_cg()
    print("iter %r: test acc=%.4f" % (ITER, test_correct / len(dev)))
# tree-class-autobatch.ipynb — same Tree-LSTM classifier, but with DyNet's
# automatic operation batching enabled and parameter updates performed on
# minibatches of accumulated losses.

from collections import defaultdict, Counter
import codecs
import time
import random
import dynet_config
dynet_config.set(autobatch=1)  # must be configured BEFORE `import dynet`
import dynet as dy
import numpy as np

from tree import Tree


def read_dataset(filename):
    """Read one s-expression tree per line from `filename`."""
    return [Tree.from_sexpr(line.strip()) for line in codecs.open(filename, "r")]


def get_vocabs(trees):
    """Build label/word vocabularies and index maps from training trees.

    Returns (label->id, word->id, id->label list, id->word list).
    """
    label_vocab = Counter()
    word_vocab = Counter()
    for tree in trees:
        label_vocab.update([n.label for n in tree.nonterms()])
        word_vocab.update([l.label for l in tree.leaves()])
    labels = [x for x, c in label_vocab.items() if c > 0]
    # Index 0 is reserved for unknown words.
    words = ["_UNK_"] + [x for x, c in word_vocab.items() if c > 0]
    l2i = {l: i for i, l in enumerate(labels)}
    w2i = {w: i for i, w in enumerate(words)}
    return l2i, w2i, labels, words


train = read_dataset("train.txt")
dev = read_dataset("dev.txt")

l2i, w2i, i2l, i2w = get_vocabs(train)
ntags = len(l2i)
nwords = len(w2i)

# Start DyNet and define trainer.
model = dy.Model()
trainer = dy.AdamTrainer(model)


# Tai-style Tree LSTM.
class TreeLSTMBuilder(object):
    """Binary Tree LSTM (Tai et al.) computing a vector per Tree node."""

    def __init__(self, model, word_vocab, wdim, hdim):
        # Word-embedding -> gate weights for the unary case: i, o, u.
        self.WS = [model.add_parameters((hdim, wdim)) for _ in "iou"]
        # Concatenated-children -> gate weights for the binary case: i, o, u.
        self.US = [model.add_parameters((hdim, 2 * hdim)) for _ in "iou"]
        # One forget-gate matrix per child.
        self.UFS = [model.add_parameters((hdim, hdim)) for _ in "ff"]
        # Gate biases: i, o, u, f (the f bias is shared by both children).
        self.BS = [model.add_parameters(hdim) for _ in "iouf"]
        self.E = model.add_lookup_parameters((len(word_vocab), wdim))
        self.w2i = word_vocab

    def expr_for_tree(self, tree):
        """Recursively build and return the hidden-state expression for `tree`."""
        if tree.isleaf():
            # Unknown words fall back to index 0 (_UNK_).
            return self.E[self.w2i.get(tree.label, 0)]
        if len(tree.children) == 1:
            # Unary chain above a leaf: gated transform of the word embedding.
            assert tree.children[0].isleaf()
            emb = self.expr_for_tree(tree.children[0])
            Wi, Wo, Wu = [dy.parameter(w) for w in self.WS]
            bi, bo, bu, _ = [dy.parameter(b) for b in self.BS]
            i = dy.logistic(Wi * emb + bi)
            o = dy.logistic(Wo * emb + bo)
            u = dy.tanh(Wu * emb + bu)
            c = dy.cmult(i, u)
            return dy.cmult(o, dy.tanh(c))
        # Binary case: combine the two child states.
        assert len(tree.children) == 2, tree.children[0]
        e1 = self.expr_for_tree(tree.children[0])
        e2 = self.expr_for_tree(tree.children[1])
        Ui, Uo, Uu = [dy.parameter(u) for u in self.US]
        Uf1, Uf2 = [dy.parameter(u) for u in self.UFS]
        bi, bo, bu, bf = [dy.parameter(b) for b in self.BS]
        e = dy.concatenate([e1, e2])
        i = dy.logistic(Ui * e + bi)
        o = dy.logistic(Uo * e + bo)
        f1 = dy.logistic(Uf1 * e1 + bf)
        f2 = dy.logistic(Uf2 * e2 + bf)
        u = dy.tanh(Uu * e + bu)
        c = dy.cmult(i, u) + dy.cmult(f1, e1) + dy.cmult(f2, e2)
        return dy.cmult(o, dy.tanh(c))


# Define the model.
EMB_SIZE = 128
HID_SIZE = 128
# NOTE(review): __init__ expects (wdim, hdim) but receives (HID_SIZE,
# EMB_SIZE) — the roles are swapped; harmless only while both equal 128.
builder = TreeLSTMBuilder(model, w2i, HID_SIZE, EMB_SIZE)
W_sm = model.add_parameters((ntags, HID_SIZE))  # softmax weights
b_sm = model.add_parameters((ntags))            # softmax bias


def calc_scores(tree):
    """Return unnormalized tag scores for the root label of `tree`."""
    emb = builder.expr_for_tree(tree)
    W_sm_exp = dy.parameter(W_sm)
    b_sm_exp = dy.parameter(b_sm)
    return W_sm_exp * emb + b_sm_exp


def _train_step(losses):
    """Sum `losses`, backprop, update parameters, reset the graph.

    Returns the summed batch loss as a float.
    """
    batch_loss = dy.esum(losses)
    loss_value = batch_loss.value()
    batch_loss.backward()
    trainer.update()
    dy.renew_cg()
    return loss_value


for ITER in range(100):
    # Training pass: accumulate per-sentence losses and update every
    # `batch_size` sentences; autobatching batches the graph operations.
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()
    batch_size = 16
    my_losses = []
    for tree in train:
        my_losses.append(dy.pickneglogsoftmax(calc_scores(tree), l2i[tree.label]))
        if len(my_losses) == batch_size:
            train_loss += _train_step(my_losses)
            my_losses = []
    # BUG FIX: the original dropped the final partial batch whenever
    # len(train) is not a multiple of batch_size — those sentences were
    # never backpropagated and their loss was not counted.  Flush them.
    if my_losses:
        train_loss += _train_step(my_losses)
    print("iter %r: train loss/sent=%.4f, time=%.2fs" % (ITER, train_loss / len(train), time.time() - start))
    # Evaluation pass: accuracy of the argmax tag on the dev set.
    test_correct = 0.0
    for tree in dev:
        scores = calc_scores(tree).npvalue()
        predict = np.argmax(scores)
        if predict == l2i[tree.label]:
            test_correct += 1
        dy.renew_cg()
    print("iter %r: test acc=%.4f" % (ITER, test_correct / len(dev)))