├── .gitignore
├── rc
    ├── rnet
    │   ├── code
    │   │   ├── __init__.py
    │   │   ├── rnns
    │   │   │   ├── __init__.py
    │   │   │   ├── lrn.py
    │   │   │   ├── atr.py
    │   │   │   ├── cell.py
    │   │   │   ├── gru.py
    │   │   │   ├── rnn.py
    │   │   │   ├── sru.py
    │   │   │   └── lstm.py
    │   │   ├── download.sh
    │   │   └── evaluate-v1.1.py
    │   ├── config.py
    │   ├── test_lrn.sh
    │   └── train_lrn.sh
    ├── elmo_rnet
    │   ├── config.py
    │   ├── code
    │   │   ├── bilm
    │   │   │   ├── __init__.py
    │   │   │   └── elmo.py
    │   │   ├── rnns
    │   │   │   ├── __init__.py
    │   │   │   ├── lrn.py
    │   │   │   ├── atr.py
    │   │   │   ├── cell.py
    │   │   │   ├── gru.py
    │   │   │   ├── rnn.py
    │   │   │   ├── sru.py
    │   │   │   └── lstm.py
    │   │   ├── download.sh
    │   │   ├── cycle.py
    │   │   └── evaluate-v1.1.py
    │   ├── train_lrn.sh
    │   └── test_lrn.sh
    └── README.md
├── doc
    ├── code
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── recorder.py
    │   │   ├── thread.py
    │   │   ├── initializer.py
    │   │   ├── saver.py
    │   │   └── cycle.py
    │   ├── bert
    │   │   ├── __init__.py
    │   │   ├── load.py
    │   │   ├── tokenizer.py
    │   │   └── vocab.py
    │   ├── lrs
    │   │   ├── vanillalr.py
    │   │   ├── epochlr.py
    │   │   ├── noamlr.py
    │   │   ├── lr.py
    │   │   ├── __init__.py
    │   │   ├── scorelr.py
    │   │   └── gnmtplr.py
    │   ├── rnns
    │   │   ├── __init__.py
    │   │   ├── lrn.py
    │   │   ├── atr.py
    │   │   ├── cell.py
    │   │   ├── gru.py
    │   │   ├── sru.py
    │   │   └── lstm.py
    │   ├── tasks.py
    │   ├── evalu.py
    │   └── vocab.py
    ├── config.py
    └── README.md
├── nli
    ├── code
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── recorder.py
    │   │   ├── thread.py
    │   │   ├── initializer.py
    │   │   ├── saver.py
    │   │   └── cycle.py
    │   ├── bert
    │   │   ├── __init__.py
    │   │   ├── load.py
    │   │   ├── tokenizer.py
    │   │   └── vocab.py
    │   ├── lrs
    │   │   ├── vanillalr.py
    │   │   ├── epochlr.py
    │   │   ├── noamlr.py
    │   │   ├── lr.py
    │   │   ├── __init__.py
    │   │   ├── scorelr.py
    │   │   └── gnmtplr.py
    │   ├── scripts
    │   │   └── convert_to_plain.py
    │   ├── rnns
    │   │   ├── __init__.py
    │   │   ├── lrn.py
    │   │   ├── atr.py
    │   │   ├── cell.py
    │   │   ├── gru.py
    │   │   ├── sru.py
    │   │   └── lstm.py
    │   ├── evalu.py
    │   └── vocab.py
    ├── config.py
    ├── config_bert.py
    └── README.md
├── figures
    ├── ls_mem.png
    ├── memory.png
    └── san_corr.png
├── ner
    ├── code
    │   ├── scripts
    │   │   └── get_test_score.py
    │   ├── requirements.txt
    │   ├── ner_glove.py
    │   ├── callbacks.py
    │   ├── trainer.py
    │   └── tagger.py
    └── README.md
├── lm
    ├── code
    │   ├── locked_dropout.py
    │   ├── get_data.sh
    │   ├── embed_regularize.py
    │   ├── utils.py
    │   ├── generate.py
    │   ├── weight_drop.py
    │   └── data.py
    └── README.md
├── LICENSE
├── mt
    └── README.md
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
--------------------------------------------------------------------------------
/rc/rnet/code/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/rc/rnet/config.py:
--------------------------------------------------------------------------------
1 | code/config.py
--------------------------------------------------------------------------------
/rc/elmo_rnet/config.py:
--------------------------------------------------------------------------------
1 | code/config.py
--------------------------------------------------------------------------------
/doc/code/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
--------------------------------------------------------------------------------
/nli/code/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
--------------------------------------------------------------------------------
/figures/ls_mem.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bzhangGo/lrn/HEAD/figures/ls_mem.png -------------------------------------------------------------------------------- /figures/memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bzhangGo/lrn/HEAD/figures/memory.png -------------------------------------------------------------------------------- /figures/san_corr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bzhangGo/lrn/HEAD/figures/san_corr.png -------------------------------------------------------------------------------- /doc/code/bert/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | from .bert import * 5 | from .load import * 6 | from .tokenizer import * 7 | -------------------------------------------------------------------------------- /nli/code/bert/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | from .bert import * 5 | from .load import * 6 | from .tokenizer import * 7 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/bilm/__init__.py: -------------------------------------------------------------------------------- 1 | # Elmo Interface 2 | # Deep contextualized word representations 3 | 4 | from .data import Batcher, TokenBatcher 5 | from .model import BidirectionalLanguageModel, dump_token_embeddings, \ 6 | dump_bilm_embeddings 7 | from .elmo import weight_layers 8 | 9 | -------------------------------------------------------------------------------- /rc/rnet/test_lrn.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | export CUDA_ROOT=XXX 4 | export PATH=$CUDA_ROOT/bin:$PATH 5 | export LD_LIBRARY_PATH=$CUDA_ROOT/lib64:$LD_LIBRARY_PATH 6 | 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | export name=log_lrn 10 | 11 | python config.py --mode test --cell lrn 12 | 13 | -------------------------------------------------------------------------------- /rc/rnet/train_lrn.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | export CUDA_ROOT=XXX 4 | export PATH=$CUDA_ROOT/bin:$PATH 5 | export LD_LIBRARY_PATH=$CUDA_ROOT/lib64:$LD_LIBRARY_PATH 6 | 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | export name=log_lrn 10 | 11 | python config.py --mode train --cell lrn 12 | 13 | -------------------------------------------------------------------------------- /rc/elmo_rnet/train_lrn.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | export CUDA_ROOT=XXX 4 | export PATH=$CUDA_ROOT/bin:$PATH 5 | export LD_LIBRARY_PATH=$CUDA_ROOT/lib64:$LD_LIBRARY_PATH 6 | 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | export name=log_lrn 10 | 11 | python config.py --mode train --cell lrn 12 | 13 | -------------------------------------------------------------------------------- /rc/elmo_rnet/test_lrn.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | export CUDA_ROOT=XXX 4 | export PATH=$CUDA_ROOT/bin:$PATH 5 | export LD_LIBRARY_PATH=$CUDA_ROOT/lib64:$LD_LIBRARY_PATH 6 | 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | export name=log_lrn 10 | 11 | python config.py --mode test --cell lrn --batch_size 8 12 | 13 | -------------------------------------------------------------------------------- /doc/code/lrs/vanillalr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | from lrs import lr 9 | 10 | 11 | class VanillaLR(lr.Lr): 12 | """Very basic learning rate, constant learning rate""" 13 | def __init__(self, 14 | init_lr, 15 | name="vanilla_lr" 16 | ): 17 | super(VanillaLR, self).__init__(init_lr, name=name) 18 | -------------------------------------------------------------------------------- /nli/code/lrs/vanillalr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | from lrs import lr 9 | 10 | 11 | class VanillaLR(lr.Lr): 12 | """Very basic learning rate, constant learning rate""" 13 | def __init__(self, 14 | init_lr, 15 | name="vanilla_lr" 16 | ): 17 | super(VanillaLR, self).__init__(init_lr, name=name) 18 | -------------------------------------------------------------------------------- /ner/code/scripts/get_test_score.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | import sys 4 | import numpy as np 5 | 6 | 7 | def extract_dev_test_score(fname): 8 | test_score = float(open(fname, 'rU').readlines()[-1].strip()) 9 | 10 | return test_score 11 | 12 | 13 | cell_type = sys.argv[1] 14 | exp_dirs = sys.argv[2:] 15 | 16 | scores = [] 17 | for exp_dir in exp_dirs: 18 | test_score = extract_dev_test_score("{}/log.{}".format(exp_dir, cell_type)) 19 | scores.append(test_score) 20 | 21 | print(np.mean(scores), np.std(scores)) 22 | -------------------------------------------------------------------------------- /lm/code/locked_dropout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | class LockedDropout(nn.Module): 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def forward(self, x, dropout=0.5): 10 | if not self.training or not dropout: 11 | return x 12 | m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout) 13 | # mask = Variable(m, requires_grad=False) / (1 - dropout) 14 | mask = Variable(m.div_(1 - dropout), requires_grad=False) 15 | mask = mask.expand_as(x) 16 | return mask * x 17 | -------------------------------------------------------------------------------- /ner/code/requirements.txt: -------------------------------------------------------------------------------- 1 | backports.weakref==1.0rc1 2 | bleach==1.5.0 3 | boto==2.48.0 4 | bz2file==0.98 5 | certifi==2017.11.5 6 | chardet==3.0.4 7 | enum34==1.1.6 8 | gensim==3.1.0 9 | h5py==2.7.1 10 | html5lib==0.9999999 11 | idna==2.6 12 | Keras==2.2.0 13 | m2r==0.1.12 14 | Markdown==2.6.9 15 | numpy==1.13.3 16 | protobuf==3.5.1 17 | python-dateutil==2.6.0 18 | pytz==2017.2 19 | PyYAML==4.2b1 20 | requests==2.21.0 21 | scikit-learn==0.19.1 22 | scipy==1.0.0 23 | seqeval==0.0.3 24 | six==1.11.0 25 | 
smart-open==1.5.3 26 | tensorboard==1.8.0 27 | tensorflow>=1.12.1 28 | Theano==0.9.0 29 | urllib3>=1.24.2 30 | Werkzeug>=0.15.3 31 | allennlp==0.7.1 32 | -------------------------------------------------------------------------------- /nli/code/scripts/convert_to_plain.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | import sys 4 | 5 | data = open(sys.argv[1], 'r') 6 | data.readline() 7 | 8 | out_l = open(sys.argv[2]+".l", 'w') 9 | out_p = open(sys.argv[2]+".p", 'w') 10 | out_q = open(sys.argv[2]+".q", 'w') 11 | 12 | label = {'entailment': 0, 13 | 'neutral': 1, 14 | 'contradiction': 2} 15 | 16 | for line in data: 17 | l, p, q = line.strip().split('\t')[:3] 18 | if l not in label: 19 | continue 20 | out_l.write(str(label[l]) + '\n') 21 | out_p.write(p.replace('( ', '').replace(' )', '') + '\n') 22 | out_q.write(q.replace('( ', '').replace(' )', '') + '\n') 23 | 24 | out_l.close() 25 | out_p.close() 26 | out_q.close() 27 | -------------------------------------------------------------------------------- /lm/code/get_data.sh: -------------------------------------------------------------------------------- 1 | mkdir data 2 | cd data 3 | 4 | echo "- Downloading Penn Treebank (PTB)" 5 | mkdir -p penn 6 | cd penn 7 | URL="https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/master/word_language_model/data/penn" 8 | wget --quiet --continue $URL/train.txt 9 | wget --quiet --continue $URL/valid.txt 10 | wget --quiet --continue $URL/test.txt 11 | cd .. 12 | 13 | echo "- Downloading WikiText-2 (WT2)" 14 | wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip 15 | unzip -q wikitext-2-v1.zip 16 | cd wikitext-2 17 | mv wiki.train.tokens train.txt 18 | mv wiki.valid.tokens valid.txt 19 | mv wiki.test.tokens test.txt 20 | 21 | echo "---" 22 | echo "Happy language modeling :)" 23 | 24 | cd .. 
25 | -------------------------------------------------------------------------------- /doc/code/lrs/epochlr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | from lrs import lr 9 | 10 | 11 | class EpochDecayLr(lr.Lr): 12 | """Decay the learning rate after each epoch""" 13 | def __init__(self, 14 | init_lr, 15 | decay=0.5, # learning rate decay rate 16 | name="epoch_decay_lr" 17 | ): 18 | super(EpochDecayLr, self).__init__(init_lr, name=name) 19 | 20 | self.decay = decay 21 | 22 | def after_epoch(self, eidx=None): 23 | if eidx is None: 24 | self.lrate = self.init_lrate * self.decay 25 | else: 26 | self.lrate = self.init_lrate * self.decay ** int(eidx) 27 | -------------------------------------------------------------------------------- /nli/code/lrs/epochlr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | from lrs import lr 9 | 10 | 11 | class EpochDecayLr(lr.Lr): 12 | """Decay the learning rate after each epoch""" 13 | def __init__(self, 14 | init_lr, 15 | decay=0.5, # learning rate decay rate 16 | name="epoch_decay_lr" 17 | ): 18 | super(EpochDecayLr, self).__init__(init_lr, name=name) 19 | 20 | self.decay = decay 21 | 22 | def after_epoch(self, eidx=None): 23 | if eidx is None: 24 | self.lrate = self.init_lrate * self.decay 25 | else: 26 | self.lrate = self.init_lrate * self.decay ** int(eidx) 27 | -------------------------------------------------------------------------------- /doc/code/utils/recorder.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import json 8 | import tensorflow as tf 9 | 10 | 11 | class Recorder(object): 12 | """To save training processes, inspired by Nematus""" 13 | 14 | def load_from_json(self, file_name): 15 | tf.logging.info("Loading recoder file from {}".format(file_name)) 16 | record = json.load(open(file_name, 'rb')) 17 | record = dict((key.encode("UTF-8"), value) for (key, value) in record.items()) 18 | self.__dict__.update(record) 19 | 20 | def save_to_json(self, file_name): 21 | tf.logging.info("Saving recorder file into {}".format(file_name)) 22 | json.dump(self.__dict__, open(file_name, 'wb'), indent=2) 23 | -------------------------------------------------------------------------------- /nli/code/utils/recorder.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import json 8 | import tensorflow as tf 9 | 10 | 11 | class Recorder(object): 12 | """To save training processes, inspired by Nematus""" 13 | 14 | def load_from_json(self, file_name): 15 | tf.logging.info("Loading recoder file from {}".format(file_name)) 16 | record = json.load(open(file_name, 'rb')) 17 | record = dict((key.encode("UTF-8"), value) for (key, value) in record.items()) 18 | self.__dict__.update(record) 19 | 20 | def save_to_json(self, file_name): 21 | tf.logging.info("Saving recorder file into {}".format(file_name)) 22 | 
json.dump(self.__dict__, open(file_name, 'wb'), indent=2) 23 | -------------------------------------------------------------------------------- /doc/code/rnns/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from rnns import gru, lstm, atr, sru, lrn 4 | 5 | 6 | def get_cell(cell_name, hidden_size, ln=False, scope=None): 7 | """Convert the cell_name into cell instance.""" 8 | cell_name = cell_name.lower() 9 | 10 | if cell_name == "gru": 11 | return gru.gru(hidden_size, ln=ln, scope=scope or "gru") 12 | elif cell_name == "lstm": 13 | return lstm.lstm(hidden_size, ln=ln, scope=scope or "lstm") 14 | elif cell_name == "atr": 15 | return atr.atr(hidden_size, ln=ln, scope=scope or "atr") 16 | elif cell_name == "sru": 17 | return sru.sru(hidden_size, ln=ln, scope=scope or "sru") 18 | elif cell_name == "lrn": 19 | return lrn.lrn(hidden_size, ln=ln, scope=scope or "lrn") 20 | else: 21 | raise NotImplementedError( 22 | "{} is not supported".format(cell_name)) 23 | -------------------------------------------------------------------------------- /nli/code/rnns/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from rnns import gru, lstm, atr, sru, lrn 4 | 5 | 6 | def get_cell(cell_name, hidden_size, ln=False, scope=None): 7 | """Convert the cell_name into cell instance.""" 8 | cell_name = cell_name.lower() 9 | 10 | if cell_name == "gru": 11 | return gru.gru(hidden_size, ln=ln, scope=scope or "gru") 12 | elif cell_name == "lstm": 13 | return lstm.lstm(hidden_size, ln=ln, scope=scope or "lstm") 14 | elif cell_name == "atr": 15 | return atr.atr(hidden_size, ln=ln, scope=scope or "atr") 16 | elif cell_name == "sru": 17 | return sru.sru(hidden_size, ln=ln, scope=scope or "sru") 18 | elif cell_name == "lrn": 19 | return lrn.lrn(hidden_size, ln=ln, scope=scope or "lrn") 20 | else: 21 | raise NotImplementedError( 22 | "{} is not supported".format(cell_name)) 23 | -------------------------------------------------------------------------------- /rc/rnet/code/rnns/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from rnns import gru, lstm, atr, sru, lrn 4 | 5 | 6 | def get_cell(cell_name, hidden_size, ln=False, scope=None): 7 | """Convert the cell_name into cell instance.""" 8 | cell_name = cell_name.lower() 9 | 10 | if cell_name == "gru": 11 | return gru.gru(hidden_size, ln=ln, scope=scope or "gru") 12 | elif cell_name == "lstm": 13 | return lstm.lstm(hidden_size, ln=ln, scope=scope or "lstm") 14 | elif cell_name == "atr": 15 | return atr.atr(hidden_size, ln=ln, scope=scope or "atr") 16 | elif cell_name == "sru": 17 | return sru.sru(hidden_size, ln=ln, scope=scope or "sru") 18 | elif cell_name == "lrn": 19 | return lrn.lrn(hidden_size, ln=ln, scope=scope or "lrn") 20 | else: 21 | raise NotImplementedError( 22 | "{} is not supported".format(cell_name)) 23 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/rnns/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from rnns import gru, lstm, atr, sru, lrn 4 | 5 | 6 | def get_cell(cell_name, hidden_size, ln=False, scope=None): 7 | """Convert the cell_name into cell instance.""" 8 | cell_name = cell_name.lower() 9 | 10 | if cell_name == "gru": 11 | return gru.gru(hidden_size, ln=ln, scope=scope or "gru") 12 | elif cell_name == "lstm": 
13 | return lstm.lstm(hidden_size, ln=ln, scope=scope or "lstm") 14 | elif cell_name == "atr": 15 | return atr.atr(hidden_size, ln=ln, scope=scope or "atr") 16 | elif cell_name == "sru": 17 | return sru.sru(hidden_size, ln=ln, scope=scope or "sru") 18 | elif cell_name == "lrn": 19 | return lrn.lrn(hidden_size, ln=ln, scope=scope or "lrn") 20 | else: 21 | raise NotImplementedError( 22 | "{} is not supported".format(cell_name)) 23 | -------------------------------------------------------------------------------- /doc/code/utils/thread.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import threading 8 | 9 | 10 | class threadsafe_iter: 11 | """Takes an iterator/generator and makes it thread-safe by 12 | serializing call to the `next` method of given iterator/generator. 13 | """ 14 | 15 | def __init__(self, it): 16 | self.it = it 17 | self.lock = threading.Lock() 18 | 19 | def __iter__(self): 20 | return self 21 | 22 | def __next__(self): 23 | return self.next() 24 | 25 | def next(self): 26 | with self.lock: 27 | return next(self.it) 28 | 29 | 30 | def threadsafe_generator(f): 31 | """A decorator that takes a generator function and makes it thread-safe. 32 | """ 33 | 34 | def g(*a, **kw): 35 | return threadsafe_iter(f(*a, **kw)) 36 | 37 | return g -------------------------------------------------------------------------------- /nli/code/utils/thread.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import threading 8 | 9 | 10 | class threadsafe_iter: 11 | """Takes an iterator/generator and makes it thread-safe by 12 | serializing call to the `next` method of given iterator/generator. 13 | """ 14 | 15 | def __init__(self, it): 16 | self.it = it 17 | self.lock = threading.Lock() 18 | 19 | def __iter__(self): 20 | return self 21 | 22 | def __next__(self): 23 | return self.next() 24 | 25 | def next(self): 26 | with self.lock: 27 | return next(self.it) 28 | 29 | 30 | def threadsafe_generator(f): 31 | """A decorator that takes a generator function and makes it thread-safe. 
32 | """ 33 | 34 | def g(*a, **kw): 35 | return threadsafe_iter(f(*a, **kw)) 36 | 37 | return g -------------------------------------------------------------------------------- /rc/rnet/code/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Download SQuAD 4 | SQUAD_DIR=~/data/squad 5 | mkdir -p $SQUAD_DIR 6 | wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -O $SQUAD_DIR/train-v1.1.json 7 | wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -O $SQUAD_DIR/dev-v1.1.json 8 | 9 | # Download GloVe 10 | GLOVE_DIR=~/data/glove 11 | mkdir -p $GLOVE_DIR 12 | wget http://nlp.stanford.edu/data/glove.840B.300d.zip -O $GLOVE_DIR/glove.840B.300d.zip 13 | unzip $GLOVE_DIR/glove.840B.300d.zip -d $GLOVE_DIR 14 | 15 | # Download Glove Character Embedding 16 | # wget https://raw.githubusercontent.com/minimaxir/char-embeddings/master/glove.840B.300d-char.txt -O $GLOVE_DIR/glove.840B.300d-char.txt 17 | 18 | # Download fasttext 19 | # FASTTEXT_DIR=~/data/fasttext 20 | # mkdir -p $FASTTEXT_DIR 21 | # wget https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki-news-300d-1M.vec.zip -O $FASTTEXT_DIR/wiki-news-300d-1M.vec.zip 22 | # unzip $FASTTEXT_DIR/wiki-news-300d-1M.vec.zip -d $FASTTEXT_DIR 23 | 24 | # Download Spacy language models 25 | python3 -m spacy download en 26 | -------------------------------------------------------------------------------- /doc/code/lrs/noamlr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | 9 | from lrs import lr 10 | 11 | 12 | class NoamDecayLr(lr.Lr): 13 | """Decay the learning rate during each training step, follows Transformer""" 14 | def __init__(self, 15 | init_lr, # initial learning rate 16 | warmup_steps, # warmup step 17 | hidden_size, # model hidden size 18 | name="noam_decay_lr" # model name, no use 19 | ): 20 | super(NoamDecayLr, self).__init__(init_lr, name=name) 21 | 22 | self.warmup_steps = warmup_steps 23 | self.hidden_size = hidden_size 24 | 25 | def step(self, step): 26 | step = float(step) 27 | warmup_steps = float(self.warmup_steps) 28 | 29 | multiplier = float(self.hidden_size) ** -0.5 30 | decay = multiplier * np.minimum((step + 1) * (warmup_steps ** -1.5), 31 | (step + 1) ** -0.5) 32 | self.lrate = self.init_lrate * decay 33 | -------------------------------------------------------------------------------- /nli/code/lrs/noamlr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | 9 | from lrs import lr 10 | 11 | 12 | class NoamDecayLr(lr.Lr): 13 | """Decay the learning rate during each training step, follows Transformer""" 14 | def __init__(self, 15 | init_lr, # initial learning rate 16 | warmup_steps, # warmup step 17 | hidden_size, # model hidden size 18 | name="noam_decay_lr" # model name, no use 19 | ): 20 | super(NoamDecayLr, self).__init__(init_lr, name=name) 21 | 22 | self.warmup_steps = warmup_steps 23 | self.hidden_size = hidden_size 24 | 25 | def step(self, step): 26 | step = float(step) 27 | warmup_steps = float(self.warmup_steps) 28 | 29 | multiplier = float(self.hidden_size) ** -0.5 30 | decay = multiplier * 
np.minimum((step + 1) * (warmup_steps ** -1.5), 31 | (step + 1) ** -0.5) 32 | self.lrate = self.init_lrate * decay 33 | -------------------------------------------------------------------------------- /ner/code/ner_glove.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from wrapper import Sequence 4 | from utils import load_data_and_labels, load_glove 5 | 6 | 7 | if __name__ == '__main__': 8 | DATA_ROOT = os.path.join(os.path.dirname(__file__), os.environ["data_dir"]) 9 | EMBEDDING_PATH = os.path.join(os.path.dirname(__file__), os.environ["glove_dir"]) 10 | 11 | train_path = os.path.join(DATA_ROOT, 'train.txt') 12 | valid_path = os.path.join(DATA_ROOT, 'valid.txt') 13 | test_path = os.path.join(DATA_ROOT, 'test.txt') 14 | 15 | print('Loading data...') 16 | x_train, y_train = load_data_and_labels(train_path) 17 | x_valid, y_valid = load_data_and_labels(valid_path) 18 | x_test, y_test = load_data_and_labels(test_path) 19 | print(len(x_train), 'train sequences') 20 | print(len(x_valid), 'valid sequences') 21 | print(len(x_test), 'test sequences') 22 | 23 | embeddings = load_glove(EMBEDDING_PATH) 24 | 25 | # Use pre-trained word embeddings 26 | model = Sequence(cell_type=os.environ['cell_type'], embeddings=embeddings, initial_vocab=embeddings.keys()) 27 | # print(model.trainable_weights) 28 | 29 | model.fit(x_train, y_train, x_valid, y_valid, epochs=30) 30 | 31 | print('Testing the model...') 32 | print(model.score(x_test, y_test)) 33 | -------------------------------------------------------------------------------- /doc/code/lrs/lr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | # This is an abstract class that deals with 9 | # different learning rate decay strategy 10 | # Generally, we decay the learning rate with GPU computation 11 | # However, in this paper, we simply decay the learning rate 12 | # at CPU level, and feed the decayed lr into GPU for 13 | # optimization 14 | class Lr(object): 15 | def __init__(self, 16 | init_lrate, # initial learning rate 17 | name="lr", # learning rate name, no use 18 | ): 19 | self.name = name 20 | self.init_lrate = init_lrate # just record the init learning rate 21 | self.lrate = init_lrate # active learning rate, change with training 22 | 23 | # suppose the eidx starts from 1 24 | def before_epoch(self, eidx=None): 25 | pass 26 | 27 | def after_epoch(self, eidx=None): 28 | pass 29 | 30 | def step(self, step): 31 | pass 32 | 33 | def after_eval(self, eval_score): 34 | pass 35 | 36 | def get_lr(self): 37 | """Return the learning rate whenever you want""" 38 | return self.lrate 39 | -------------------------------------------------------------------------------- /nli/code/lrs/lr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | # This is an abstract class that deals with 9 | # different learning rate decay strategy 10 | # Generally, we decay the learning rate with GPU computation 11 | # However, in this paper, we simply decay the learning rate 12 | # at CPU level, and feed the decayed lr into GPU for 13 | # optimization 14 | class Lr(object): 15 | def __init__(self, 16 | init_lrate, # initial learning rate 17 | 
name="lr", # learning rate name, no use 18 | ): 19 | self.name = name 20 | self.init_lrate = init_lrate # just record the init learning rate 21 | self.lrate = init_lrate # active learning rate, change with training 22 | 23 | # suppose the eidx starts from 1 24 | def before_epoch(self, eidx=None): 25 | pass 26 | 27 | def after_epoch(self, eidx=None): 28 | pass 29 | 30 | def step(self, step): 31 | pass 32 | 33 | def after_eval(self, eval_score): 34 | pass 35 | 36 | def get_lr(self): 37 | """Return the learning rate whenever you want""" 38 | return self.lrate 39 | -------------------------------------------------------------------------------- /lm/code/embed_regularize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | def embedded_dropout(embed, words, dropout=0.1, scale=None): 8 | if dropout: 9 | mask = embed.weight.data.new().resize_((embed.weight.size(0), 1)).bernoulli_(1 - dropout).expand_as(embed.weight) / (1 - dropout) 10 | mask = Variable(mask) 11 | masked_embed_weight = mask * embed.weight 12 | else: 13 | masked_embed_weight = embed.weight 14 | if scale: 15 | masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight 16 | 17 | padding_idx = embed.padding_idx 18 | if padding_idx is None: 19 | padding_idx = -1 20 | X = F.embedding(words, masked_embed_weight, 21 | padding_idx, embed.max_norm, embed.norm_type, 22 | embed.scale_grad_by_freq, embed.sparse 23 | ) 24 | return X 25 | 26 | if __name__ == '__main__': 27 | V = 50 28 | h = 4 29 | bptt = 10 30 | batch_size = 2 31 | 32 | embed = torch.nn.Embedding(V, h) 33 | 34 | words = np.random.random_integers(low=0, high=V-1, size=(batch_size, bptt)) 35 | words = torch.LongTensor(words) 36 | words = Variable(words) 37 | 38 | origX = embed(words) 39 | X = embedded_dropout(embed, words) 40 | 41 | print(origX) 42 | print(X) 43 | -------------------------------------------------------------------------------- /doc/code/utils/initializer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def get_initializer(params): 11 | if params.initializer == "uniform": 12 | max_val = params.initializer_gain 13 | return tf.random_uniform_initializer(-max_val, max_val) 14 | elif params.initializer == "normal": 15 | return tf.random_normal_initializer(0.0, params.initializer_gain) 16 | elif params.initializer == "normal_unit_scaling": 17 | return tf.variance_scaling_initializer(params.initializer_gain, 18 | mode="fan_avg", 19 | distribution="normal") 20 | elif params.initializer == "uniform_unit_scaling": 21 | return tf.variance_scaling_initializer(params.initializer_gain, 22 | mode="fan_avg", 23 | distribution="uniform") 24 | else: 25 | tf.logging.warn("Unrecognized initializer: %s" % params.initializer) 26 | tf.logging.warn("Return to default initializer: glorot_uniform_initializer") 27 | return tf.glorot_uniform_initializer() 28 | -------------------------------------------------------------------------------- /nli/code/utils/initializer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import 
print_function 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def get_initializer(params): 11 | if params.initializer == "uniform": 12 | max_val = params.initializer_gain 13 | return tf.random_uniform_initializer(-max_val, max_val) 14 | elif params.initializer == "normal": 15 | return tf.random_normal_initializer(0.0, params.initializer_gain) 16 | elif params.initializer == "normal_unit_scaling": 17 | return tf.variance_scaling_initializer(params.initializer_gain, 18 | mode="fan_avg", 19 | distribution="normal") 20 | elif params.initializer == "uniform_unit_scaling": 21 | return tf.variance_scaling_initializer(params.initializer_gain, 22 | mode="fan_avg", 23 | distribution="uniform") 24 | else: 25 | tf.logging.warn("Unrecognized initializer: %s" % params.initializer) 26 | tf.logging.warn("Return to default initializer: glorot_uniform_initializer") 27 | return tf.glorot_uniform_initializer() 28 | -------------------------------------------------------------------------------- /doc/code/lrs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from lrs import vanillalr, noamlr, scorelr, gnmtplr, epochlr 4 | 5 | 6 | def get_lr(params): 7 | 8 | strategy = params.lrate_strategy.lower() 9 | 10 | if strategy == "noam": 11 | return noamlr.NoamDecayLr( 12 | params.lrate, 13 | params.warmup_steps, 14 | params.hidden_size 15 | ) 16 | elif strategy == "gnmt+": 17 | return gnmtplr.GNMTPDecayLr( 18 | params.lrate, 19 | params.warmup_steps, 20 | params.nstable, 21 | params.lrdecay_start, 22 | params.lrdecay_end 23 | ) 24 | elif strategy == "epoch": 25 | return epochlr.EpochDecayLr( 26 | params.lrate, 27 | params.lrate_decay, 28 | ) 29 | elif strategy == "score": 30 | return scorelr.ScoreDecayLr( 31 | params.lrate, 32 | history_scores=[v[1] for v in params.recorder.valid_script_scores], 33 | decay=params.lrate_decay, 34 | patience=params.lrate_patience, 35 | ) 36 | elif strategy == "vanilla": 37 | return vanillalr.VanillaLR( 38 | params.lrate, 39 | ) 40 | else: 41 | raise NotImplementedError( 42 | "{} is not supported".format(strategy)) -------------------------------------------------------------------------------- /nli/code/lrs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from lrs import vanillalr, noamlr, scorelr, gnmtplr, epochlr 4 | 5 | 6 | def get_lr(params): 7 | 8 | strategy = params.lrate_strategy.lower() 9 | 10 | if strategy == "noam": 11 | return noamlr.NoamDecayLr( 12 | params.lrate, 13 | params.warmup_steps, 14 | params.hidden_size 15 | ) 16 | elif strategy == "gnmt+": 17 | return gnmtplr.GNMTPDecayLr( 18 | params.lrate, 19 | params.warmup_steps, 20 | params.nstable, 21 | params.lrdecay_start, 22 | params.lrdecay_end 23 | ) 24 | elif strategy == "epoch": 25 | return epochlr.EpochDecayLr( 26 | params.lrate, 27 | params.lrate_decay, 28 | ) 29 | elif strategy == "score": 30 | return scorelr.ScoreDecayLr( 31 | params.lrate, 32 | history_scores=[v[1] for v in params.recorder.valid_script_scores], 33 | decay=params.lrate_decay, 34 | patience=params.lrate_patience, 35 | ) 36 | elif strategy == "vanilla": 37 | return vanillalr.VanillaLR( 38 | params.lrate, 39 | ) 40 | else: 41 | raise NotImplementedError( 42 | "{} is not supported".format(strategy)) -------------------------------------------------------------------------------- /doc/code/bert/load.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 
from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import os 8 | import json 9 | import tensorflow as tf 10 | 11 | from .vocab import Vocab 12 | 13 | 14 | def load_vocab(model_dir): 15 | vocab = Vocab( 16 | vocab_file=os.path.join(model_dir, 'vocab.txt') 17 | ) 18 | return vocab 19 | 20 | 21 | def load_config(model_dir): 22 | with tf.gfile.GFile( 23 | os.path.join(model_dir, 'bert_config.json'), 24 | "r" 25 | ) as reader: 26 | text = reader.read() 27 | return json.loads(text) 28 | 29 | 30 | def load_model(session, model_dir): 31 | tf.logging.warn("Starting Loading BERT Pre-trained Model") 32 | ops = [] 33 | reader = tf.train.load_checkpoint( 34 | os.path.join(model_dir, "bert_model.ckpt") 35 | ) 36 | 37 | for var in tf.global_variables(): 38 | name = var.op.name 39 | name = name[name.find('/bert/')+1:] 40 | 41 | if reader.has_tensor(name) and 'Adam' not in name: 42 | tf.logging.info('{} **Good**'.format(name)) 43 | ops.append( 44 | tf.assign(var, reader.get_tensor(name))) 45 | else: 46 | tf.logging.warn("{} --Bad--".format(name)) 47 | restore_op = tf.group(*ops, name="restore_global_vars") 48 | session.run(restore_op) 49 | -------------------------------------------------------------------------------- /nli/code/bert/load.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import os 8 | import json 9 | import tensorflow as tf 10 | 11 | from .vocab import Vocab 12 | 13 | 14 | def load_vocab(model_dir): 15 | vocab = Vocab( 16 | vocab_file=os.path.join(model_dir, 'vocab.txt') 17 | ) 18 | return vocab 19 | 20 | 21 | def load_config(model_dir): 22 | with tf.gfile.GFile( 23 | os.path.join(model_dir, 'bert_config.json'), 24 | "r" 25 | ) as reader: 26 | text = reader.read() 27 | return json.loads(text) 28 | 29 | 30 | def load_model(session, model_dir): 31 | tf.logging.warn("Starting Loading BERT Pre-trained Model") 32 | ops = [] 33 | reader = tf.train.load_checkpoint( 34 | os.path.join(model_dir, "bert_model.ckpt") 35 | ) 36 | 37 | for var in tf.global_variables(): 38 | name = var.op.name 39 | name = name[name.find('/bert/')+1:] 40 | 41 | if reader.has_tensor(name) and 'Adam' not in name: 42 | tf.logging.info('{} **Good**'.format(name)) 43 | ops.append( 44 | tf.assign(var, reader.get_tensor(name))) 45 | else: 46 | tf.logging.warn("{} --Bad--".format(name)) 47 | restore_op = tf.group(*ops, name="restore_global_vars") 48 | session.run(restore_op) 49 | -------------------------------------------------------------------------------- /doc/code/lrs/scorelr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | from lrs import lr 9 | 10 | 11 | class ScoreDecayLr(lr.Lr): 12 | """Decay the learning rate after each evaluation""" 13 | def __init__(self, 14 | init_lr, 15 | history_scores=None, # evaluation history metric scores, such as BLEU 16 | decay=0.5, # learning rate decay rate 17 | patience=1, # decay after this number of bad counter 18 | name="score_decay_lr" # model name, no use 19 | ): 20 | super(ScoreDecayLr, self).__init__(init_lr, name=name) 21 | 22 | self.decay = decay 23 | self.patience = patience 24 | self.bad_counter = 0 25 | self.best_score = -1e9 26 | 
27 | if history_scores is not None: 28 | for score in history_scores: 29 | self.after_eval(score[1]) 30 | 31 | def after_eval(self, eval_score): 32 | if eval_score > self.best_score: 33 | self.best_score = eval_score 34 | self.bad_counter = 0 35 | else: 36 | self.bad_counter += 1 37 | if self.bad_counter >= self.patience: 38 | self.lrate = self.lrate * self.decay 39 | 40 | self.bad_counter = 0 41 | -------------------------------------------------------------------------------- /nli/code/lrs/scorelr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | from lrs import lr 9 | 10 | 11 | class ScoreDecayLr(lr.Lr): 12 | """Decay the learning rate after each evaluation""" 13 | def __init__(self, 14 | init_lr, 15 | history_scores=None, # evaluation history metric scores, such as BLEU 16 | decay=0.5, # learning rate decay rate 17 | patience=1, # decay after this number of bad counter 18 | name="score_decay_lr" # model name, no use 19 | ): 20 | super(ScoreDecayLr, self).__init__(init_lr, name=name) 21 | 22 | self.decay = decay 23 | self.patience = patience 24 | self.bad_counter = 0 25 | self.best_score = -1e9 26 | 27 | if history_scores is not None: 28 | for score in history_scores: 29 | self.after_eval(score[1]) 30 | 31 | def after_eval(self, eval_score): 32 | if eval_score > self.best_score: 33 | self.best_score = eval_score 34 | self.bad_counter = 0 35 | else: 36 | self.bad_counter += 1 37 | if self.bad_counter >= self.patience: 38 | self.lrate = self.lrate * self.decay 39 | 40 | self.bad_counter = 0 41 | -------------------------------------------------------------------------------- /ner/code/callbacks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Custom callbacks. 3 | """ 4 | import numpy as np 5 | from keras.callbacks import Callback 6 | from seqeval.metrics import f1_score, classification_report 7 | 8 | 9 | class F1score(Callback): 10 | 11 | def __init__(self, seq, preprocessor=None, name="callback"): 12 | super(F1score, self).__init__() 13 | self.seq = seq 14 | self.p = preprocessor 15 | self.name = name 16 | 17 | def get_lengths(self, y_true): 18 | lengths = [] 19 | for y in np.argmax(y_true, -1): 20 | try: 21 | i = list(y).index(0) 22 | except ValueError: 23 | i = len(y) 24 | lengths.append(i) 25 | 26 | return lengths 27 | 28 | def on_epoch_end(self, epoch, logs={}): 29 | label_true = [] 30 | label_pred = [] 31 | for i in range(len(self.seq)): 32 | x_true, y_true = self.seq[i] 33 | lengths = self.get_lengths(y_true) 34 | y_pred = self.model.predict_on_batch(x_true) 35 | 36 | y_true = self.p.inverse_transform(y_true, lengths) 37 | y_pred = self.p.inverse_transform(y_pred, lengths) 38 | 39 | label_true.extend(y_true) 40 | label_pred.extend(y_pred) 41 | 42 | score = f1_score(label_true, label_pred) 43 | print('{} - f1: {:04.2f}'.format(self.name, score * 100)) 44 | print(classification_report(label_true, label_pred)) 45 | logs['f1'] = score 46 | -------------------------------------------------------------------------------- /doc/code/bert/tokenizer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | import os 9 | import argparse 10 | 11 | from . 
import tokenization 12 | 13 | 14 | def load_tokenizer(params): 15 | tokenization.validate_case_matches_checkpoint( 16 | params.lower, 17 | os.path.join(params.bert_dir, 'bert_model.ckpt') 18 | ) 19 | tokenizer = tokenization.FullTokenizer( 20 | vocab_file=os.path.join(params.bert_dir, 'vocab.txt'), 21 | do_lower_case=params.lower 22 | ) 23 | return tokenizer 24 | 25 | 26 | def tokenize(params): 27 | tokenizer = load_tokenizer(params) 28 | 29 | with open(params.output, 'w') as writer: 30 | with open(params.input, 'r') as reader: 31 | for line in reader: 32 | writer.write(' '.join(tokenizer.tokenize(line.strip())).encode('utf8') + "\n") 33 | 34 | 35 | if __name__ == "__main__": 36 | parser = argparse.ArgumentParser('Vocabulary Preparison') 37 | parser.add_argument('--lower', action='store_true', help='whether lowercase the model') 38 | parser.add_argument('--bert_dir', type=str, help='the pre-trained model directory') 39 | parser.add_argument('input', type=str, help='the input un-tokenized file') 40 | parser.add_argument('output', type=str, help='the output tokenized file') 41 | 42 | args = parser.parse_args() 43 | 44 | tokenize(args) 45 | 46 | print("Finishing!") 47 | -------------------------------------------------------------------------------- /nli/code/bert/tokenizer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | import os 9 | import argparse 10 | 11 | from . import tokenization 12 | 13 | 14 | def load_tokenizer(params): 15 | tokenization.validate_case_matches_checkpoint( 16 | params.lower, 17 | os.path.join(params.bert_dir, 'bert_model.ckpt') 18 | ) 19 | tokenizer = tokenization.FullTokenizer( 20 | vocab_file=os.path.join(params.bert_dir, 'vocab.txt'), 21 | do_lower_case=params.bert_lower 22 | ) 23 | return tokenizer 24 | 25 | 26 | def tokenize(params): 27 | tokenizer = load_tokenizer(params) 28 | 29 | with open(params.output, 'w') as writer: 30 | with open(params.input, 'r') as reader: 31 | for line in reader: 32 | writer.write(' '.join(tokenizer.tokenize(line.strip())).encode('utf8') + "\n") 33 | 34 | 35 | if __name__ == "__main__": 36 | parser = argparse.ArgumentParser('Vocabulary Preparison') 37 | parser.add_argument('--lower', action='store_true', help='whether lowercase the model') 38 | parser.add_argument('--bert_dir', type=str, help='the pre-trained model directory') 39 | parser.add_argument('input', type=str, help='the input un-tokenized file') 40 | parser.add_argument('output', type=str, help='the output tokenized file') 41 | 42 | args = parser.parse_args() 43 | 44 | tokenize(args) 45 | 46 | print("Finishing!") 47 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Download SQuAD 4 | SQUAD_DIR=~/data/squad 5 | mkdir -p $SQUAD_DIR 6 | wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -O $SQUAD_DIR/train-v1.1.json 7 | wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -O $SQUAD_DIR/dev-v1.1.json 8 | 9 | # Download GloVe 10 | GLOVE_DIR=~/data/glove 11 | mkdir -p $GLOVE_DIR 12 | wget http://nlp.stanford.edu/data/glove.840B.300d.zip -O $GLOVE_DIR/glove.840B.300d.zip 13 | unzip $GLOVE_DIR/glove.840B.300d.zip -d $GLOVE_DIR 14 | 15 | # Download Glove Character Embedding 16 | # 
wget https://raw.githubusercontent.com/minimaxir/char-embeddings/master/glove.840B.300d-char.txt -O $GLOVE_DIR/glove.840B.300d-char.txt 17 | 18 | # Download fasttext 19 | # FASTTEXT_DIR=~/data/fasttext 20 | # mkdir -p $FASTTEXT_DIR 21 | # wget https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki-news-300d-1M.vec.zip -O $FASTTEXT_DIR/wiki-news-300d-1M.vec.zip 22 | # unzip $FASTTEXT_DIR/wiki-news-300d-1M.vec.zip -d $FASTTEXT_DIR 23 | 24 | # Download Elmo 25 | ELMO_DIR=~/data/elmo 26 | wget https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5 -O $ELMO_DIR/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5 27 | wget https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json -O $ELMO_DIR/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json 28 | 29 | # Download Spacy language models 30 | python3 -m spacy download en 31 | -------------------------------------------------------------------------------- /doc/code/rnns/lrn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class lrn(cell.Cell): 14 | """The Lightweight Recurrent Network.""" 15 | 16 | def __init__(self, d, ln=False, scope='lrn'): 17 | super(lrn, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "lrn")): 26 | h = linear(x, self.d * 3, 27 | bias=True, ln=self.ln, scope="hide_x") 28 | return (h, ) 29 | 30 | def __call__(self, h_, x): 31 | # h_: the previous hidden state 32 | # p,q,r/x: the current input state 33 | """ 34 | p, q, r = W x 35 | i = sigmoid(p + h_) 36 | f = sigmoid(q - h_) 37 | h = i * r + f * h_ 38 | """ 39 | if isinstance(x, (list, tuple)): 40 | x = x[0] 41 | 42 | with tf.variable_scope( 43 | "cell_{}".format(self.scope or "lrn")): 44 | p, q, r = tf.split(x, 3, -1) 45 | 46 | i = tf.sigmoid(p + h_) 47 | f = tf.sigmoid(q - h_) 48 | 49 | h = tf.tanh(i * r + f * h_) 50 | 51 | return h 52 | -------------------------------------------------------------------------------- /nli/code/rnns/lrn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class lrn(cell.Cell): 14 | """The Lightweight Recurrent Neural Network.""" 15 | 16 | def __init__(self, d, ln=False, scope='lrn'): 17 | super(lrn, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "lrn")): 26 | h = linear(x, self.d * 3, 27 | bias=True, ln=self.ln, scope="hide_x") 28 | return (h, ) 29 | 30 | def __call__(self, h_, x): 31 | # h_: the previous hidden state 
32 | # p,q,r/x: the current input state 33 | """ 34 | p, q, r = W x 35 | i = sigmoid(p + h_) 36 | f = sigmoid(q - h_) 37 | h = i * r + f * h_ 38 | """ 39 | if isinstance(x, (list, tuple)): 40 | x = x[0] 41 | 42 | with tf.variable_scope( 43 | "cell_{}".format(self.scope or "lrn")): 44 | p, q, r = tf.split(x, 3, -1) 45 | 46 | i = tf.sigmoid(p + h_) 47 | f = tf.sigmoid(q - h_) 48 | 49 | h = tf.tanh(i * r + f * h_) 50 | 51 | return h 52 | -------------------------------------------------------------------------------- /rc/rnet/code/rnns/lrn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class lrn(cell.Cell): 14 | """The Lightweight Recurrent Network.""" 15 | 16 | def __init__(self, d, ln=False, scope='lrn'): 17 | super(lrn, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "lrn")): 26 | h = linear(x, self.d * 3, 27 | bias=True, ln=self.ln, scope="hide_x") 28 | return (h, ) 29 | 30 | def __call__(self, h_, x): 31 | # h_: the previous hidden state 32 | # p,q,r/x: the current input state 33 | """ 34 | p, q, r = W x 35 | i = sigmoid(p + h_) 36 | f = sigmoid(q - h_) 37 | h = i * r + f * h_ 38 | """ 39 | if isinstance(x, (list, tuple)): 40 | x = x[0] 41 | 42 | with tf.variable_scope( 43 | "cell_{}".format(self.scope or "lrn")): 44 | p, q, r = tf.split(x, 3, -1) 45 | 46 | i = tf.sigmoid(p + h_) 47 | f = tf.sigmoid(q - h_) 48 | 49 | h = tf.tanh(i * r + f * h_) 50 | 51 | return h 52 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/rnns/lrn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class lrn(cell.Cell): 14 | """The Lightweight Recurrent Network.""" 15 | 16 | def __init__(self, d, ln=False, scope='lrn'): 17 | super(lrn, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "lrn")): 26 | h = linear(x, self.d * 3, 27 | bias=True, ln=self.ln, scope="hide_x") 28 | return (h, ) 29 | 30 | def __call__(self, h_, x): 31 | # h_: the previous hidden state 32 | # p,q,r/x: the current input state 33 | """ 34 | p, q, r = W x 35 | i = sigmoid(p + h_) 36 | f = sigmoid(q - h_) 37 | h = i * r + f * h_ 38 | """ 39 | if isinstance(x, (list, tuple)): 40 | x = x[0] 41 | 42 | with tf.variable_scope( 43 | "cell_{}".format(self.scope or "lrn")): 44 | p, q, r = tf.split(x, 3, -1) 45 | 46 | i = tf.sigmoid(p + h_) 47 | f = tf.sigmoid(q - h_) 48 | 49 | h = tf.tanh(i * r + f * h_) 50 | 51 | return h 52 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Biao Zhang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /doc/code/lrs/gnmtplr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | 9 | from lrs import lr 10 | 11 | 12 | class GNMTPDecayLr(lr.Lr): 13 | """Decay the learning rate during each training step, follows GNMT+""" 14 | def __init__(self, 15 | init_lr, # initial learning rate 16 | warmup_steps, # warmup step 17 | nstable, # number of replica 18 | lrdecay_start, # start of learning rate decay 19 | lrdecay_end, # end of learning rate decay 20 | name="gnmtp_decay_lr" # model name, no use 21 | ): 22 | super(GNMTPDecayLr, self).__init__(init_lr, name=name) 23 | 24 | self.warmup_steps = warmup_steps 25 | self.nstable = nstable 26 | self.lrdecay_start = lrdecay_start 27 | self.lrdecay_end = lrdecay_end 28 | 29 | if nstable < 1: 30 | raise Exception("Stabled Lrate Value should " 31 | "greater than 0, but is {}".format(nstable)) 32 | 33 | def step(self, step): 34 | t = float(step) 35 | p = float(self.warmup_steps) 36 | n = float(self.nstable) 37 | s = float(self.lrdecay_start) 38 | e = float(self.lrdecay_end) 39 | 40 | decay = np.minimum(1. 
+ t * (n - 1) / (n * p), n) 41 | decay = np.minimum(decay, n * (2 * n) ** ((s - n * t) / (e - s))) 42 | 43 | self.lrate = self.init_lrate * decay 44 | -------------------------------------------------------------------------------- /nli/code/lrs/gnmtplr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | 9 | from lrs import lr 10 | 11 | 12 | class GNMTPDecayLr(lr.Lr): 13 | """Decay the learning rate during each training step, follows GNMT+""" 14 | def __init__(self, 15 | init_lr, # initial learning rate 16 | warmup_steps, # warmup step 17 | nstable, # number of replica 18 | lrdecay_start, # start of learning rate decay 19 | lrdecay_end, # end of learning rate decay 20 | name="gnmtp_decay_lr" # model name, no use 21 | ): 22 | super(GNMTPDecayLr, self).__init__(init_lr, name=name) 23 | 24 | self.warmup_steps = warmup_steps 25 | self.nstable = nstable 26 | self.lrdecay_start = lrdecay_start 27 | self.lrdecay_end = lrdecay_end 28 | 29 | if nstable < 1: 30 | raise Exception("Stabled Lrate Value should " 31 | "greater than 0, but is {}".format(nstable)) 32 | 33 | def step(self, step): 34 | t = float(step) 35 | p = float(self.warmup_steps) 36 | n = float(self.nstable) 37 | s = float(self.lrdecay_start) 38 | e = float(self.lrdecay_end) 39 | 40 | decay = np.minimum(1. + t * (n - 1) / (n * p), n) 41 | decay = np.minimum(decay, n * (2 * n) ** ((s - n * t) / (e - s))) 42 | 43 | self.lrate = self.init_lrate * decay 44 | -------------------------------------------------------------------------------- /doc/code/rnns/atr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class atr(cell.Cell): 14 | """The Addition-Subtraction Twin-Gated Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='atr'): 17 | super(atr, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "atr")): 26 | h = linear(x, self.d, 27 | bias=True, ln=self.ln, scope="hide_x") 28 | return (h, ) 29 | 30 | def __call__(self, h_, x): 31 | # h_: the previous hidden state 32 | # x: the current input state 33 | """ 34 | p = W x 35 | q = U h_ 36 | i = sigmoid(p + q) 37 | f = sigmoid(p - q) 38 | h = i * p + f * h_ 39 | """ 40 | if isinstance(x, (list, tuple)): 41 | x = x[0] 42 | 43 | with tf.variable_scope( 44 | "cell_{}".format(self.scope or "atr")): 45 | q = linear(h_, self.d, 46 | ln=self.ln, scope="hide_h") 47 | p = x 48 | 49 | f = tf.sigmoid(p - q) 50 | i = tf.sigmoid(p + q) 51 | 52 | h = tf.tanh(i * p + f * h_) 53 | 54 | return h 55 | -------------------------------------------------------------------------------- /nli/code/rnns/atr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | 
from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class atr(cell.Cell): 14 | """The Addition-Subtraction Twin-Gated Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='atr'): 17 | super(atr, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "atr")): 26 | h = linear(x, self.d, 27 | bias=True, ln=self.ln, scope="hide_x") 28 | return (h, ) 29 | 30 | def __call__(self, h_, x): 31 | # h_: the previous hidden state 32 | # x: the current input state 33 | """ 34 | p = W x 35 | q = U h_ 36 | i = sigmoid(p + q) 37 | f = sigmoid(p - q) 38 | h = i * p + f * h_ 39 | """ 40 | if isinstance(x, (list, tuple)): 41 | x = x[0] 42 | 43 | with tf.variable_scope( 44 | "cell_{}".format(self.scope or "atr")): 45 | q = linear(h_, self.d, 46 | ln=self.ln, scope="hide_h") 47 | p = x 48 | 49 | f = tf.sigmoid(p - q) 50 | i = tf.sigmoid(p + q) 51 | 52 | h = tf.tanh(i * p + f * h_) 53 | 54 | return h 55 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/rnns/atr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class atr(cell.Cell): 14 | """The Addition-Subtraction Twin-Gated Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='atr'): 17 | super(atr, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "atr")): 26 | h = linear(x, self.d, 27 | bias=True, ln=self.ln, scope="hide_x") 28 | return (h, ) 29 | 30 | def __call__(self, h_, x): 31 | # h_: the previous hidden state 32 | # x: the current input state 33 | """ 34 | p = W x 35 | q = U h_ 36 | i = sigmoid(p + q) 37 | f = sigmoid(p - q) 38 | h = i * p + f * h_ 39 | """ 40 | if isinstance(x, (list, tuple)): 41 | x = x[0] 42 | 43 | with tf.variable_scope( 44 | "cell_{}".format(self.scope or "atr")): 45 | q = linear(h_, self.d, 46 | ln=self.ln, scope="hide_h") 47 | p = x 48 | 49 | f = tf.sigmoid(p - q) 50 | i = tf.sigmoid(p + q) 51 | 52 | h = tf.tanh(i * p + f * h_) 53 | 54 | return h 55 | -------------------------------------------------------------------------------- /rc/rnet/code/rnns/atr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class atr(cell.Cell): 14 | """The Addition-Subtraction Twin-Gated Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, twin=False, scope='atr'): 17 | super(atr, self).__init__(d, ln=ln, scope=scope) 18 | 19 | self.twin = twin 20 | 21 | def get_init_state(self, shape=None, x=None, scope=None): 22 | return self._get_init_state( 23 | self.d, shape=shape, x=x, scope=scope) 24 | 
25 | def fetch_states(self, x): 26 | with tf.variable_scope( 27 | "fetch_state_{}".format(self.scope or "atr")): 28 | h = linear(x, self.d, 29 | bias=True, ln=self.ln, scope="hide_x") 30 | return (h, ) 31 | 32 | def __call__(self, h_, x): 33 | # h_: the previous hidden state 34 | # x: the current input state 35 | """ 36 | p = W x 37 | q = U h_ 38 | i = sigmoid(p + q) 39 | f = sigmoid(p - q) 40 | h = i * p + f * h_ 41 | """ 42 | if isinstance(x, (list, tuple)): 43 | x = x[0] 44 | 45 | with tf.variable_scope( 46 | "cell_{}".format(self.scope or "atr")): 47 | q = linear(h_, self.d, 48 | ln=self.ln, scope="hide_h") 49 | p = x 50 | 51 | f = tf.sigmoid(p - q) 52 | i = tf.sigmoid(p + q) 53 | 54 | h = tf.tanh(i * p + f * h_) 55 | 56 | return h 57 | -------------------------------------------------------------------------------- /doc/code/rnns/cell.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import abc 8 | import tensorflow as tf 9 | from func import linear 10 | 11 | 12 | # This is an abstract class that deals with 13 | # recurrent cells, e.g. GRU, LSTM, ATR 14 | class Cell(object): 15 | def __init__(self, 16 | d, # hidden state dimension 17 | ln=False, # whether use layer normalization 18 | scope=None, # the name scope for this cell 19 | ): 20 | self.d = d 21 | self.scope = scope 22 | self.ln = ln 23 | 24 | def _get_init_state(self, d, shape=None, x=None, scope=None): 25 | # gen init state vector 26 | # if no evidence x is provided, use zero initialization 27 | if x is None: 28 | assert shape is not None, "you should provide shape" 29 | if not isinstance(shape, (tuple, list)): 30 | shape = [shape] 31 | shape = shape + [d] 32 | return tf.zeros(shape, tf.float32) 33 | else: 34 | return linear( 35 | x, d, bias=True, ln=self.ln, 36 | scope="{}_init".format(scope or self.scope) 37 | ) 38 | 39 | def get_hidden(self, x): 40 | return x 41 | 42 | @abc.abstractmethod 43 | def get_init_state(self, shape=None, x=None, scope=None): 44 | raise NotImplementedError("Not Supported") 45 | 46 | @abc.abstractmethod 47 | def __call__(self, h_, x): 48 | raise NotImplementedError("Not Supported") 49 | 50 | @abc.abstractmethod 51 | def fetch_states(self, x): 52 | raise NotImplementedError("Not Supported") 53 | -------------------------------------------------------------------------------- /nli/code/rnns/cell.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import abc 8 | import tensorflow as tf 9 | from func import linear 10 | 11 | 12 | # This is an abstract class that deals with 13 | # recurrent cells, e.g. 
GRU, LSTM, ATR 14 | class Cell(object): 15 | def __init__(self, 16 | d, # hidden state dimension 17 | ln=False, # whether use layer normalization 18 | scope=None, # the name scope for this cell 19 | ): 20 | self.d = d 21 | self.scope = scope 22 | self.ln = ln 23 | 24 | def _get_init_state(self, d, shape=None, x=None, scope=None): 25 | # gen init state vector 26 | # if no evidence x is provided, use zero initialization 27 | if x is None: 28 | assert shape is not None, "you should provide shape" 29 | if not isinstance(shape, (tuple, list)): 30 | shape = [shape] 31 | shape = shape + [d] 32 | return tf.zeros(shape, tf.float32) 33 | else: 34 | return linear( 35 | x, d, bias=True, ln=self.ln, 36 | scope="{}_init".format(scope or self.scope) 37 | ) 38 | 39 | def get_hidden(self, x): 40 | return x 41 | 42 | @abc.abstractmethod 43 | def get_init_state(self, shape=None, x=None, scope=None): 44 | raise NotImplementedError("Not Supported") 45 | 46 | @abc.abstractmethod 47 | def __call__(self, h_, x): 48 | raise NotImplementedError("Not Supported") 49 | 50 | @abc.abstractmethod 51 | def fetch_states(self, x): 52 | raise NotImplementedError("Not Supported") 53 | -------------------------------------------------------------------------------- /rc/rnet/code/rnns/cell.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import abc 8 | import tensorflow as tf 9 | from func import linear 10 | 11 | 12 | # This is an abstract class that deals with 13 | # recurrent cells, e.g. GRU, LSTM, ATR 14 | class Cell(object): 15 | def __init__(self, 16 | d, # hidden state dimension 17 | ln=False, # whether use layer normalization 18 | scope=None, # the name scope for this cell 19 | ): 20 | self.d = d 21 | self.scope = scope 22 | self.ln = ln 23 | 24 | def _get_init_state(self, d, shape=None, x=None, scope=None): 25 | # gen init state vector 26 | # if no evidence x is provided, use zero initialization 27 | if x is None: 28 | assert shape is not None, "you should provide shape" 29 | if not isinstance(shape, (tuple, list)): 30 | shape = [shape] 31 | shape = shape + [d] 32 | return tf.zeros(shape, tf.float32) 33 | else: 34 | return linear( 35 | x, d, bias=True, ln=self.ln, 36 | scope="{}_init".format(scope or self.scope) 37 | ) 38 | 39 | def get_hidden(self, x): 40 | return x 41 | 42 | @abc.abstractmethod 43 | def get_init_state(self, shape=None, x=None, scope=None): 44 | raise NotImplementedError("Not Supported") 45 | 46 | @abc.abstractmethod 47 | def __call__(self, h_, x): 48 | raise NotImplementedError("Not Supported") 49 | 50 | @abc.abstractmethod 51 | def fetch_states(self, x): 52 | raise NotImplementedError("Not Supported") 53 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/rnns/cell.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import abc 8 | import tensorflow as tf 9 | from func import linear 10 | 11 | 12 | # This is an abstract class that deals with 13 | # recurrent cells, e.g. 
GRU, LSTM, ATR 14 | class Cell(object): 15 | def __init__(self, 16 | d, # hidden state dimension 17 | ln=False, # whether use layer normalization 18 | scope=None, # the name scope for this cell 19 | ): 20 | self.d = d 21 | self.scope = scope 22 | self.ln = ln 23 | 24 | def _get_init_state(self, d, shape=None, x=None, scope=None): 25 | # gen init state vector 26 | # if no evidence x is provided, use zero initialization 27 | if x is None: 28 | assert shape is not None, "you should provide shape" 29 | if not isinstance(shape, (tuple, list)): 30 | shape = [shape] 31 | shape = shape + [d] 32 | return tf.zeros(shape, tf.float32) 33 | else: 34 | return linear( 35 | x, d, bias=True, ln=self.ln, 36 | scope="{}_init".format(scope or self.scope) 37 | ) 38 | 39 | def get_hidden(self, x): 40 | return x 41 | 42 | @abc.abstractmethod 43 | def get_init_state(self, shape=None, x=None, scope=None): 44 | raise NotImplementedError("Not Supported") 45 | 46 | @abc.abstractmethod 47 | def __call__(self, h_, x): 48 | raise NotImplementedError("Not Supported") 49 | 50 | @abc.abstractmethod 51 | def fetch_states(self, x): 52 | raise NotImplementedError("Not Supported") 53 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/cycle.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def session_run(monitored_session, args): 11 | # Call raw TF session directly 12 | return monitored_session._tf_sess().run(args) 13 | 14 | 15 | def zero_variables(variables, name=None): 16 | ops = [] 17 | 18 | for var in variables: 19 | with tf.device(var.device): 20 | op = var.assign(tf.zeros(var.shape.as_list())) 21 | ops.append(op) 22 | 23 | return tf.group(*ops, name=name or "zero_op") 24 | 25 | 26 | def replicate_variables(variables, device=None): 27 | new_vars = [] 28 | 29 | for var in variables: 30 | device = device or var.device 31 | with tf.device(device): 32 | name = "replicate/" + var.name.split(":")[0] 33 | new_vars.append(tf.Variable(tf.zeros(var.shape.as_list()), 34 | name=name, trainable=False)) 35 | 36 | return new_vars 37 | 38 | 39 | def collect_gradients(gradients, variables): 40 | ops = [] 41 | 42 | for grad, var in zip(gradients, variables): 43 | if isinstance(grad, tf.Tensor): 44 | ops.append(tf.assign_add(var, grad)) 45 | else: 46 | ops.append(tf.scatter_add(var, grad.indices, grad.values)) 47 | 48 | return tf.group(*ops) 49 | 50 | 51 | def scale_gradients(gradients, scale): 52 | scaled_gradients = [] 53 | 54 | for grad in gradients: 55 | if isinstance(grad, tf.IndexedSlices): 56 | slices = tf.IndexedSlices(scale * grad.values, grad.indices) 57 | scaled_gradients.append(slices) 58 | else: 59 | scaled_gradients.append(scale * grad) 60 | 61 | return tuple(scaled_gradients) -------------------------------------------------------------------------------- /doc/code/rnns/gru.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class gru(cell.Cell): 14 | """The Gated Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='gru'): 17 | super(gru, 
self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "gru")): 26 | g = linear(x, self.d * 2, 27 | bias=True, ln=self.ln, scope="gate_x") 28 | h = linear(x, self.d, 29 | bias=True, ln=self.ln, scope="hide_x") 30 | return g, h 31 | 32 | def __call__(self, h_, x): 33 | # h_: the previous hidden state 34 | # x_g/x: the current input state for gate 35 | # x_h/x: the current input state for hidden 36 | """ 37 | z = sigmoid(h_, x) 38 | r = sigmoid(h_, x) 39 | h' = tanh(x, r * h_) 40 | h = z * h_ + (1. - z) * h' 41 | """ 42 | with tf.variable_scope( 43 | "cell_{}".format(self.scope or "gru")): 44 | x_g, x_h = x 45 | 46 | h_g = linear(h_, self.d * 2, 47 | ln=self.ln, scope="gate_h") 48 | z, r = tf.split( 49 | tf.sigmoid(x_g + h_g), 2, -1) 50 | 51 | h_h = linear(h_ * r, self.d, 52 | ln=self.ln, scope="hide_h") 53 | h = tf.tanh(x_h + h_h) 54 | 55 | h = z * h_ + (1. - z) * h 56 | 57 | return h 58 | -------------------------------------------------------------------------------- /nli/code/rnns/gru.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class gru(cell.Cell): 14 | """The Gated Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='gru'): 17 | super(gru, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "gru")): 26 | g = linear(x, self.d * 2, 27 | bias=True, ln=self.ln, scope="gate_x") 28 | h = linear(x, self.d, 29 | bias=True, ln=self.ln, scope="hide_x") 30 | return g, h 31 | 32 | def __call__(self, h_, x): 33 | # h_: the previous hidden state 34 | # x_g/x: the current input state for gate 35 | # x_h/x: the current input state for hidden 36 | """ 37 | z = sigmoid(h_, x) 38 | r = sigmoid(h_, x) 39 | h' = tanh(x, r * h_) 40 | h = z * h_ + (1. - z) * h' 41 | """ 42 | with tf.variable_scope( 43 | "cell_{}".format(self.scope or "gru")): 44 | x_g, x_h = x 45 | 46 | h_g = linear(h_, self.d * 2, 47 | ln=self.ln, scope="gate_h") 48 | z, r = tf.split( 49 | tf.sigmoid(x_g + h_g), 2, -1) 50 | 51 | h_h = linear(h_ * r, self.d, 52 | ln=self.ln, scope="hide_h") 53 | h = tf.tanh(x_h + h_h) 54 | 55 | h = z * h_ + (1. 
- z) * h 56 | 57 | return h 58 | -------------------------------------------------------------------------------- /rc/rnet/code/rnns/gru.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class gru(cell.Cell): 14 | """The Gated Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='gru'): 17 | super(gru, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "gru")): 26 | g = linear(x, self.d * 2, 27 | bias=True, ln=self.ln, scope="gate_x") 28 | h = linear(x, self.d, 29 | bias=True, ln=self.ln, scope="hide_x") 30 | return g, h 31 | 32 | def __call__(self, h_, x): 33 | # h_: the previous hidden state 34 | # x_g/x: the current input state for gate 35 | # x_h/x: the current input state for hidden 36 | """ 37 | z = sigmoid(h_, x) 38 | r = sigmoid(h_, x) 39 | h' = tanh(x, r * h_) 40 | h = z * h_ + (1. - z) * h' 41 | """ 42 | with tf.variable_scope( 43 | "cell_{}".format(self.scope or "gru")): 44 | x_g, x_h = x 45 | 46 | h_g = linear(h_, self.d * 2, 47 | ln=self.ln, scope="gate_h") 48 | z, r = tf.split( 49 | tf.sigmoid(x_g + h_g), 2, -1) 50 | 51 | h_h = linear(h_ * r, self.d, 52 | ln=self.ln, scope="hide_h") 53 | h = tf.tanh(x_h + h_h) 54 | 55 | h = z * h_ + (1. - z) * h 56 | 57 | return h 58 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/rnns/gru.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class gru(cell.Cell): 14 | """The Gated Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='gru'): 17 | super(gru, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d, shape=shape, x=x, scope=scope) 22 | 23 | def fetch_states(self, x): 24 | with tf.variable_scope( 25 | "fetch_state_{}".format(self.scope or "gru")): 26 | g = linear(x, self.d * 2, 27 | bias=True, ln=self.ln, scope="gate_x") 28 | h = linear(x, self.d, 29 | bias=True, ln=self.ln, scope="hide_x") 30 | return g, h 31 | 32 | def __call__(self, h_, x): 33 | # h_: the previous hidden state 34 | # x_g/x: the current input state for gate 35 | # x_h/x: the current input state for hidden 36 | """ 37 | z = sigmoid(h_, x) 38 | r = sigmoid(h_, x) 39 | h' = tanh(x, r * h_) 40 | h = z * h_ + (1. - z) * h' 41 | """ 42 | with tf.variable_scope( 43 | "cell_{}".format(self.scope or "gru")): 44 | x_g, x_h = x 45 | 46 | h_g = linear(h_, self.d * 2, 47 | ln=self.ln, scope="gate_h") 48 | z, r = tf.split( 49 | tf.sigmoid(x_g + h_g), 2, -1) 50 | 51 | h_h = linear(h_ * r, self.d, 52 | ln=self.ln, scope="hide_h") 53 | h = tf.tanh(x_h + h_h) 54 | 55 | h = z * h_ + (1. 
- z) * h 56 | 57 | return h 58 | -------------------------------------------------------------------------------- /rc/README.md: -------------------------------------------------------------------------------- 1 | ## Reading Comprehension 2 | 3 | 4 | We use [SQUAD v1](https://rajpurkar.github.io/SQuAD-explorer/) for experiments and adopt the 5 | [RNET model](https://www.aclweb.org/anthology/papers/P/P17/P17-1018/). 6 | Main experimental results are summarized below. 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 |
| Model | #Params | Base | +Elmo |
| --- | --- | --- | --- |
| rnet | - | 71.1/79.5 | -/- |
| LSTM | 2.67M | 70.46/78.98 | 75.17/82.79 |
| GRU | 2.31M | 70.41/79.15 | 75.81/83.12 |
| ATR | 1.59M | 69.73/78.70 | 75.06/82.76 |
| SRU | 2.44M | 69.27/78.41 | 74.56/82.50 |
| LRN | 2.14M | 70.11/78.83 | 76.14/83.83 |
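The EM/F1 pairs above are computed with the official SQuAD v1.1 scorer shipped as `code/evaluate-v1.1.py` (see "How to Run?" below). A minimal sketch of the invocation, with placeholder file names for the dev set and the model's prediction dump:

```bash
# Placeholders: dev-v1.1.json is the official SQuAD v1.1 dev set;
# predictions.json is a JSON dict mapping question ids to predicted answer strings.
python code/evaluate-v1.1.py dev-v1.1.json predictions.json
# prints a JSON dict such as {"exact_match": ..., "f1": ...}
```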
52 | 53 | Exact match/F1-score. 54 | 55 | ## Requirement 56 | tensorflow >= 1.8.1 57 | 58 | ## How to Run? 59 | 60 | - download and preprocess dataset 61 | 62 | - see [R-Net](https://github.com/HKUST-KnowComp/R-Net) about the preprocessing of datasets 63 | - Basically, you need the following datasets: squad v1.1, GloVe, Elmo and convert raw datasets into the required data format. 64 | 65 | - no hyperparameters are tuned, we keep them all in default. 66 | 67 | - training and evaluation 68 | 69 | Please see the `train_lrn.sh` and `test_lrn.sh` scripts in `rnet` (Base) and `elmo_rnet` (Base+Elmo). 70 | 71 | For reporting final EM/F1 score, we used the `evaluate-v1.1.py` script. 72 | 73 | ## Credits 74 | 75 | Source code structure is adapted from [R-Net](https://github.com/HKUST-KnowComp/R-Net). -------------------------------------------------------------------------------- /lm/code/utils.py: -------------------------------------------------------------------------------- 1 | import os, shutil 2 | import torch 3 | from torch.autograd import Variable 4 | 5 | def repackage_hidden(h): 6 | """Wraps hidden states in new Variables, to detach them from their history.""" 7 | if isinstance(h, tuple) or isinstance(h, list): 8 | return tuple(repackage_hidden(v) for v in h) 9 | else: 10 | return h.detach() 11 | 12 | def batchify(data, bsz, args): 13 | # Work out how cleanly we can divide the dataset into bsz parts. 14 | nbatch = data.size(0) // bsz 15 | # Trim off any extra elements that wouldn't cleanly fit (remainders). 16 | data = data.narrow(0, 0, nbatch * bsz) 17 | # Evenly divide the data across the bsz batches. 18 | data = data.view(bsz, -1).t().contiguous() 19 | print(data.size()) 20 | if args.cuda: 21 | data = data.cuda() 22 | return data 23 | 24 | def get_batch(source, i, args, seq_len=None): 25 | seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i) 26 | data = Variable(source[i:i+seq_len]) 27 | # target = Variable(source[i+1:i+1+seq_len].view(-1)) 28 | target = Variable(source[i+1:i+1+seq_len]) 29 | return data, target 30 | 31 | def create_exp_dir(path, scripts_to_save=None): 32 | if not os.path.exists(path): 33 | os.mkdir(path) 34 | 35 | print('Experiment dir : {}'.format(path)) 36 | if scripts_to_save is not None: 37 | os.mkdir(os.path.join(path, 'scripts')) 38 | for script in scripts_to_save: 39 | dst_file = os.path.join(path, 'scripts', os.path.basename(script)) 40 | shutil.copyfile(script, dst_file) 41 | 42 | def save_checkpoint(model, optimizer, path, finetune=False): 43 | if finetune: 44 | torch.save(model, os.path.join(path, 'finetune_model.pt')) 45 | torch.save(optimizer.state_dict(), os.path.join(path, 'finetune_optimizer.pt')) 46 | else: 47 | torch.save(model, os.path.join(path, 'model.pt')) 48 | torch.save(optimizer.state_dict(), os.path.join(path, 'optimizer.pt')) 49 | -------------------------------------------------------------------------------- /doc/code/bert/vocab.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | class Vocab(object): 9 | def __init__(self, vocab_file=None): 10 | self.word2id = {} 11 | self.id2word = {} 12 | self.word2count = {} 13 | 14 | self.pad_sym = "[PAD]" 15 | self.cls_sym = "[CLS]" 16 | self.sep_sym = "[SEP]" 17 | self.unk_sym = "[UNK]" 18 | 19 | if vocab_file is not None: 20 | self.load_vocab(vocab_file) 21 | 22 | def insert(self, token): 23 | if 
token not in self.word2id: 24 | index = len(self.word2id) 25 | self.word2id[token] = index 26 | self.id2word[index] = token 27 | 28 | self.word2count[token] = 0 29 | self.word2count[token] += 1 30 | 31 | @property 32 | def size(self): 33 | return len(self.word2id) 34 | 35 | def load_vocab(self, vocab_file): 36 | with open(vocab_file, 'r') as reader: 37 | for token in reader: 38 | self.insert(token.strip()) 39 | 40 | def get_token(self, id): 41 | if id in self.id2word: 42 | return self.id2word[id] 43 | return self.unk_sym 44 | 45 | def get_id(self, token): 46 | if token in self.word2id: 47 | return self.word2id[token] 48 | return self.word2id[self.unk_sym] 49 | 50 | def save_vocab(self, vocab_file): 51 | with open(vocab_file, 'w') as writer: 52 | for id in range(self.size): 53 | writer.write(self.id2word[id] + "\n") 54 | 55 | def to_id(self, tokens): 56 | return [self.get_id(token) for token in tokens] 57 | 58 | def to_tokens(self, ids): 59 | return [self.get_token(id) for id in ids] 60 | 61 | @property 62 | def pad(self): 63 | return self.get_id(self.pad_sym) 64 | 65 | @property 66 | def cls(self): 67 | return self.get_id(self.cls_sym) 68 | 69 | @property 70 | def sep(self): 71 | return self.get_id(self.sep_sym) 72 | -------------------------------------------------------------------------------- /nli/code/bert/vocab.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | 8 | class Vocab(object): 9 | def __init__(self, vocab_file=None): 10 | self.word2id = {} 11 | self.id2word = {} 12 | self.word2count = {} 13 | 14 | self.pad_sym = "[PAD]" 15 | self.cls_sym = "[CLS]" 16 | self.sep_sym = "[SEP]" 17 | self.unk_sym = "[UNK]" 18 | 19 | if vocab_file is not None: 20 | self.load_vocab(vocab_file) 21 | 22 | def insert(self, token): 23 | if token not in self.word2id: 24 | index = len(self.word2id) 25 | self.word2id[token] = index 26 | self.id2word[index] = token 27 | 28 | self.word2count[token] = 0 29 | self.word2count[token] += 1 30 | 31 | @property 32 | def size(self): 33 | return len(self.word2id) 34 | 35 | def load_vocab(self, vocab_file): 36 | with open(vocab_file, 'r') as reader: 37 | for token in reader: 38 | self.insert(token.strip()) 39 | 40 | def get_token(self, id): 41 | if id in self.id2word: 42 | return self.id2word[id] 43 | return self.unk_sym 44 | 45 | def get_id(self, token): 46 | if token in self.word2id: 47 | return self.word2id[token] 48 | return self.word2id[self.unk_sym] 49 | 50 | def save_vocab(self, vocab_file): 51 | with open(vocab_file, 'w') as writer: 52 | for id in range(self.size): 53 | writer.write(self.id2word[id] + "\n") 54 | 55 | def to_id(self, tokens): 56 | return [self.get_id(token) for token in tokens] 57 | 58 | def to_tokens(self, ids): 59 | return [self.get_token(id) for id in ids] 60 | 61 | @property 62 | def pad(self): 63 | return self.get_id(self.pad_sym) 64 | 65 | @property 66 | def cls(self): 67 | return self.get_id(self.cls_sym) 68 | 69 | @property 70 | def sep(self): 71 | return self.get_id(self.sep_sym) 72 | -------------------------------------------------------------------------------- /rc/rnet/code/rnns/rnn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 
| from rnns import get_cell 10 | 11 | 12 | def rnn(cell_name, x, d, mask=None, ln=False, init_state=None, sm=True): 13 | """Self implemented RNN procedure, supporting mask trick""" 14 | # cell_name: gru, lstm or atr 15 | # x: input sequence embedding matrix, [batch, seq_len, dim] 16 | # d: hidden dimension for rnn 17 | # mask: mask matrix, [batch, seq_len] 18 | # ln: whether use layer normalization 19 | # init_state: the initial hidden states, for cache purpose 20 | # sm: whether apply swap memory during rnn scan 21 | # dp: variational dropout 22 | 23 | in_shape = tf.shape(x) 24 | batch_size, time_steps = in_shape[0], in_shape[1] 25 | 26 | cell = get_cell(cell_name, d, ln=ln) 27 | 28 | if init_state is None: 29 | init_state = cell.get_init_state(shape=[batch_size]) 30 | if mask is None: 31 | mask = tf.ones([batch_size, time_steps], tf.float32) 32 | 33 | # prepare projected input 34 | cache_inputs = cell.fetch_states(x) 35 | cache_inputs = [tf.transpose(v, [1, 0, 2]) 36 | for v in list(cache_inputs)] 37 | mask_ta = tf.transpose(tf.expand_dims(mask, -1), [1, 0, 2]) 38 | 39 | def _step_fn(prev, x): 40 | t, h_ = prev 41 | m = x[-1] 42 | v = x[:-1] 43 | 44 | h = cell(h_, v) 45 | h = m * h + (1. - m) * h_ 46 | 47 | return t + 1, h 48 | 49 | time = tf.constant(0, dtype=tf.int32, name="time") 50 | step_states = (time, init_state) 51 | step_vars = cache_inputs + [mask_ta] 52 | 53 | outputs = tf.scan(_step_fn, 54 | step_vars, 55 | initializer=step_states, 56 | parallel_iterations=32, 57 | swap_memory=sm) 58 | 59 | output_ta = outputs[1] 60 | output_state = outputs[1][-1] 61 | 62 | outputs = tf.transpose(output_ta, [1, 0, 2]) 63 | 64 | return (outputs, output_state), \ 65 | (cell.get_hidden(outputs), cell.get_hidden(output_state)) 66 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/rnns/rnn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from rnns import get_cell 10 | 11 | 12 | def rnn(cell_name, x, d, mask=None, ln=False, init_state=None, sm=True): 13 | """Self implemented RNN procedure, supporting mask trick""" 14 | # cell_name: gru, lstm or atr 15 | # x: input sequence embedding matrix, [batch, seq_len, dim] 16 | # d: hidden dimension for rnn 17 | # mask: mask matrix, [batch, seq_len] 18 | # ln: whether use layer normalization 19 | # init_state: the initial hidden states, for cache purpose 20 | # sm: whether apply swap memory during rnn scan 21 | # dp: variational dropout 22 | 23 | in_shape = tf.shape(x) 24 | batch_size, time_steps = in_shape[0], in_shape[1] 25 | 26 | cell = get_cell(cell_name, d, ln=ln) 27 | 28 | if init_state is None: 29 | init_state = cell.get_init_state(shape=[batch_size]) 30 | if mask is None: 31 | mask = tf.ones([batch_size, time_steps], tf.float32) 32 | 33 | # prepare projected input 34 | cache_inputs = cell.fetch_states(x) 35 | cache_inputs = [tf.transpose(v, [1, 0, 2]) 36 | for v in list(cache_inputs)] 37 | mask_ta = tf.transpose(tf.expand_dims(mask, -1), [1, 0, 2]) 38 | 39 | def _step_fn(prev, x): 40 | t, h_ = prev 41 | m = x[-1] 42 | v = x[:-1] 43 | 44 | h = cell(h_, v) 45 | h = m * h + (1. 
- m) * h_ 46 | 47 | return t + 1, h 48 | 49 | time = tf.constant(0, dtype=tf.int32, name="time") 50 | step_states = (time, init_state) 51 | step_vars = cache_inputs + [mask_ta] 52 | 53 | outputs = tf.scan(_step_fn, 54 | step_vars, 55 | initializer=step_states, 56 | parallel_iterations=32, 57 | swap_memory=sm) 58 | 59 | output_ta = outputs[1] 60 | output_state = outputs[1][-1] 61 | 62 | outputs = tf.transpose(output_ta, [1, 0, 2]) 63 | 64 | return (outputs, output_state), \ 65 | (cell.get_hidden(outputs), cell.get_hidden(output_state)) 66 | -------------------------------------------------------------------------------- /ner/code/trainer.py: -------------------------------------------------------------------------------- 1 | """Training-related module. 2 | """ 3 | from callbacks import F1score 4 | from utils import NERSequence 5 | 6 | 7 | class Trainer(object): 8 | """A trainer that train the model. 9 | 10 | Attributes: 11 | _model: Model. 12 | _preprocessor: Transformer. Preprocessing data for feature extraction. 13 | """ 14 | 15 | def __init__(self, model, preprocessor=None): 16 | self._model = model 17 | self._preprocessor = preprocessor 18 | 19 | def train(self, x_train, y_train, x_valid=None, y_valid=None, 20 | epochs=1, batch_size=32, verbose=1, callbacks=None, shuffle=True): 21 | """Trains the model for a fixed number of epochs (iterations on a dataset). 22 | 23 | Args: 24 | x_train: list of training data. 25 | y_train: list of training target (label) data. 26 | x_valid: list of validation data. 27 | y_valid: list of validation target (label) data. 28 | batch_size: Integer. 29 | Number of samples per gradient update. 30 | If unspecified, `batch_size` will default to 32. 31 | epochs: Integer. Number of epochs to train the model. 32 | verbose: Integer. 0, 1, or 2. Verbosity mode. 33 | 0 = silent, 1 = progress bar, 2 = one line per epoch. 34 | callbacks: List of `keras.callbacks.Callback` instances. 35 | List of callbacks to apply during training. 36 | shuffle: Boolean (whether to shuffle the training data 37 | before each epoch). `shuffle` will default to True. 
38 | """ 39 | 40 | train_seq = NERSequence(x_train, y_train, batch_size, self._preprocessor.transform) 41 | 42 | if x_valid and y_valid: 43 | valid_seq = NERSequence(x_valid, y_valid, batch_size, self._preprocessor.transform) 44 | f1 = F1score(valid_seq, preprocessor=self._preprocessor) 45 | callbacks = [f1] + callbacks if callbacks else [f1] 46 | 47 | self._model.fit_generator(generator=train_seq, 48 | epochs=epochs, 49 | callbacks=callbacks, 50 | verbose=verbose, 51 | shuffle=shuffle) 52 | -------------------------------------------------------------------------------- /doc/code/rnns/sru.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class sru(cell.Cell): 14 | """The Simple Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='sru'): 17 | super(sru, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d * 2, shape=shape, x=x, scope=scope) 22 | 23 | def get_hidden(self, x): 24 | return tf.split(x, 2, -1)[0] 25 | 26 | def fetch_states(self, x): 27 | with tf.variable_scope( 28 | "fetch_state_{}".format(self.scope or "sru")): 29 | h = linear(x, self.d * 4, 30 | bias=True, ln=self.ln, scope="hide_x") 31 | return (h, ) 32 | 33 | def __call__(self, h_, x): 34 | # h_: the concatenation of previous hidden state 35 | # and memory cell state 36 | # x_r/x: the current input state for r gate 37 | # x_f/x: the current input state for f gate 38 | # x_c/x: the current input state for candidate cell 39 | # x_h/x: the current input state for hidden output 40 | # we increase this because we do not assume that 41 | # the input dimension equals the output dimension 42 | """ 43 | f = sigmoid(Wx, vf * c_) 44 | c = f * c_ + (1 - f) * Wx 45 | r = sigmoid(Wx, vr * c_) 46 | h = r * c + (1 - r) * Ux 47 | """ 48 | if isinstance(x, (list, tuple)): 49 | x = x[0] 50 | 51 | with tf.variable_scope( 52 | "cell_{}".format(self.scope or "sru")): 53 | x_r, x_f, x_c, x_h = tf.split(x, 4, -1) 54 | h_, c_ = tf.split(h_, 2, -1) 55 | 56 | v_f = tf.get_variable("v_f", [1, self.d]) 57 | v_r = tf.get_variable("v_r", [1, self.d]) 58 | 59 | f = tf.sigmoid(x_f + v_f * c_) 60 | c = f * c_ + (1. - f) * x_c 61 | r = tf.sigmoid(x_r + v_r * c_) 62 | h = r * c + (1. 
- r) * x_h 63 | 64 | return tf.concat([h, c], -1) 65 | -------------------------------------------------------------------------------- /nli/code/rnns/sru.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class sru(cell.Cell): 14 | """The Simple Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='sru'): 17 | super(sru, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d * 2, shape=shape, x=x, scope=scope) 22 | 23 | def get_hidden(self, x): 24 | return tf.split(x, 2, -1)[0] 25 | 26 | def fetch_states(self, x): 27 | with tf.variable_scope( 28 | "fetch_state_{}".format(self.scope or "sru")): 29 | h = linear(x, self.d * 4, 30 | bias=True, ln=self.ln, scope="hide_x") 31 | return (h, ) 32 | 33 | def __call__(self, h_, x): 34 | # h_: the concatenation of previous hidden state 35 | # and memory cell state 36 | # x_r/x: the current input state for r gate 37 | # x_f/x: the current input state for f gate 38 | # x_c/x: the current input state for candidate cell 39 | # x_h/x: the current input state for hidden output 40 | # we increase this because we do not assume that 41 | # the input dimension equals the output dimension 42 | """ 43 | f = sigmoid(Wx, vf * c_) 44 | c = f * c_ + (1 - f) * Wx 45 | r = sigmoid(Wx, vr * c_) 46 | h = r * c + (1 - r) * Ux 47 | """ 48 | if isinstance(x, (list, tuple)): 49 | x = x[0] 50 | 51 | with tf.variable_scope( 52 | "cell_{}".format(self.scope or "sru")): 53 | x_r, x_f, x_c, x_h = tf.split(x, 4, -1) 54 | h_, c_ = tf.split(h_, 2, -1) 55 | 56 | v_f = tf.get_variable("v_f", [1, self.d]) 57 | v_r = tf.get_variable("v_r", [1, self.d]) 58 | 59 | f = tf.sigmoid(x_f + v_f * c_) 60 | c = f * c_ + (1. - f) * x_c 61 | r = tf.sigmoid(x_r + v_r * c_) 62 | h = r * c + (1. 
- r) * x_h 63 | 64 | return tf.concat([h, c], -1) 65 | -------------------------------------------------------------------------------- /rc/rnet/code/rnns/sru.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class sru(cell.Cell): 14 | """The Simple Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='sru'): 17 | super(sru, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d * 2, shape=shape, x=x, scope=scope) 22 | 23 | def get_hidden(self, x): 24 | return tf.split(x, 2, -1)[0] 25 | 26 | def fetch_states(self, x): 27 | with tf.variable_scope( 28 | "fetch_state_{}".format(self.scope or "sru")): 29 | h = linear(x, self.d * 4, 30 | bias=True, ln=self.ln, scope="hide_x") 31 | return (h, ) 32 | 33 | def __call__(self, h_, x): 34 | # h_: the concatenation of previous hidden state 35 | # and memory cell state 36 | # x_r/x: the current input state for r gate 37 | # x_f/x: the current input state for f gate 38 | # x_c/x: the current input state for candidate cell 39 | # x_h/x: the current input state for hidden output 40 | # we increase this because we do not assume that 41 | # the input dimension equals the output dimension 42 | """ 43 | f = sigmoid(Wx, vf * c_) 44 | c = f * c_ + (1 - f) * Wx 45 | r = sigmoid(Wx, vr * c_) 46 | h = r * c + (1 - r) * Ux 47 | """ 48 | if isinstance(x, (list, tuple)): 49 | x = x[0] 50 | 51 | with tf.variable_scope( 52 | "cell_{}".format(self.scope or "sru")): 53 | x_r, x_f, x_c, x_h = tf.split(x, 4, -1) 54 | h_, c_ = tf.split(h_, 2, -1) 55 | 56 | v_f = tf.get_variable("v_f", [1, self.d]) 57 | v_r = tf.get_variable("v_r", [1, self.d]) 58 | 59 | f = tf.sigmoid(x_f + v_f * c_) 60 | c = f * c_ + (1. - f) * x_c 61 | r = tf.sigmoid(x_r + v_r * c_) 62 | h = r * c + (1. 
- r) * x_h 63 | 64 | return tf.concat([h, c], -1) 65 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/rnns/sru.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class sru(cell.Cell): 14 | """The Simple Recurrent Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='sru'): 17 | super(sru, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d * 2, shape=shape, x=x, scope=scope) 22 | 23 | def get_hidden(self, x): 24 | return tf.split(x, 2, -1)[0] 25 | 26 | def fetch_states(self, x): 27 | with tf.variable_scope( 28 | "fetch_state_{}".format(self.scope or "sru")): 29 | h = linear(x, self.d * 4, 30 | bias=True, ln=self.ln, scope="hide_x") 31 | return (h, ) 32 | 33 | def __call__(self, h_, x): 34 | # h_: the concatenation of previous hidden state 35 | # and memory cell state 36 | # x_r/x: the current input state for r gate 37 | # x_f/x: the current input state for f gate 38 | # x_c/x: the current input state for candidate cell 39 | # x_h/x: the current input state for hidden output 40 | # we increase this because we do not assume that 41 | # the input dimension equals the output dimension 42 | """ 43 | f = sigmoid(Wx, vf * c_) 44 | c = f * c_ + (1 - f) * Wx 45 | r = sigmoid(Wx, vr * c_) 46 | h = r * c + (1 - r) * Ux 47 | """ 48 | if isinstance(x, (list, tuple)): 49 | x = x[0] 50 | 51 | with tf.variable_scope( 52 | "cell_{}".format(self.scope or "sru")): 53 | x_r, x_f, x_c, x_h = tf.split(x, 4, -1) 54 | h_, c_ = tf.split(h_, 2, -1) 55 | 56 | v_f = tf.get_variable("v_f", [1, self.d]) 57 | v_r = tf.get_variable("v_r", [1, self.d]) 58 | 59 | f = tf.sigmoid(x_f + v_f * c_) 60 | c = f * c_ + (1. - f) * x_c 61 | r = tf.sigmoid(x_r + v_r * c_) 62 | h = r * c + (1. - r) * x_h 63 | 64 | return tf.concat([h, c], -1) 65 | -------------------------------------------------------------------------------- /ner/README.md: -------------------------------------------------------------------------------- 1 | ## Named Entity Recognition 2 | 3 | 4 | We employ the birnn plus CRF architecture as [Lample et al. 2016](https://www.aclweb.org/anthology/N16-1030), and 5 | experiment on CoNLL-2003 English NER data. 6 | Main experimental results are summarized below. 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 |
| Model | #Params | NER |
| --- | --- | --- |
| Lample et al. 2016 | - | 90.94 |
| LSTM | 245K | 89.61 |
| GRU | 192K | 89.35 |
| ATR | 87K | 88.46 |
| SRU | 161K | 88.89 |
| LRN | 129K | 88.56 |
45 | 46 | F1-score. 47 | 48 | ## Requirement 49 | see [requirements.txt](code/requirements.txt) for full list. 50 | 51 | ## How to Run? 52 | 53 | - download and preprocess dataset 54 | 55 | - download the conll2003 dataset from [anago](https://github.com/Hironsan/anago/tree/master/data) (in data folder). 56 | - download the Glove-6B-100d pre-trained word embedding from: http://nlp.stanford.edu/data/glove.6B.zip 57 | 58 | - no hyperparameters are tuned, we keep them all in default. 59 | 60 | - training and evaluation 61 | 62 | the running procedure is as follows: 63 | ``` 64 | export CUDA_ROOT=XXX 65 | export PATH=$CUDA_ROOT/bin:$PATH 66 | export LD_LIBRARY_PATH=$CUDA_ROOT/lib64:$LD_LIBRARY_PATH 67 | 68 | export CUDA_VISIBLE_DEVICES=0 69 | 70 | export data_dir=path-of/conll2003/en/ner 71 | export glove_dir=path-of/glove.6B/glove.6B.100d.txt 72 | 73 | RUN_EXP=5 74 | rnn=lrn 75 | 76 | for i in $(seq 1 $RUN_EXP); do 77 | exp_dir=exp$i 78 | mkdir $exp_dir 79 | cd $exp_dir 80 | 81 | export cell_type=$rnn 82 | python3 ner_glove.py --cell lrn >& log.lrn 83 | 84 | cd ../ 85 | done 86 | 87 | python scripts/get_test_score.py $rnn exp* >& score.$rnn 88 | ``` 89 | Results are reported over 5 runs. 90 | 91 | ## Credits 92 | 93 | Source code structure is adapted from [annago](https://github.com/Hironsan/anago/tree/master/). -------------------------------------------------------------------------------- /doc/code/rnns/lstm.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class lstm(cell.Cell): 14 | """The Long-Short Term Memory Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='lstm'): 17 | super(lstm, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d * 2, shape=shape, x=x, scope=scope) 22 | 23 | def get_hidden(self, x): 24 | return tf.split(x, 2, -1)[0] 25 | 26 | def fetch_states(self, x): 27 | with tf.variable_scope( 28 | "fetch_state_{}".format(self.scope or "lstm")): 29 | g = linear(x, self.d * 3, 30 | bias=True, ln=self.ln, scope="gate_x") 31 | c = linear(x, self.d, 32 | bias=True, ln=self.ln, scope="hide_x") 33 | return g, c 34 | 35 | def __call__(self, h_, x): 36 | # h_: the concatenation of previous hidden state 37 | # and memory cell state 38 | # x_i/x: the current input state for input gate 39 | # x_f/x: the current input state for forget gate 40 | # x_o/x: the current input state for output gate 41 | # x_c/x: the current input state for candidate cell 42 | """ 43 | f = sigmoid(h_, x) 44 | i = sigmoid(h_, x) 45 | o = sigmoid(h_, x) 46 | c' = tanh(h_, x) 47 | c = f * c_ + i * c' 48 | h = o * tanh(c) 49 | """ 50 | with tf.variable_scope( 51 | "cell_{}".format(self.scope or "lstm")): 52 | x_g, x_c = x 53 | h_, c_ = tf.split(h_, 2, -1) 54 | 55 | h_g = linear(h_, self.d * 3, 56 | ln=self.ln, scope="gate_h") 57 | i, f, o = tf.split( 58 | tf.sigmoid(x_g + h_g), 3, -1) 59 | 60 | h_c = linear(h_, self.d, 61 | ln=self.ln, scope="hide_h") 62 | h_c = tf.tanh(x_c + h_c) 63 | 64 | c = i * h_c + f * c_ 65 | 66 | h = o * tf.tanh(c) 67 | 68 | return tf.concat([h, c], -1) 69 | -------------------------------------------------------------------------------- /nli/code/rnns/lstm.py: 
-------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class lstm(cell.Cell): 14 | """The Long-Short Term Memory Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='lstm'): 17 | super(lstm, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d * 2, shape=shape, x=x, scope=scope) 22 | 23 | def get_hidden(self, x): 24 | return tf.split(x, 2, -1)[0] 25 | 26 | def fetch_states(self, x): 27 | with tf.variable_scope( 28 | "fetch_state_{}".format(self.scope or "lstm")): 29 | g = linear(x, self.d * 3, 30 | bias=True, ln=self.ln, scope="gate_x") 31 | c = linear(x, self.d, 32 | bias=True, ln=self.ln, scope="hide_x") 33 | return g, c 34 | 35 | def __call__(self, h_, x): 36 | # h_: the concatenation of previous hidden state 37 | # and memory cell state 38 | # x_i/x: the current input state for input gate 39 | # x_f/x: the current input state for forget gate 40 | # x_o/x: the current input state for output gate 41 | # x_c/x: the current input state for candidate cell 42 | """ 43 | f = sigmoid(h_, x) 44 | i = sigmoid(h_, x) 45 | o = sigmoid(h_, x) 46 | c' = tanh(h_, x) 47 | c = f * c_ + i * c' 48 | h = o * tanh(c) 49 | """ 50 | with tf.variable_scope( 51 | "cell_{}".format(self.scope or "lstm")): 52 | x_g, x_c = x 53 | h_, c_ = tf.split(h_, 2, -1) 54 | 55 | h_g = linear(h_, self.d * 3, 56 | ln=self.ln, scope="gate_h") 57 | i, f, o = tf.split( 58 | tf.sigmoid(x_g + h_g), 3, -1) 59 | 60 | h_c = linear(h_, self.d, 61 | ln=self.ln, scope="hide_h") 62 | h_c = tf.tanh(x_c + h_c) 63 | 64 | c = i * h_c + f * c_ 65 | 66 | h = o * tf.tanh(c) 67 | 68 | return tf.concat([h, c], -1) 69 | -------------------------------------------------------------------------------- /rc/rnet/code/rnns/lstm.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class lstm(cell.Cell): 14 | """The Long-Short Term Memory Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='lstm'): 17 | super(lstm, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d * 2, shape=shape, x=x, scope=scope) 22 | 23 | def get_hidden(self, x): 24 | return tf.split(x, 2, -1)[0] 25 | 26 | def fetch_states(self, x): 27 | with tf.variable_scope( 28 | "fetch_state_{}".format(self.scope or "lstm")): 29 | g = linear(x, self.d * 3, 30 | bias=True, ln=self.ln, scope="gate_x") 31 | c = linear(x, self.d, 32 | bias=True, ln=self.ln, scope="hide_x") 33 | return g, c 34 | 35 | def __call__(self, h_, x): 36 | # h_: the concatenation of previous hidden state 37 | # and memory cell state 38 | # x_i/x: the current input state for input gate 39 | # x_f/x: the current input state for forget gate 40 | # x_o/x: the current input state for output gate 41 | # x_c/x: the current input state for candidate cell 42 | """ 43 | f = sigmoid(h_, x) 44 | i = sigmoid(h_, x) 45 | o = sigmoid(h_, x) 46 | c' = tanh(h_, x) 47 | c 
= f * c_ + i * c' 48 | h = o * tanh(c) 49 | """ 50 | with tf.variable_scope( 51 | "cell_{}".format(self.scope or "lstm")): 52 | x_g, x_c = x 53 | h_, c_ = tf.split(h_, 2, -1) 54 | 55 | h_g = linear(h_, self.d * 3, 56 | ln=self.ln, scope="gate_h") 57 | i, f, o = tf.split( 58 | tf.sigmoid(x_g + h_g), 3, -1) 59 | 60 | h_c = linear(h_, self.d, 61 | ln=self.ln, scope="hide_h") 62 | h_c = tf.tanh(x_c + h_c) 63 | 64 | c = i * h_c + f * c_ 65 | 66 | h = o * tf.tanh(c) 67 | 68 | return tf.concat([h, c], -1) 69 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/rnns/lstm.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from func import linear 10 | from rnns import cell as cell 11 | 12 | 13 | class lstm(cell.Cell): 14 | """The Long-Short Term Memory Unit.""" 15 | 16 | def __init__(self, d, ln=False, scope='lstm'): 17 | super(lstm, self).__init__(d, ln=ln, scope=scope) 18 | 19 | def get_init_state(self, shape=None, x=None, scope=None): 20 | return self._get_init_state( 21 | self.d * 2, shape=shape, x=x, scope=scope) 22 | 23 | def get_hidden(self, x): 24 | return tf.split(x, 2, -1)[0] 25 | 26 | def fetch_states(self, x): 27 | with tf.variable_scope( 28 | "fetch_state_{}".format(self.scope or "lstm")): 29 | g = linear(x, self.d * 3, 30 | bias=True, ln=self.ln, scope="gate_x") 31 | c = linear(x, self.d, 32 | bias=True, ln=self.ln, scope="hide_x") 33 | return g, c 34 | 35 | def __call__(self, h_, x): 36 | # h_: the concatenation of previous hidden state 37 | # and memory cell state 38 | # x_i/x: the current input state for input gate 39 | # x_f/x: the current input state for forget gate 40 | # x_o/x: the current input state for output gate 41 | # x_c/x: the current input state for candidate cell 42 | """ 43 | f = sigmoid(h_, x) 44 | i = sigmoid(h_, x) 45 | o = sigmoid(h_, x) 46 | c' = tanh(h_, x) 47 | c = f * c_ + i * c' 48 | h = o * tanh(c) 49 | """ 50 | with tf.variable_scope( 51 | "cell_{}".format(self.scope or "lstm")): 52 | x_g, x_c = x 53 | h_, c_ = tf.split(h_, 2, -1) 54 | 55 | h_g = linear(h_, self.d * 3, 56 | ln=self.ln, scope="gate_h") 57 | i, f, o = tf.split( 58 | tf.sigmoid(x_g + h_g), 3, -1) 59 | 60 | h_c = linear(h_, self.d, 61 | ln=self.ln, scope="hide_h") 62 | h_c = tf.tanh(x_c + h_c) 63 | 64 | c = i * h_c + f * c_ 65 | 66 | h = o * tf.tanh(c) 67 | 68 | return tf.concat([h, c], -1) 69 | -------------------------------------------------------------------------------- /lm/code/generate.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Language Modeling on Penn Tree Bank 3 | # 4 | # This file generates new sentences sampled from the language model 5 | # 6 | ############################################################################### 7 | 8 | import argparse 9 | 10 | import torch 11 | from torch.autograd import Variable 12 | 13 | import data 14 | 15 | parser = argparse.ArgumentParser(description='PyTorch PTB Language Model') 16 | 17 | # Model parameters. 
18 | parser.add_argument('--data', type=str, default='./penn', 19 | help='location of the data corpus') 20 | parser.add_argument('--checkpoint', type=str, default='./model.pt', 21 | help='model checkpoint to use') 22 | parser.add_argument('--outf', type=str, default='generated.txt', 23 | help='output file for generated text') 24 | parser.add_argument('--words', type=int, default='1000', 25 | help='number of words to generate') 26 | parser.add_argument('--seed', type=int, default=1111, 27 | help='random seed') 28 | parser.add_argument('--cuda', action='store_true', 29 | help='use CUDA') 30 | parser.add_argument('--temperature', type=float, default=1.0, 31 | help='temperature - higher will increase diversity') 32 | parser.add_argument('--log-interval', type=int, default=100, 33 | help='reporting interval') 34 | args = parser.parse_args() 35 | 36 | # Set the random seed manually for reproducibility. 37 | torch.manual_seed(args.seed) 38 | if torch.cuda.is_available(): 39 | if not args.cuda: 40 | print("WARNING: You have a CUDA device, so you should probably run with --cuda") 41 | else: 42 | torch.cuda.manual_seed(args.seed) 43 | 44 | if args.temperature < 1e-3: 45 | parser.error("--temperature has to be greater or equal 1e-3") 46 | 47 | with open(args.checkpoint, 'rb') as f: 48 | model = torch.load(f) 49 | model.eval() 50 | 51 | if args.cuda: 52 | model.cuda() 53 | else: 54 | model.cpu() 55 | 56 | corpus = data.Corpus(args.data) 57 | ntokens = len(corpus.dictionary) 58 | hidden = model.init_hidden(1) 59 | input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True) 60 | if args.cuda: 61 | input.data = input.data.cuda() 62 | 63 | with open(args.outf, 'w') as outf: 64 | for i in range(args.words): 65 | output, hidden = model(input, hidden, return_prob=True) 66 | word_weights = output.squeeze().data.div(args.temperature).exp().cpu() 67 | word_idx = torch.multinomial(word_weights, 1)[0] 68 | input.data.fill_(word_idx) 69 | word = corpus.dictionary.idx2word[word_idx] 70 | 71 | outf.write(word + ('\n' if i % 20 == 19 else ' ')) 72 | 73 | if i % args.log_interval == 0: 74 | print('| Generated {}/{} words'.format(i, args.words)) 75 | -------------------------------------------------------------------------------- /mt/README.md: -------------------------------------------------------------------------------- 1 | ## Machine Translation 2 | 3 | 4 | Main source code will be available at [zero](https://github.com/bzhangGo/zero) (might require some time, 31/05/2019). 5 | The used NMT structure is in `deepnmt.py`. 6 | 7 | 8 | Main experimental results are summarized below. 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
| Model | #Params | BLEU | Train | Decode |
| --- | --- | --- | --- | --- |
| GNMT | - | 24.61 | - | - |
| GRU | 206M | 26.28 | 2.67 | 45.35 |
| ATR | 122M | 25.70 | 1.33 | 34.40 |
| SRU | 170M | 25.91 | 1.34 | 42.84 |
| LRN | 143M | 26.26 | 0.99 | 36.50 |
| oLRN | 164M | 26.73 | 1.15 | 40.19 |
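The BLEU column is defined below as case-insensitive tokenized BLEU on WMT14 English-German. The repository does not pin a specific scoring command; one conventional choice (an assumption here, not something stated in this repo) is Moses' `multi-bleu.perl`, e.g.:

```bash
# Assumed tooling: Moses multi-bleu.perl; -lc gives the case-insensitive variant.
# hyp.tok and ref.tok are placeholder tokenized hypothesis/reference files.
perl multi-bleu.perl -lc ref.tok < hyp.tok
```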
61 | 62 | *Train*: time in seconds per training batch measured from 0.2k training steps. 63 | *Decode*: time in milliseconds used to decode one sentence measured on newstest2014 dataset. 64 | *BLEU*: case-insensitive tokenized BLEU score on WMT14 English-German translation task. 65 | 66 | ## oLRN structure 67 | 68 | 69 | 70 | Unlike LRN, oLRN employs an additional output gate, inspired by LSTM, to handle output information flow. 71 | This additional gate also help avoid hidden state explosion when linear activation is applied. 72 | 73 | ## How to Run? 74 | 75 | Training and evaluation, please refer to project [zero](https://github.com/bzhangGo/zero). -------------------------------------------------------------------------------- /rc/rnet/code/evaluate-v1.1.py: -------------------------------------------------------------------------------- 1 | """ Official evaluation script for v1.1 of the SQuAD dataset. """ 2 | from __future__ import print_function 3 | from collections import Counter 4 | import string 5 | import re 6 | import argparse 7 | import json 8 | import sys 9 | 10 | 11 | def normalize_answer(s): 12 | """Lower text and remove punctuation, articles and extra whitespace.""" 13 | def remove_articles(text): 14 | return re.sub(r'\b(a|an|the)\b', ' ', text) 15 | 16 | def white_space_fix(text): 17 | return ' '.join(text.split()) 18 | 19 | def remove_punc(text): 20 | exclude = set(string.punctuation) 21 | return ''.join(ch for ch in text if ch not in exclude) 22 | 23 | def lower(text): 24 | return text.lower() 25 | 26 | return white_space_fix(remove_articles(remove_punc(lower(s)))) 27 | 28 | 29 | def f1_score(prediction, ground_truth): 30 | prediction_tokens = normalize_answer(prediction).split() 31 | ground_truth_tokens = normalize_answer(ground_truth).split() 32 | common = Counter(prediction_tokens) & Counter(ground_truth_tokens) 33 | num_same = sum(common.values()) 34 | if num_same == 0: 35 | return 0 36 | precision = 1.0 * num_same / len(prediction_tokens) 37 | recall = 1.0 * num_same / len(ground_truth_tokens) 38 | f1 = (2 * precision * recall) / (precision + recall) 39 | return f1 40 | 41 | 42 | def exact_match_score(prediction, ground_truth): 43 | return (normalize_answer(prediction) == normalize_answer(ground_truth)) 44 | 45 | 46 | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): 47 | scores_for_ground_truths = [] 48 | for ground_truth in ground_truths: 49 | score = metric_fn(prediction, ground_truth) 50 | scores_for_ground_truths.append(score) 51 | return max(scores_for_ground_truths) 52 | 53 | 54 | def evaluate(dataset, predictions): 55 | f1 = exact_match = total = 0 56 | for article in dataset: 57 | for paragraph in article['paragraphs']: 58 | for qa in paragraph['qas']: 59 | total += 1 60 | if qa['id'] not in predictions: 61 | message = 'Unanswered question ' + qa['id'] + \ 62 | ' will receive score 0.' 
63 | print(message, file=sys.stderr) 64 | continue 65 | ground_truths = list(map(lambda x: x['text'], qa['answers'])) 66 | prediction = predictions[qa['id']] 67 | exact_match += metric_max_over_ground_truths( 68 | exact_match_score, prediction, ground_truths) 69 | f1 += metric_max_over_ground_truths( 70 | f1_score, prediction, ground_truths) 71 | 72 | exact_match = 100.0 * exact_match / total 73 | f1 = 100.0 * f1 / total 74 | 75 | return {'exact_match': exact_match, 'f1': f1} 76 | 77 | 78 | if __name__ == '__main__': 79 | expected_version = '1.1' 80 | parser = argparse.ArgumentParser( 81 | description='Evaluation for SQuAD ' + expected_version) 82 | parser.add_argument('dataset_file', help='Dataset file') 83 | parser.add_argument('prediction_file', help='Prediction File') 84 | args = parser.parse_args() 85 | with open(args.dataset_file) as dataset_file: 86 | dataset_json = json.load(dataset_file) 87 | if (dataset_json['version'] != expected_version): 88 | print('Evaluation expects v-' + expected_version + 89 | ', but got dataset with v-' + dataset_json['version'], 90 | file=sys.stderr) 91 | dataset = dataset_json['data'] 92 | with open(args.prediction_file) as prediction_file: 93 | predictions = json.load(prediction_file) 94 | print(json.dumps(evaluate(dataset, predictions))) 95 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/evaluate-v1.1.py: -------------------------------------------------------------------------------- 1 | """ Official evaluation script for v1.1 of the SQuAD dataset. """ 2 | from __future__ import print_function 3 | from collections import Counter 4 | import string 5 | import re 6 | import argparse 7 | import json 8 | import sys 9 | 10 | 11 | def normalize_answer(s): 12 | """Lower text and remove punctuation, articles and extra whitespace.""" 13 | def remove_articles(text): 14 | return re.sub(r'\b(a|an|the)\b', ' ', text) 15 | 16 | def white_space_fix(text): 17 | return ' '.join(text.split()) 18 | 19 | def remove_punc(text): 20 | exclude = set(string.punctuation) 21 | return ''.join(ch for ch in text if ch not in exclude) 22 | 23 | def lower(text): 24 | return text.lower() 25 | 26 | return white_space_fix(remove_articles(remove_punc(lower(s)))) 27 | 28 | 29 | def f1_score(prediction, ground_truth): 30 | prediction_tokens = normalize_answer(prediction).split() 31 | ground_truth_tokens = normalize_answer(ground_truth).split() 32 | common = Counter(prediction_tokens) & Counter(ground_truth_tokens) 33 | num_same = sum(common.values()) 34 | if num_same == 0: 35 | return 0 36 | precision = 1.0 * num_same / len(prediction_tokens) 37 | recall = 1.0 * num_same / len(ground_truth_tokens) 38 | f1 = (2 * precision * recall) / (precision + recall) 39 | return f1 40 | 41 | 42 | def exact_match_score(prediction, ground_truth): 43 | return (normalize_answer(prediction) == normalize_answer(ground_truth)) 44 | 45 | 46 | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): 47 | scores_for_ground_truths = [] 48 | for ground_truth in ground_truths: 49 | score = metric_fn(prediction, ground_truth) 50 | scores_for_ground_truths.append(score) 51 | return max(scores_for_ground_truths) 52 | 53 | 54 | def evaluate(dataset, predictions): 55 | f1 = exact_match = total = 0 56 | for article in dataset: 57 | for paragraph in article['paragraphs']: 58 | for qa in paragraph['qas']: 59 | total += 1 60 | if qa['id'] not in predictions: 61 | message = 'Unanswered question ' + qa['id'] + \ 62 | ' will receive score 0.' 
63 | print(message, file=sys.stderr) 64 | continue 65 | ground_truths = list(map(lambda x: x['text'], qa['answers'])) 66 | prediction = predictions[qa['id']] 67 | exact_match += metric_max_over_ground_truths( 68 | exact_match_score, prediction, ground_truths) 69 | f1 += metric_max_over_ground_truths( 70 | f1_score, prediction, ground_truths) 71 | 72 | exact_match = 100.0 * exact_match / total 73 | f1 = 100.0 * f1 / total 74 | 75 | return {'exact_match': exact_match, 'f1': f1} 76 | 77 | 78 | if __name__ == '__main__': 79 | expected_version = '1.1' 80 | parser = argparse.ArgumentParser( 81 | description='Evaluation for SQuAD ' + expected_version) 82 | parser.add_argument('dataset_file', help='Dataset file') 83 | parser.add_argument('prediction_file', help='Prediction File') 84 | args = parser.parse_args() 85 | with open(args.dataset_file) as dataset_file: 86 | dataset_json = json.load(dataset_file) 87 | if (dataset_json['version'] != expected_version): 88 | print('Evaluation expects v-' + expected_version + 89 | ', but got dataset with v-' + dataset_json['version'], 90 | file=sys.stderr) 91 | dataset = dataset_json['data'] 92 | with open(args.prediction_file) as prediction_file: 93 | predictions = json.load(prediction_file) 94 | print(json.dumps(evaluate(dataset, predictions))) 95 | -------------------------------------------------------------------------------- /lm/code/weight_drop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Parameter 3 | from functools import wraps 4 | 5 | class WeightDrop(torch.nn.Module): 6 | def __init__(self, module, weights, dropout=0, variational=False): 7 | super(WeightDrop, self).__init__() 8 | self.module = module 9 | self.weights = weights 10 | self.dropout = dropout 11 | self.variational = variational 12 | self._setup() 13 | 14 | def widget_demagnetizer_y2k_edition(*args, **kwargs): 15 | # We need to replace flatten_parameters with a nothing function 16 | # It must be a function rather than a lambda as otherwise pickling explodes 17 | # We can't write boring code though, so ... WIDGET DEMAGNETIZER Y2K EDITION! 
18 | # (╯°□°)╯︵ ┻━┻ 19 | return 20 | 21 | def _setup(self): 22 | # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN 23 | if issubclass(type(self.module), torch.nn.RNNBase): 24 | self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition 25 | 26 | for name_w in self.weights: 27 | if not hasattr(self.module, name_w): 28 | continue 29 | print('Applying weight drop of {} to {}'.format(self.dropout, name_w)) 30 | w = getattr(self.module, name_w) 31 | del self.module._parameters[name_w] 32 | self.module.register_parameter(name_w + '_raw', Parameter(w.data)) 33 | 34 | def _setweights(self): 35 | for name_w in self.weights: 36 | if not hasattr(self.module, name_w): 37 | continue 38 | 39 | raw_w = getattr(self.module, name_w + '_raw') 40 | w = None 41 | if self.variational: 42 | mask = torch.autograd.Variable(torch.ones(raw_w.size(0), 1)) 43 | if raw_w.is_cuda: mask = mask.cuda() 44 | mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True) 45 | w = mask.expand_as(raw_w) * raw_w 46 | else: 47 | w = torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training) 48 | setattr(self.module, name_w, w) 49 | 50 | def forward(self, *args): 51 | self._setweights() 52 | return self.module.forward(*args) 53 | 54 | if __name__ == '__main__': 55 | import torch 56 | from weight_drop import WeightDrop 57 | 58 | # Input is (seq, batch, input) 59 | x = torch.autograd.Variable(torch.randn(2, 1, 10)).cuda() 60 | h0 = None 61 | 62 | ### 63 | 64 | print('Testing WeightDrop') 65 | print('=-=-=-=-=-=-=-=-=-=') 66 | 67 | ### 68 | 69 | print('Testing WeightDrop with Linear') 70 | 71 | lin = WeightDrop(torch.nn.Linear(10, 10), ['weight'], dropout=0.9) 72 | lin.cuda() 73 | run1 = [x.sum() for x in lin(x).data] 74 | run2 = [x.sum() for x in lin(x).data] 75 | 76 | print('All items should be different') 77 | print('Run 1:', run1) 78 | print('Run 2:', run2) 79 | 80 | assert run1[0] != run2[0] 81 | assert run1[1] != run2[1] 82 | 83 | print('---') 84 | 85 | ### 86 | 87 | print('Testing WeightDrop with LSTM') 88 | 89 | wdrnn = WeightDrop(torch.nn.LSTM(10, 10), ['weight_hh_l0'], dropout=0.9) 90 | wdrnn.cuda() 91 | 92 | run1 = [x.sum() for x in wdrnn(x, h0)[0].data] 93 | run2 = [x.sum() for x in wdrnn(x, h0)[0].data] 94 | 95 | print('First timesteps should be equal, all others should differ') 96 | print('Run 1:', run1) 97 | print('Run 2:', run2) 98 | 99 | # First time step, not influenced by hidden to hidden weights, should be equal 100 | assert run1[0] == run2[0] 101 | # Second step should not 102 | assert run1[1] != run2[1] 103 | 104 | print('---') 105 | -------------------------------------------------------------------------------- /doc/config.py: -------------------------------------------------------------------------------- 1 | dict( 2 | # lrate decay 3 | # select strategy: noam, gnmt+, epoch, score and vanilla 4 | lrate_strategy="epoch", 5 | # learning decay rate 6 | lrate_decay=0.5, 7 | # weight decay for L2 loss 8 | weight_decay=3e-5, 9 | 10 | # early stopping 11 | estop_patience=100, 12 | 13 | # initialization 14 | # type of initializer 15 | initializer="uniform", 16 | # initializer range control 17 | initializer_gain=0.08, 18 | 19 | # parameters for rnnsearch 20 | # encoder and decoder hidden size 21 | hidden_size=64, 22 | # source and target embedding size 23 | embed_size=300, 24 | # character embedding size 25 | char_embed_size=32, 26 | # dropout value 27 | dropout=0.1, 28 | # word random dropout 29 | word_dropout=0.1, 30 | # label smoothing value 
31 | label_smooth=0.1, 32 | # gru, lstm, sru or atr 33 | cell="atr", 34 | # whether use layer normalization, it will be slow 35 | layer_norm=False, 36 | # notice that when opening the swap memory switch 37 | # you can train reasonably larger batch on condition 38 | # that your system will use much more cpu memory 39 | swap_memory=True, 40 | 41 | # whether use character embedding 42 | use_char=True, 43 | # whether lowercase word 44 | lower=False, 45 | 46 | # task name 47 | task="amafull", 48 | 49 | model_name="InferNet", 50 | 51 | # constant batch size at 'batch' mode for batch-based batching 52 | batch_size=64, 53 | token_size=2000, 54 | batch_or_token='batch', 55 | # batch size for decoding, i.e. number of source sentences decoded at the same time 56 | eval_batch_size=64, 57 | # whether shuffle batches during training 58 | shuffle_batch=True, 59 | # whether use multiprocessing deal with data reading, default true 60 | data_multiprocessing=True, 61 | 62 | # word vocabulary 63 | word_vocab_file="", 64 | # char vocabulary 65 | char_vocab_file="", 66 | # pretrained word embedding 67 | pretrain_word_embedding_file="path-of/glove.840B.300d.txt", 68 | # dataset path file 69 | data_path="path-of/data", 70 | # output directory 71 | output_dir="train", 72 | # output during testing 73 | test_output="", 74 | 75 | # adam optimizer hyperparameters 76 | beta1=0.9, 77 | beta2=0.999, 78 | epsilon=1e-8, 79 | # gradient clipping value 80 | clip_grad_norm=5.0, 81 | # initial learning rate 82 | lrate=1e-3, 83 | 84 | # allowed maximum sentence length 85 | max_len=400, 86 | # maximum word length 87 | max_w_len=25, 88 | # maximum sentence number 89 | max_p_num=10, 90 | # hierarchy neural network 91 | enable_hierarchy=False, 92 | 93 | # maximum epochs 94 | epoches=6, 95 | # the effective batch size is: batch/token size * update_cycle 96 | # sequential update cycle 97 | update_cycle=1, 98 | # the number of gpus 99 | gpus=[0], 100 | # whether enable ema 101 | ema_decay=0.9999, 102 | 103 | # print information every disp_freq training steps 104 | disp_freq=10, 105 | # evaluate on the development file every eval_freq steps 106 | eval_freq=10000, 107 | # save the model parameters every save_freq steps 108 | save_freq=5000, 109 | # saved checkpoint number 110 | checkpoints=5, 111 | # the maximum training steps, program with stop if epoches or max_training_steps is metted 112 | max_training_steps=1000000000, 113 | 114 | # bert configuration 115 | # did not use in practice, efficiency is an important issue 116 | bert=None, 117 | bert_dir="path-of/cased_L-12_H-768_A-12/", 118 | tune_bert=False, 119 | enable_bert=False, 120 | use_bert_single=True, 121 | 122 | # number of threads for threaded reading, seems useless 123 | nthreads=3, 124 | # buffer size controls the number of sentences readed in one time, 125 | buffer_size=100000, 126 | # a unique queue in multi-thread reading process 127 | max_queue_size=100, 128 | # random control, not so well for tensorflow. 
129 | random_seed=1234, 130 | # whether or not train from checkpoint 131 | train_continue=True, 132 | ) 133 | -------------------------------------------------------------------------------- /nli/config.py: -------------------------------------------------------------------------------- 1 | dict( 2 | # lrate decay 3 | # select strategy: noam, gnmt+, epoch, score and vanilla 4 | lrate_strategy="epoch", 5 | # learning decay rate 6 | lrate_decay=0.5, 7 | # weight decay for L2 loss 8 | weight_decay=3e-5, 9 | 10 | # early stopping 11 | estop_patience=100, 12 | 13 | # initialization 14 | # type of initializer 15 | initializer="uniform", 16 | # initializer range control 17 | initializer_gain=0.08, 18 | 19 | # parameters for rnnsearch 20 | # encoder and decoder hidden size 21 | hidden_size=300, 22 | # source and target embedding size 23 | embed_size=300, 24 | # label number 25 | label_size=3, 26 | # number of layers 27 | char_embed_size=64, 28 | # dropout value 29 | dropout=0.3, 30 | # label smoothing value 31 | label_smooth=0.1, 32 | # gru, lstm, sru or atr 33 | cell="atr", 34 | # whether use layer normalization, it will be slow 35 | layer_norm=False, 36 | # notice that when opening the swap memory switch 37 | # you can train reasonably larger batch on condition 38 | # that your system will use much more cpu memory 39 | swap_memory=True, 40 | 41 | # bert configuration 42 | bert=None, 43 | bert_dir="path-to-bert/cased_L-12_H-768_A-12", 44 | tune_bert=False, 45 | enable_bert=False, 46 | use_bert_single=True, 47 | 48 | # whether use character embedding 49 | use_char=True, 50 | # whether lowercase word 51 | lower=False, 52 | bert_lower=False, 53 | 54 | model_name="nlinet", 55 | 56 | # constant batch size at 'batch' mode for batch-based batching 57 | batch_size=128, 58 | token_size=2000, 59 | batch_or_token='batch', 60 | # batch size for decoding, i.e. 
number of source sentences decoded at the same time 61 | eval_batch_size=64, 62 | # whether shuffle batches during training 63 | shuffle_batch=True, 64 | # whether use multiprocessing deal with data reading, default true 65 | data_multiprocessing=True, 66 | 67 | # word vocabulary 68 | word_vocab_file="path-of/word_vocab", 69 | # char vocabulary 70 | char_vocab_file="path-of/char_vocab", 71 | # pretrained word embedding 72 | pretrain_word_embedding_file="path-of/word_vocab.npz", 73 | # train file 74 | train_file=["path-of/train.p", "path-of/train.q", "path-of/train.l"], 75 | # dev file 76 | dev_file=["path-of/dev.p", "path-of/dev.q", "path-of/dev.l"], 77 | # test file 78 | test_file=["path-of/test.p", "path-of/test.q", "path-of/test.l"], 79 | # output directory 80 | output_dir="train", 81 | # output during testing 82 | test_output="", 83 | 84 | # adam optimizer hyperparameters 85 | beta1=0.9, 86 | beta2=0.999, 87 | epsilon=1e-8, 88 | # gradient clipping value 89 | clip_grad_norm=5.0, 90 | # initial learning rate 91 | lrate=1e-3, 92 | 93 | # allowed maximum sentence length 94 | max_len=100, 95 | # maximum word length 96 | max_w_len=25, 97 | 98 | # maximum epochs 99 | epoches=10, 100 | # the effective batch size is: batch/token size * update_cycle 101 | # sequential update cycle 102 | update_cycle=1, 103 | # the number of gpus 104 | gpus=[0], 105 | # whether enable ema 106 | ema_decay=0.9999, 107 | 108 | # print information every disp_freq training steps 109 | disp_freq=10, 110 | # evaluate on the development file every eval_freq steps 111 | eval_freq=1000, 112 | # save the model parameters every save_freq steps 113 | save_freq=1000, 114 | # saved checkpoint number 115 | checkpoints=5, 116 | # the maximum training steps, program with stop if epoches or max_training_steps is metted 117 | max_training_steps=100000, 118 | 119 | # number of threads for threaded reading, seems useless 120 | nthreads=6, 121 | # buffer size controls the number of sentences readed in one time, 122 | buffer_size=20000, 123 | # a unique queue in multi-thread reading process 124 | max_queue_size=100, 125 | # random control, not so well for tensorflow. 
126 | random_seed=1234, 127 | # whether or not train from checkpoint 128 | train_continue=True, 129 | ) 130 | -------------------------------------------------------------------------------- /nli/config_bert.py: -------------------------------------------------------------------------------- 1 | dict( 2 | # lrate decay 3 | # select strategy: noam, gnmt+, epoch, score and vanilla 4 | lrate_strategy="vanilla", 5 | # learning decay rate 6 | lrate_decay=0.5, 7 | # weight decay for L2 loss 8 | weight_decay=3e-5, 9 | 10 | # early stopping 11 | estop_patience=100, 12 | 13 | # initialization 14 | # type of initializer 15 | initializer="uniform", 16 | # initializer range control 17 | initializer_gain=0.08, 18 | 19 | # parameters for rnnsearch 20 | # encoder and decoder hidden size 21 | hidden_size=300, 22 | # source and target embedding size 23 | embed_size=300, 24 | # label number 25 | label_size=3, 26 | # number of layers 27 | char_embed_size=64, 28 | # dropout value 29 | dropout=0.3, 30 | # label smoothing value 31 | label_smooth=0.1, 32 | # gru, lstm, sru or atr 33 | cell="atr", 34 | # whether use layer normalization, it will be slow 35 | layer_norm=False, 36 | # notice that when opening the swap memory switch 37 | # you can train reasonably larger batch on condition 38 | # that your system will use much more cpu memory 39 | swap_memory=True, 40 | 41 | # bert configuration 42 | bert=None, 43 | bert_dir="path-to-bert/cased_L-12_H-768_A-12", 44 | tune_bert=True, 45 | enable_bert=True, 46 | use_bert_single=True, 47 | 48 | # whether use character embedding 49 | use_char=True, 50 | # whether lowercase word 51 | lower=False, 52 | bert_lower=False, 53 | 54 | model_name="nlinet", 55 | 56 | # constant batch size at 'batch' mode for batch-based batching 57 | batch_size=32, 58 | token_size=2000, 59 | batch_or_token='batch', 60 | # batch size for decoding, i.e. 
number of source sentences decoded at the same time 61 | eval_batch_size=32, 62 | # whether shuffle batches during training 63 | shuffle_batch=True, 64 | # whether use multiprocessing deal with data reading, default true 65 | data_multiprocessing=True, 66 | 67 | # word vocabulary 68 | word_vocab_file="path-of/word_vocab", 69 | # char vocabulary 70 | char_vocab_file="path-of/char_vocab", 71 | # pretrained word embedding 72 | pretrain_word_embedding_file="path-of/word_vocab.npz", 73 | # train file 74 | train_file=["path-of/train.p", "path-of/train.q", "path-of/train.l"], 75 | # dev file 76 | dev_file=["path-of/dev.p", "path-of/dev.q", "path-of/dev.l"], 77 | # test file 78 | test_file=["path-of/test.p", "path-of/test.q", "path-of/test.l"], 79 | # output directory 80 | output_dir="train", 81 | # output during testing 82 | test_output="", 83 | 84 | # adam optimizer hyperparameters 85 | beta1=0.9, 86 | beta2=0.999, 87 | epsilon=1e-8, 88 | # gradient clipping value 89 | clip_grad_norm=5.0, 90 | # initial learning rate 91 | lrate=2e-5, 92 | 93 | # allowed maximum sentence length 94 | max_len=100, 95 | # maximum word length 96 | max_w_len=25, 97 | 98 | # maximum epochs 99 | epoches=5, 100 | # the effective batch size is: batch/token size * update_cycle 101 | # sequential update cycle 102 | update_cycle=1, 103 | # the number of gpus 104 | gpus=[0], 105 | # whether enable ema 106 | ema_decay=0.9999, 107 | 108 | # print information every disp_freq training steps 109 | disp_freq=10, 110 | # evaluate on the development file every eval_freq steps 111 | eval_freq=1000, 112 | # save the model parameters every save_freq steps 113 | save_freq=1000, 114 | # saved checkpoint number 115 | checkpoints=5, 116 | # the maximum training steps, program with stop if epoches or max_training_steps is metted 117 | max_training_steps=100000, 118 | 119 | # number of threads for threaded reading, seems useless 120 | nthreads=6, 121 | # buffer size controls the number of sentences readed in one time, 122 | buffer_size=20000, 123 | # a unique queue in multi-thread reading process 124 | max_queue_size=100, 125 | # random control, not so well for tensorflow. 126 | random_seed=1234, 127 | # whether or not train from checkpoint 128 | train_continue=True, 129 | ) 130 | -------------------------------------------------------------------------------- /ner/code/tagger.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model API. 3 | """ 4 | import numpy as np 5 | from seqeval.metrics.sequence_labeling import get_entities 6 | 7 | 8 | class Tagger(object): 9 | """A model API that tags input sentence. 10 | 11 | Attributes: 12 | model: Model. 13 | preprocessor: Transformer. Preprocessing data for feature extraction. 14 | tokenizer: Tokenize input sentence. Default tokenizer is `str.split`. 15 | """ 16 | 17 | def __init__(self, model, preprocessor, tokenizer=str.split): 18 | self.model = model 19 | self.preprocessor = preprocessor 20 | self.tokenizer = tokenizer 21 | 22 | def predict_proba(self, text): 23 | """Probability estimates. 24 | 25 | The returned estimates for all classes are ordered by the 26 | label of classes. 27 | 28 | Args: 29 | text : string, the input text. 
30 | 31 | Returns: 32 | y : array-like, shape = [num_words, num_classes] 33 | Returns the probability of the word for each class in the model, 34 | """ 35 | assert isinstance(text, str) 36 | 37 | words = self.tokenizer(text) 38 | X = self.preprocessor.transform([words]) 39 | y = self.model.predict(X) 40 | y = y[0] # reduce batch dimension. 41 | 42 | return y 43 | 44 | def _get_prob(self, pred): 45 | prob = np.max(pred, -1) 46 | 47 | return prob 48 | 49 | def _get_tags(self, pred): 50 | tags = self.preprocessor.inverse_transform([pred]) 51 | tags = tags[0] # reduce batch dimension 52 | 53 | return tags 54 | 55 | def _build_response(self, sent, tags, prob): 56 | words = self.tokenizer(sent) 57 | res = { 58 | 'words': words, 59 | 'entities': [ 60 | 61 | ] 62 | } 63 | chunks = get_entities(tags) 64 | 65 | for chunk_type, chunk_start, chunk_end in chunks: 66 | chunk_end += 1 67 | entity = { 68 | 'text': ' '.join(words[chunk_start: chunk_end]), 69 | 'type': chunk_type, 70 | 'score': float(np.average(prob[chunk_start: chunk_end])), 71 | 'beginOffset': chunk_start, 72 | 'endOffset': chunk_end 73 | } 74 | res['entities'].append(entity) 75 | 76 | return res 77 | 78 | def analyze(self, text): 79 | """Analyze text and return pretty format. 80 | 81 | Args: 82 | text: string, the input text. 83 | 84 | Returns: 85 | res: dict. 86 | 87 | Examples: 88 | >>> text = 'President Obama is speaking at the White House.' 89 | >>> model.analyze(text) 90 | { 91 | "words": [ 92 | "President", 93 | "Obama", 94 | "is", 95 | "speaking", 96 | "at", 97 | "the", 98 | "White", 99 | "House." 100 | ], 101 | "entities": [ 102 | { 103 | "beginOffset": 1, 104 | "endOffset": 2, 105 | "score": 1, 106 | "text": "Obama", 107 | "type": "PER" 108 | }, 109 | { 110 | "beginOffset": 6, 111 | "endOffset": 8, 112 | "score": 1, 113 | "text": "White House.", 114 | "type": "ORG" 115 | } 116 | ] 117 | } 118 | """ 119 | pred = self.predict_proba(text) 120 | tags = self._get_tags(pred) 121 | prob = self._get_prob(pred) 122 | res = self._build_response(text, tags, prob) 123 | 124 | return res 125 | 126 | def predict(self, text): 127 | """Predict using the model. 128 | 129 | Args: 130 | text: string, the input text. 131 | 132 | Returns: 133 | tags: list, shape = (num_words,) 134 | Returns predicted values. 135 | """ 136 | pred = self.predict_proba(text) 137 | tags = self._get_tags(pred) 138 | 139 | return tags 140 | -------------------------------------------------------------------------------- /lm/README.md: -------------------------------------------------------------------------------- 1 | ## Language Modeling 2 | 3 | 4 | We do experiments on PTB and WT2 dataset, and use the mixture of softmax model [MoS](https://arxiv.org/abs/1711.03953). 5 | Main experimental results are summarized below. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 |
| Model | #Params | PTB Base | PTB +Finetune | PTB +Dynamic | WT2 Base | WT2 +Finetune | WT2 +Dynamic |
|-------|---------|----------|---------------|--------------|----------|---------------|--------------|
| Yang et al. (2018) | 22M | 55.97 | 54.44 | 47.69 | 63.33 | 61.45 | 40.68 |
| LSTM (this work)   | 22M | 63.78 | 62.12 | 53.11 | 69.78 | 68.68 | 44.60 |
| GRU (this work)    | 17M | 69.09 | 67.61 | 60.21 | 73.37 | 73.05 | 49.77 |
| ATR (this work)    | 9M  | 66.24 | 65.86 | 58.29 | 75.36 | 73.35 | 48.65 |
| SRU (this work)    | 13M | 69.64 | 65.29 | 60.97 | 85.15 | 84.97 | 57.97 |
| LRN (this work)    | 11M | 61.26 | 61.00 | 54.45 | 69.91 | 68.86 | 46.97 |
84 | 85 | Test perplexity. 86 | 87 | ## Requirement 88 | PyTorch >= 0.4.1 89 | 90 | ## How to Run? 91 | - download and preprocess dataset 92 | 93 | - see [MoS](https://github.com/zihangdai/mos) about the preprocessing of datasets 94 | 95 | - training and evaluation 96 | 97 | - training 98 | ``` 99 | #! /bin/bash 100 | 101 | export CUDA_VISIBLE_DEVICES=0 102 | 103 | # for PTB 104 | python3 main.py --data path-of/penn --dropouti 0.4 --dropoutl 0.29 --dropouth 0.225 --seed 28 --batch_size 12 --lr 10.0 --epoch 1000 --nhid 960 --nhidlast 620 --emsize 280 --n_experts 15 --save PTB --single_gpu --model lrn 105 | # for WT2 106 | python3 main.py --epochs 1000 --data path-of/wikitext-2 --save WT2 --dropouth 0.2 --seed 1882 --n_experts 15 --nhid 1150 --nhidlast 650 --emsize 300 --batch_size 15 --lr 15.0 --dropoutl 0.29 --small_batch_size 5 --max_seq_len_delta 20 --dropouti 0.55 --single_gpu --model lrn 107 | ``` 108 | 109 | - finetuning 110 | ``` 111 | # for PTB 112 | python3 finetune.py --data path-of/penn --dropouti 0.4 --dropoutl 0.29 --dropouth 0.225 --seed 28 --batch_size 12 --lr 15.0 --epoch 1000 --nhid 960 --emsize 280 --n_experts 15 --save PTB-XXX --single_gpu --model lrn 113 | # for WT2 114 | python3 finetune.py --epochs 1000 --data path-of/wikitext-2 --save WT2-XXX --dropouth 0.2 --seed 1882 --n_experts 15 --nhid 1150 --emsize 300 --batch_size 15 --lr 20.0 --dropoutl 0.29 --small_batch_size 5 --max_seq_len_delta 20 --dropouti 0.55 --single_gpu --model lrn 115 | ``` 116 | 117 | - dynamic evaluation 118 | ``` 119 | # for PTB 120 | python3 dynamiceval.py --model PTB-XXX/finetune_model.pt --data path-of/penn --lamb 0.075 --gpu 0 121 | # for WT2 122 | python3 dynamiceval.py --data path-of/wikitext-2 --model WT2-XXX/finetune_model.pt --epsilon 0.002 --gpu 0 123 | ``` 124 | 125 | - general evaluation 126 | ``` 127 | # for PTB 128 | python3 evaluate.py --data path-of/penn --dropouti 0.4 --dropoutl 0.29 --dropouth 0.225 --seed 28 --batch_size 12 --lr 10.0 --epoch 1000 --nhid 960 --nhidlast 620 --emsize 280 --n_experts 15 --save PTB-XXX --single_gpu --model lrn 129 | # for WT2 130 | python3 evaluate.py --epochs 1000 --data path-of/wikitext-2 --save WT2-XXX --dropouth 0.2 --seed 1882 --n_experts 15 --nhid 1150 --nhidlast 650 --emsize 300 --batch_size 15 --lr 15.0 --dropoutl 0.29 --small_batch_size 5 --max_seq_len_delta 20 --dropouti 0.55 --single_gpu --model lrn 131 | ``` 132 | 133 | ## Credits 134 | 135 | Source code structure is adapted from [MoS](https://github.com/zihangdai/mos). 
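All results in the table above are test perplexities, i.e. the exponential of the average per-token negative log-likelihood (in nats) on the test set; the Base / +Finetune / +Dynamic columns correspond to the training, finetuning and dynamic-evaluation stages described above. As a quick reference, here is a tiny self-contained sketch of that conversion (illustrative only; the `perplexity` helper below is not part of this codebase):
```
import math

def perplexity(total_nll_nats, n_tokens):
    # perplexity = exp(average negative log-likelihood per token)
    return math.exp(total_nll_nats / n_tokens)

# e.g. an average test loss of ~4.115 nats/token corresponds to a perplexity of ~61.3,
# roughly the PTB range reported above
print(perplexity(4.115 * 1000, 1000))
```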
-------------------------------------------------------------------------------- /lm/code/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from collections import Counter 5 | 6 | 7 | class Dictionary(object): 8 | def __init__(self): 9 | self.word2idx = {} 10 | self.idx2word = [] 11 | self.counter = Counter() 12 | self.total = 0 13 | 14 | def add_word(self, word): 15 | if word not in self.word2idx: 16 | self.idx2word.append(word) 17 | self.word2idx[word] = len(self.idx2word) - 1 18 | token_id = self.word2idx[word] 19 | self.counter[token_id] += 1 20 | self.total += 1 21 | return self.word2idx[word] 22 | 23 | def __len__(self): 24 | return len(self.idx2word) 25 | 26 | 27 | class Corpus(object): 28 | def __init__(self, path): 29 | self.dictionary = Dictionary() 30 | self.train = self.tokenize(os.path.join(path, 'train.txt')) 31 | self.valid = self.tokenize(os.path.join(path, 'valid.txt')) 32 | self.test = self.tokenize(os.path.join(path, 'test.txt')) 33 | 34 | def tokenize(self, path): 35 | """Tokenizes a text file.""" 36 | assert os.path.exists(path) 37 | # Add words to the dictionary 38 | with open(path, 'r', encoding='utf-8') as f: 39 | tokens = 0 40 | for line in f: 41 | words = line.split() + ['<eos>'] 42 | tokens += len(words) 43 | for word in words: 44 | self.dictionary.add_word(word) 45 | 46 | # Tokenize file content 47 | with open(path, 'r', encoding='utf-8') as f: 48 | ids = torch.LongTensor(tokens) 49 | token = 0 50 | for line in f: 51 | words = line.split() + ['<eos>'] 52 | for word in words: 53 | ids[token] = self.dictionary.word2idx[word] 54 | token += 1 55 | 56 | return ids 57 | 58 | class SentCorpus(object): 59 | def __init__(self, path): 60 | self.dictionary = Dictionary() 61 | self.train = self.tokenize(os.path.join(path, 'train.txt')) 62 | self.valid = self.tokenize(os.path.join(path, 'valid.txt')) 63 | self.test = self.tokenize(os.path.join(path, 'test.txt')) 64 | 65 | def tokenize(self, path): 66 | """Tokenizes a text file.""" 67 | assert os.path.exists(path) 68 | # Add words to the dictionary 69 | with open(path, 'r', encoding='utf-8') as f: 70 | tokens = 0 71 | for line in f: 72 | words = line.split() + ['<eos>'] 73 | tokens += len(words) 74 | for word in words: 75 | self.dictionary.add_word(word) 76 | 77 | # Tokenize file content 78 | sents = [] 79 | with open(path, 'r', encoding='utf-8') as f: 80 | for line in f: 81 | if not line: 82 | continue 83 | words = line.split() + ['<eos>'] 84 | sent = torch.LongTensor(len(words)) 85 | for i, word in enumerate(words): 86 | sent[i] = self.dictionary.word2idx[word] 87 | sents.append(sent) 88 | 89 | return sents 90 | 91 | class BatchSentLoader(object): 92 | def __init__(self, sents, batch_size, pad_id=0, cuda=False, volatile=False): 93 | self.sents = sents 94 | self.batch_size = batch_size 95 | self.sort_sents = sorted(sents, key=lambda x: x.size(0)) 96 | self.cuda = cuda 97 | self.volatile = volatile 98 | self.pad_id = pad_id 99 | 100 | def __next__(self): 101 | if self.idx >= len(self.sort_sents): 102 | raise StopIteration 103 | 104 | batch_size = min(self.batch_size, len(self.sort_sents)-self.idx) 105 | batch = self.sort_sents[self.idx:self.idx+batch_size] 106 | max_len = max([s.size(0) for s in batch]) 107 | tensor = torch.LongTensor(max_len, batch_size).fill_(self.pad_id) 108 | for i in range(len(batch)): 109 | s = batch[i] 110 | tensor[:s.size(0),i].copy_(s) 111 | if self.cuda: 112 | tensor = tensor.cuda() 113 | 114 | self.idx += batch_size 115 | 116 | return
tensor 117 | 118 | next = __next__ 119 | 120 | def __iter__(self): 121 | self.idx = 0 122 | return self 123 | 124 | if __name__ == '__main__': 125 | corpus = SentCorpus('../penn') 126 | loader = BatchSentLoader(corpus.test, 10) 127 | for i, d in enumerate(loader): 128 | print(i, d.size()) 129 | -------------------------------------------------------------------------------- /doc/code/utils/saver.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import os 8 | import tensorflow as tf 9 | 10 | 11 | class Saver(object): 12 | def __init__(self, 13 | checkpoints=5, # save the latest number of checkpoints 14 | output_dir=None # the output directory 15 | ): 16 | if output_dir is None: 17 | output_dir = "./output" 18 | self.output_dir = output_dir 19 | self.output_best_dir = os.path.join(output_dir, "best") 20 | 21 | self.saver = tf.train.Saver( 22 | max_to_keep=checkpoints 23 | ) 24 | self.best_saver = tf.train.Saver( 25 | max_to_keep=1 26 | ) 27 | self.best_score = -1 28 | self.score_record = tf.gfile.Open( 29 | os.path.join(self.output_best_dir, "metric.log"), 30 | mode="a+" 31 | ) 32 | 33 | def save(self, session, step, metric_score=None): 34 | if not tf.gfile.Exists(self.output_dir): 35 | tf.gfile.MkDir(self.output_dir) 36 | if not tf.gfile.Exists(self.output_best_dir): 37 | tf.gfile.MkDir(self.output_best_dir) 38 | 39 | self.saver.save(session, 40 | os.path.join(self.output_dir, "model"), 41 | global_step=step) 42 | 43 | def _move(path, new_path): 44 | if tf.gfile.Exists(path): 45 | if tf.gfile.Exists(new_path): 46 | tf.gfile.Remove(new_path) 47 | tf.gfile.Copy(path, new_path) 48 | 49 | if metric_score is not None and metric_score > self.best_score: 50 | self.best_score = metric_score 51 | self.best_saver.save( 52 | session, os.path.join(self.output_best_dir, "model")) 53 | 54 | _move(os.path.join(self.output_dir, "param.json"), 55 | os.path.join(self.output_best_dir, "param.json")) 56 | _move(os.path.join(self.output_dir, "record.json"), 57 | os.path.join(self.output_best_dir, "record.json")) 58 | 59 | # this recorder only record best scores 60 | self.score_record.write("Steps {}, Metric Score {}\n" 61 | .format(step, metric_score)) 62 | 63 | self.score_record.flush() 64 | 65 | def restore(self, session, path=None): 66 | if path is not None and tf.gfile.Exists(path): 67 | check_dir = path 68 | else: 69 | check_dir = self.output_dir 70 | 71 | checkpoint = os.path.join(check_dir, "checkpoint") 72 | if not tf.gfile.Exists(checkpoint): 73 | tf.logging.warn("No Existing Model detected") 74 | else: 75 | latest_checkpoint = tf.gfile.Open(checkpoint).readline() 76 | model_name = latest_checkpoint.strip().split(":")[1].strip() 77 | model_name = model_name[1:-1] # remove "" 78 | model_path = os.path.join(check_dir, model_name) 79 | model_path = os.path.abspath(model_path) 80 | if not tf.gfile.Exists(model_path+".meta"): 81 | tf.logging.error("model '{}' does not exists" 82 | .format(model_path)) 83 | else: 84 | try: 85 | self.saver.restore(session, model_path) 86 | except tf.errors.NotFoundError: 87 | # In this case, we simply assume that the cycle part 88 | # is mismatched, where the replicas are missing. 89 | # This would happen if you switch from un-cycle mode 90 | # to cycle mode. 
91 | tf.logging.warn("Starting Backup Restore") 92 | ops = [] 93 | reader = tf.train.load_checkpoint(model_path) 94 | for var in tf.global_variables(): 95 | name = var.op.name 96 | 97 | if reader.has_tensor(name): 98 | tf.logging.info('{} get initialization from {}' 99 | .format(name, name)) 100 | ops.append( 101 | tf.assign(var, reader.get_tensor(name))) 102 | else: 103 | tf.logging.warn("{} is missed".format(name)) 104 | restore_op = tf.group(*ops, name="restore_global_vars") 105 | session.run(restore_op) 106 | -------------------------------------------------------------------------------- /nli/code/utils/saver.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import os 8 | import tensorflow as tf 9 | 10 | 11 | class Saver(object): 12 | def __init__(self, 13 | checkpoints=5, # save the latest number of checkpoints 14 | output_dir=None # the output directory 15 | ): 16 | if output_dir is None: 17 | output_dir = "./output" 18 | self.output_dir = output_dir 19 | self.output_best_dir = os.path.join(output_dir, "best") 20 | 21 | self.saver = tf.train.Saver( 22 | max_to_keep=checkpoints 23 | ) 24 | self.best_saver = tf.train.Saver( 25 | max_to_keep=1 26 | ) 27 | self.best_score = -1 28 | self.score_record = tf.gfile.Open( 29 | os.path.join(self.output_best_dir, "metric.log"), 30 | mode="a+" 31 | ) 32 | 33 | def save(self, session, step, metric_score=None): 34 | if not tf.gfile.Exists(self.output_dir): 35 | tf.gfile.MkDir(self.output_dir) 36 | if not tf.gfile.Exists(self.output_best_dir): 37 | tf.gfile.MkDir(self.output_best_dir) 38 | 39 | self.saver.save(session, 40 | os.path.join(self.output_dir, "model"), 41 | global_step=step) 42 | 43 | def _move(path, new_path): 44 | if tf.gfile.Exists(path): 45 | if tf.gfile.Exists(new_path): 46 | tf.gfile.Remove(new_path) 47 | tf.gfile.Copy(path, new_path) 48 | 49 | if metric_score is not None and metric_score > self.best_score: 50 | self.best_score = metric_score 51 | self.best_saver.save( 52 | session, os.path.join(self.output_best_dir, "model")) 53 | 54 | _move(os.path.join(self.output_dir, "param.json"), 55 | os.path.join(self.output_best_dir, "param.json")) 56 | _move(os.path.join(self.output_dir, "record.json"), 57 | os.path.join(self.output_best_dir, "record.json")) 58 | 59 | # this recorder only record best scores 60 | self.score_record.write("Steps {}, Metric Score {}\n" 61 | .format(step, metric_score)) 62 | 63 | self.score_record.flush() 64 | 65 | def restore(self, session, path=None): 66 | if path is not None and tf.gfile.Exists(path): 67 | check_dir = path 68 | else: 69 | check_dir = self.output_dir 70 | 71 | checkpoint = os.path.join(check_dir, "checkpoint") 72 | if not tf.gfile.Exists(checkpoint): 73 | tf.logging.warn("No Existing Model detected") 74 | else: 75 | latest_checkpoint = tf.gfile.Open(checkpoint).readline() 76 | model_name = latest_checkpoint.strip().split(":")[1].strip() 77 | model_name = model_name[1:-1] # remove "" 78 | model_path = os.path.join(check_dir, model_name) 79 | model_path = os.path.abspath(model_path) 80 | if not tf.gfile.Exists(model_path+".meta"): 81 | tf.logging.error("model '{}' does not exists" 82 | .format(model_path)) 83 | else: 84 | try: 85 | self.saver.restore(session, model_path) 86 | except tf.errors.NotFoundError: 87 | # In this case, we simply assume that the cycle part 88 | # is mismatched, where the replicas are 
missing. 89 | # This would happen if you switch from un-cycle mode 90 | # to cycle mode. 91 | tf.logging.warn("Starting Backup Restore") 92 | ops = [] 93 | reader = tf.train.load_checkpoint(model_path) 94 | for var in tf.global_variables(): 95 | name = var.op.name 96 | 97 | if reader.has_tensor(name): 98 | tf.logging.info('{} get initialization from {}' 99 | .format(name, name)) 100 | ops.append( 101 | tf.assign(var, reader.get_tensor(name))) 102 | else: 103 | tf.logging.warn("{} is missed".format(name)) 104 | restore_op = tf.group(*ops, name="restore_global_vars") 105 | session.run(restore_op) 106 | -------------------------------------------------------------------------------- /rc/elmo_rnet/code/bilm/elmo.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | 4 | def weight_layers(name, bilm_ops, l2_coef=None, 5 | use_top_only=False, do_layer_norm=False): 6 | ''' 7 | Weight the layers of a biLM with trainable scalar weights to 8 | compute ELMo representations. 9 | 10 | For each output layer, this returns two ops. The first computes 11 | a layer specific weighted average of the biLM layers, and 12 | the second the l2 regularizer loss term. 13 | The regularization terms are also add to tf.GraphKeys.REGULARIZATION_LOSSES 14 | 15 | Input: 16 | name = a string prefix used for the trainable variable names 17 | bilm_ops = the tensorflow ops returned to compute internal 18 | representations from a biLM. This is the return value 19 | from BidirectionalLanguageModel(...)(ids_placeholder) 20 | l2_coef: the l2 regularization coefficient $\lambda$. 21 | Pass None or 0.0 for no regularization. 22 | use_top_only: if True, then only use the top layer. 23 | do_layer_norm: if True, then apply layer normalization to each biLM 24 | layer before normalizing 25 | 26 | Output: 27 | { 28 | 'weighted_op': op to compute weighted average for output, 29 | 'regularization_op': op to compute regularization term 30 | } 31 | ''' 32 | def _l2_regularizer(weights): 33 | if l2_coef is not None: 34 | return l2_coef * tf.reduce_sum(tf.square(weights)) 35 | else: 36 | return 0.0 37 | 38 | # Get ops for computing LM embeddings and mask 39 | lm_embeddings = bilm_ops['lm_embeddings'] 40 | mask = bilm_ops['mask'] 41 | 42 | # Disable the first embedding layer 43 | # lm_embeddings = lm_embeddings[:, 1:, :, :] 44 | 45 | n_lm_layers = int(lm_embeddings.get_shape()[1]) 46 | lm_dim = int(lm_embeddings.get_shape()[3]) 47 | 48 | with tf.control_dependencies([lm_embeddings, mask]): 49 | # Cast the mask and broadcast for layer use. 
50 | mask_float = tf.cast(mask, 'float32') 51 | broadcast_mask = tf.expand_dims(mask_float, axis=-1) 52 | 53 | def _do_ln(x): 54 | # do layer normalization excluding the mask 55 | x_masked = x * broadcast_mask 56 | N = tf.reduce_sum(mask_float) * lm_dim 57 | mean = tf.reduce_sum(x_masked) / N 58 | variance = tf.reduce_sum(((x_masked - mean) * broadcast_mask)**2 59 | ) / N 60 | return tf.nn.batch_normalization( 61 | x, mean, variance, None, None, 1E-12 62 | ) 63 | 64 | if use_top_only: 65 | layers = tf.split(lm_embeddings, n_lm_layers, axis=1) 66 | # just the top layer 67 | sum_pieces = tf.squeeze(layers[-1], squeeze_dims=1) 68 | # no regularization 69 | reg = 0.0 70 | else: 71 | W = tf.get_variable( 72 | '{}_ELMo_W'.format(name), 73 | shape=(n_lm_layers, ), 74 | initializer=tf.zeros_initializer, 75 | regularizer=_l2_regularizer, 76 | trainable=True, 77 | ) 78 | 79 | # normalize the weights 80 | normed_weights = tf.split( 81 | tf.nn.softmax(W + 1.0 / n_lm_layers), n_lm_layers 82 | ) 83 | # split LM layers 84 | layers = tf.split(lm_embeddings, n_lm_layers, axis=1) 85 | 86 | # compute the weighted, normalized LM activations 87 | pieces = [] 88 | for w, t in zip(normed_weights, layers): 89 | if do_layer_norm: 90 | pieces.append(w * _do_ln(tf.squeeze(t, squeeze_dims=1))) 91 | else: 92 | pieces.append(w * tf.squeeze(t, squeeze_dims=1)) 93 | sum_pieces = tf.add_n(pieces) 94 | 95 | # get the regularizer 96 | reg = [ 97 | r for r in tf.get_collection( 98 | tf.GraphKeys.REGULARIZATION_LOSSES) 99 | if r.name.find('{}_ELMo_W/'.format(name)) >= 0 100 | ] 101 | if len(reg) != 1: 102 | pass 103 | 104 | # scale the weighted sum by gamma 105 | gamma = tf.get_variable( 106 | '{}_ELMo_gamma'.format(name), 107 | shape=(1, ), 108 | initializer=tf.ones_initializer, 109 | regularizer=None, 110 | trainable=True, 111 | ) 112 | weighted_lm_layers = sum_pieces * gamma 113 | 114 | ret = {'weighted_op': weighted_lm_layers, 'regularization_op': reg} 115 | 116 | return ret 117 | 118 | -------------------------------------------------------------------------------- /doc/code/tasks.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import os 8 | import abc 9 | import csv 10 | import numpy as np 11 | 12 | 13 | def get_task(params, is_training): 14 | name = params.task.lower() 15 | 16 | if name == "amafull": 17 | return AMAFull(params.data_path, is_training) 18 | elif name == "amapolar": 19 | return AMAPolar(params.data_path, is_training) 20 | elif name == "yahoo": 21 | return YaHoo(params.data_path, is_training) 22 | elif name == "yelpfull": 23 | return YelpFull(params.data_path, is_training) 24 | elif name == "yelppolar": 25 | return YelpPolar(params.data_path, is_training) 26 | else: 27 | raise NotImplementedError("Not Supported: {}".format(name)) 28 | 29 | 30 | class Task(object): 31 | def __init__(self, data_path, is_training=False): 32 | self.data_path = data_path 33 | self.is_training = is_training 34 | 35 | self.trainset = [] 36 | self.devset = [] 37 | self.testset = [] 38 | 39 | if self.is_training: 40 | self._read_all_train_dev_data() 41 | self._read_all_test_data() 42 | 43 | def _clean_text(self, text_in): 44 | return text_in.replace('\\"', '"').replace('\\n', ' ') 45 | 46 | def _read_all_train_dev_data(self): 47 | train_data_path = os.path.join(self.data_path, "train.csv") 48 | 49 | dataset = [] 50 | with open(train_data_path) 
as tfile: 51 | reader = csv.reader(tfile, delimiter=",") 52 | 53 | for sample in reader: 54 | dataset.append(sample) 55 | 56 | np.random.shuffle(dataset) 57 | 58 | # split the dataset with 90% and 10% 59 | dev_size = int(len(dataset) * 0.1) 60 | 61 | self.devset = dataset[:dev_size] 62 | self.trainset = dataset[dev_size:] 63 | 64 | def _read_all_test_data(self): 65 | test_data_path = os.path.join(self.data_path, "test.csv") 66 | 67 | self.testset = [] 68 | with open(test_data_path) as tfile: 69 | reader = csv.reader(tfile, delimiter=",") 70 | 71 | for sample in reader: 72 | self.testset.append(sample) 73 | 74 | def _data_iter(self, iterator): 75 | for sample in iterator: 76 | label = int(sample[0]) - 1 77 | document = ' '.join(sample[1:]) 78 | 79 | document = self._clean_text(document) 80 | 81 | yield (label, document) 82 | 83 | def get_train_data(self): 84 | np.random.shuffle(self.trainset) 85 | for sample in self._data_iter(self.trainset): 86 | yield sample 87 | 88 | def get_dev_data(self): 89 | for sample in self._data_iter(self.devset): 90 | yield sample 91 | 92 | def get_test_data(self): 93 | for sample in self._data_iter(self.testset): 94 | yield sample 95 | 96 | @abc.abstractmethod 97 | def get_label_size(self): 98 | raise NotImplementedError("Not Supported") 99 | 100 | 101 | # amazon_review_full_csv 102 | class AMAFull(Task): 103 | def __init__(self, data_path, is_training=False): 104 | data_path = os.path.join(data_path, "amazon_review_full_csv") 105 | super(AMAFull, self).__init__(data_path, is_training) 106 | 107 | def get_label_size(self): 108 | return 5 109 | 110 | 111 | # amazon_review_polarity_csv 112 | class AMAPolar(Task): 113 | def __init__(self, data_path, is_training=False): 114 | data_path = os.path.join(data_path, "amazon_review_polarity_csv") 115 | super(AMAPolar, self).__init__(data_path, is_training) 116 | 117 | def get_label_size(self): 118 | return 2 119 | 120 | 121 | # yahoo_answers_csv 122 | class YaHoo(Task): 123 | def __init__(self, data_path, is_training=False): 124 | data_path = os.path.join(data_path, "yahoo_answers_csv") 125 | super(YaHoo, self).__init__(data_path, is_training) 126 | 127 | def get_label_size(self): 128 | return 10 129 | 130 | 131 | # yelp_review_full_csv 132 | class YelpFull(Task): 133 | def __init__(self, data_path, is_training=False): 134 | data_path = os.path.join(data_path, "yelp_review_full_csv") 135 | super(YelpFull, self).__init__(data_path, is_training) 136 | 137 | def get_label_size(self): 138 | return 5 139 | 140 | 141 | # yelp_review_polarity_csv 142 | class YelpPolar(Task): 143 | def __init__(self, data_path, is_training=False): 144 | data_path = os.path.join(data_path, "yelp_review_polarity_csv") 145 | super(YelpPolar, self).__init__(data_path, is_training) 146 | 147 | def get_label_size(self): 148 | return 2 149 | -------------------------------------------------------------------------------- /doc/code/evalu.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import time 8 | import json 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | from utils import queuer 13 | 14 | 15 | def decoding(sprobs, samples, params, mask=None): 16 | """Generate decoded sequence from seqs""" 17 | if mask is None: 18 | mask = [1.] 
* len(sprobs) 19 | 20 | flat_sprobs = [] 21 | for _sprobs, _m in zip(sprobs, mask): 22 | if _m < 1.: 23 | continue 24 | 25 | for start_prob in _sprobs: 26 | flat_sprobs.append(start_prob) 27 | 28 | assert len(flat_sprobs) == len(samples), 'Decoding length mismatch!' 29 | 30 | results = [] 31 | 32 | for (idx, sample), pred in zip(samples, flat_sprobs): 33 | gold_label = sample['label_id'] 34 | pred_label = pred 35 | 36 | results.append({ 37 | 'pred_answer': pred_label, 38 | 'sample_id': idx, 39 | 'gold_answer': gold_label 40 | }) 41 | 42 | return results 43 | 44 | 45 | def predict(session, features, 46 | out_pred, dataset, params, train="test"): 47 | """Performing decoding with exising information""" 48 | results = [] 49 | 50 | batcher = dataset.batcher(params.eval_batch_size, 51 | buffer_size=params.buffer_size, 52 | shuffle=False, train=train) 53 | eval_queue = queuer.EnQueuer(batcher, 54 | multiprocessing=params.data_multiprocessing, 55 | random_seed=params.random_seed) 56 | eval_queue.start(workers=params.nthreads, 57 | max_queue_size=params.max_queue_size) 58 | 59 | def _predict_one_batch(data_on_gpu): 60 | feed_dicts = {} 61 | flat_raw_data = [] 62 | for fidx, data in enumerate(data_on_gpu): 63 | # define feed_dict 64 | feed_dict = { 65 | features[fidx]["t"]: data['token_ids'], 66 | features[fidx]["l"]: data['l_id'], 67 | } 68 | if params.use_char: 69 | feed_dict[features[fidx]["c"]] = data['char_ids'] 70 | 71 | if params.enable_bert: 72 | feed_dict[features[fidx]["s"]] = data['subword_ids'] 73 | feed_dict[features[fidx]["sb"]] = data['subword_back'] 74 | 75 | feed_dicts.update(feed_dict) 76 | flat_raw_data.extend(data['raw']) 77 | 78 | # pick up valid outputs 79 | data_size = len(data_on_gpu) 80 | valid_out_pred = out_pred[:data_size] 81 | 82 | decode_spred = session.run( 83 | valid_out_pred, feed_dict=feed_dicts) 84 | 85 | predictions = decoding( 86 | decode_spred, flat_raw_data, params 87 | ) 88 | 89 | return predictions 90 | 91 | very_begin_time = time.time() 92 | data_on_gpu = [] 93 | for bidx, data in enumerate(eval_queue.get()): 94 | 95 | data_on_gpu.append(data) 96 | # use multiple gpus, and data samples is not enough 97 | if len(params.gpus) > 0 and len(data_on_gpu) < len(params.gpus): 98 | continue 99 | 100 | start_time = time.time() 101 | predictions = _predict_one_batch(data_on_gpu) 102 | data_on_gpu = [] 103 | results.extend(predictions) 104 | 105 | tf.logging.info( 106 | "Decoding Batch {} using {:.3f} s, translating {} " 107 | "sentences using {:.3f} s in total".format( 108 | bidx, time.time() - start_time, 109 | len(results), time.time() - very_begin_time 110 | ) 111 | ) 112 | 113 | eval_queue.stop() 114 | 115 | if len(data_on_gpu) > 0: 116 | start_time = time.time() 117 | predictions = _predict_one_batch(data_on_gpu) 118 | results.extend(predictions) 119 | 120 | tf.logging.info( 121 | "Decoding Batch {} using {:.3f} s, translating {} " 122 | "sentences using {:.3f} s in total".format( 123 | 'final', time.time() - start_time, 124 | len(results), time.time() - very_begin_time 125 | ) 126 | ) 127 | 128 | results = sorted(results, key=lambda x: x['sample_id']) 129 | 130 | golds = [result['gold_answer'] for result in results] 131 | preds = [result['pred_answer'] for result in results] 132 | 133 | score = np.sum(np.asarray(golds) == np.asarray(preds)) * 100. 
/ len(golds) 134 | 135 | return results, score 136 | 137 | 138 | def dump_predictions(results, output): 139 | """save translation""" 140 | with tf.gfile.Open(output, 'w') as writer: 141 | for sample in results: 142 | sample['pred_answer'] = sample['pred_answer'] 143 | writer.write(json.dumps(sample) + "\n") 144 | tf.logging.info("Saving translations into {}".format(output)) 145 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | ## Document Classification 2 | 3 | One concern with LRN is that simplifying the recurrent component might weaken modeling capacity, in particular the ability 4 | to capture long-range dependencies. We address this concern with experiments on document classification. 5 | 6 | We choose: 7 | - Amazon Review Polarity (AmaPolar, 2 labels, 3.6M/0.4M for training/testing) 8 | - Amazon Review Full (AmaFull, 5 labels, 3M/0.65M for training/testing) 9 | - Yahoo! Answers (Yahoo, 10 labels, 1.4M/60K for training/testing) 10 | - Yelp Review Polarity (YelpPolar, 2 labels, 0.56M/38K for training/testing) 11 | 12 | The datasets come from [Zhang et al. (2015)](https://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf). 13 | We use a bidirectional RNN model followed by an attentive pooling layer. Char and GloVe embeddings are used for word representation. 14 | Main experimental results are summarized below. 15 |
| Model | #Params | AmaPolar ERR | AmaPolar Time | Yahoo ERR | Yahoo Time | AmaFull ERR | AmaFull Time | YelpPolar ERR | YelpPolar Time |
|-------|---------|--------------|---------------|-----------|------------|-------------|--------------|---------------|----------------|
| Zhang et al. (2015) | -    | 6.10 | -     | 29.16 | -     | 40.57 | -     | 5.26 | -     |
| LSTM (this work)    | 227K | 4.37 | 0.947 | 24.62 | 1.332 | 37.22 | 1.003 | 3.58 | 1.362 |
| GRU (this work)     | 176K | 4.39 | 0.948 | 24.68 | 1.242 | 37.20 | 0.982 | 3.47 | 1.230 |
| ATR (this work)     | 74K  | 4.78 | 0.867 | 25.33 | 1.117 | 38.54 | 0.836 | 4.00 | 1.124 |
| SRU (this work)     | 194K | 4.95 | 0.919 | 24.78 | 1.394 | 38.23 | 0.907 | 3.99 | 1.310 |
| LRN (this work)     | 151K | 4.98 | 0.731 | 25.07 | 1.038 | 38.42 | 0.788 | 3.98 | 1.022 |
109 | 110 | *Time*: time in seconds per training batch measured from 1k training steps. 111 | 112 | ## Requirement 113 | tensorflow >= 1.8.1 114 | 115 | ## How to Run? 116 | 117 | - Download and preprocess the dataset 118 | 119 | - The dataset link: https://drive.google.com/drive/folders/0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M 120 | - Prepare embedding and vocabulary 121 | 122 | Download the [pre-trained GloVe embedding](http://nlp.stanford.edu/data/glove.840B.300d.zip). 123 | Generate the vocabulary for each task as follows: 124 | ``` 125 | task=(amafull amapolar yahoo yelppolar) 126 | python code/run.py --mode vocab --config config.py --parameters=task="${task}",output_dir="${task}_vocab" 127 | ``` 128 | 129 | 130 | - Training and evaluation 131 | 132 | - Train the model as follows: 133 | ``` 134 | # configure your cuda library if necessary 135 | export CUDA_ROOT=XXX 136 | export PATH=$CUDA_ROOT/bin:$PATH 137 | export LD_LIBRARY_PATH=$CUDA_ROOT/lib64:$LD_LIBRARY_PATH 138 | 139 | task=(amafull amapolar yahoo yelppolar) 140 | python code/run.py --mode train --config config.py --parameters=task="${task}",output_dir="${task}_train",gpus=[1],word_vocab_file="${task}_vocab/vocab.word",char_vocab_file="${task}_vocab/vocab.char",enable_hierarchy=False,nthreads=2,enable_bert=False,cell="lrn",swap_memory=False 141 | ``` 142 | Other hyperparameter settings are available in the given config.py. 143 | 144 | - Test the model as follows: 145 | ``` 146 | task=(amafull amapolar yahoo yelppolar) 147 | python code/run.py --mode test --config config.py --parameters=task="${task}",output_dir="${task}_train/best",gpus=[0],word_vocab_file="${task}_vocab/vocab.word",char_vocab_file="${task}_vocab/vocab.char",enable_hierarchy=False,nthreads=2,enable_bert=False,cell="lrn",swap_memory=False,train_continue=False,test_output=${task}.out.txt 148 | ``` 149 | 150 | ## Credits 151 | 152 | Source code structure is adapted from [zero](https://github.com/bzhangGo/zero). -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lrn 2 | Source code for "A Lightweight Recurrent Network for Sequence Modeling" 3 | 4 | 5 | ## Model Architecture 6 | In our new paper, we propose the lightweight recurrent network (LRN), which combines the strengths of 7 | [ATR](https://arxiv.org/abs/1810.12546) and [SRU](https://arxiv.org/abs/1709.02755). 8 | 9 | * ATR helps reduce model parameters and avoids additional free parameters for gate calculation, through its twin-gate 10 | mechanism. 11 | * SRU follows the [QRNN](https://arxiv.org/abs/1611.01576) and moves all recurrent computations outside the recurrence. 12 | 13 | Based on the above units, we propose [LRN](xxx): 14 | 15 | 16 | 17 | where g(·) is an activation function, *tanh* or *identity*. Wq, Wk and Wv 18 | are model parameters. The matrix computation (as well as potential layer normalization) can be shifted outside the 19 | recurrence. Therefore, the whole model runs fast. 20 | 21 | When applying the twin-gate mechanism, the output values in **h**t might suffer from an explosion issue 22 | and could grow towards infinity. This is why we add the activation function. An alternative solution 23 | would be using layer normalization, which forces activation values to be stable.
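To make the point about shifting the matrix computation outside the recurrence concrete, below is a minimal NumPy sketch of this computation pattern. It is an illustration only, not the repository's implementation: the projections Q, K and V are computed for all timesteps with three batched matrix products, and the loop that remains is purely element-wise. The exact LRN gate equations (which projection feeds which gate, and the signs) are given in the paper and in `code/rnns/lrn.py`; the two gating lines below are placeholders in the ATR twin-gate style and should be read as an assumption.
```
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def run_cell(x, Wq, Wk, Wv, g=np.tanh):
    # x: [T, d_in]; Wq/Wk/Wv: [d_in, d]
    Q, K, V = x @ Wq, x @ Wk, x @ Wv      # all matrix products happen outside the loop
    h = np.zeros(Wv.shape[1])
    states = []
    for t in range(x.shape[0]):           # only cheap element-wise work inside the recurrence
        i_t = sigmoid(K[t] + h)           # assumed input gate (placeholder form)
        f_t = sigmoid(Q[t] - h)           # assumed forget gate (placeholder form)
        h = g(i_t * V[t] + f_t * h)       # g is tanh or identity, as described above
        states.append(h)
    return np.stack(states)

x = np.random.randn(5, 8)
Wq, Wk, Wv = np.random.randn(8, 16), np.random.randn(8, 16), np.random.randn(8, 16)
print(run_cell(x, Wq, Wk, Wv).shape)      # (5, 16)
```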
24 | 25 | ## Structure Analysis 26 | One way to understand the model is to unfold the LRN structure along input tokens: 27 | 28 | 29 | 30 | The above structure, which is also observed by [Zhang et al.](https://arxiv.org/abs/1810.12546), [Lee et al.](https://arxiv.org/abs/1705.07393), 31 | among others, endows the RNN model with multiple interpretations. We provide two as follows: 32 | 33 | * *Relation with Self Attention Networks* 34 | 35 | 36 | Informally, LRN assembles forget gates from step *t* to step *k+1* in order to query the key (input gate). The resulting 37 | weight is assigned to the corresponding value representation and contributes to the final hidden representation (a short numerical sketch of this unrolling is given at the end of this README). 38 | 39 | Do the learned weights make sense? We run a classification experiment on the AmaPolar task with a unidirectional linear-LRN. 40 | The final hidden state is fed into the classifier. One example below shows the learned weights. The term *great* gains 41 | a large weight, which decays slowly and contributes to the final *positive* decision. 42 | 43 | 44 | * *Long-term and Short-term Memory* 45 | 46 | 47 | Another view of the unfolded structure is that different gates form different memory mechanisms. The input gate acts as 48 | a short-term memory and indicates how much information is activated for the current token. The forget gates form a forget 49 | chain that controls how to erase meaningless past information. 50 | 51 | ## Experiments 52 | 53 | We ran experiments on six different tasks: 54 | * [Natural Language Inference](nli) 55 | * [Document Classification](doc) 56 | * [Machine Translation](mt) 57 | * [Reading Comprehension](rc) 58 | * [Named Entity Recognition](ner) 59 | * [Language Modeling](lm) 60 | 61 | 62 | ## Citation 63 | 64 | Please cite the following paper: 65 | > Biao Zhang; Rico Sennrich (2019). *A Lightweight Recurrent Network for Sequence Modeling*. 66 | In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. Florence, Italy. 67 | 68 | ``` 69 | @inproceedings{zhang-sennrich:2019:ACL, 70 | address = "Florence, Italy", 71 | author = "Zhang, Biao and Sennrich, Rico", 72 | booktitle = "{Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}", 73 | publisher = "Association for Computational Linguistics", 74 | title = "{A Lightweight Recurrent Network for Sequence Modeling}", 75 | year = "2019" 76 | } 77 | ``` 78 | 79 | ## Contact 80 | 81 | For any further comments or questions about LRN, please email Biao Zhang.
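Footnote to the structure analysis above: for the identity-activation (linear) case, unrolling a recurrence of the form h_t = i_t ⊙ v_t + f_t ⊙ h_{t-1} gives exactly the attention-like weighting described in the self-attention interpretation, with the weight on step k being i_k scaled by the product of forget gates from k+1 to t. The snippet below checks this equivalence numerically; the gates and values are random stand-ins rather than actual LRN activations.
```
import numpy as np

rng = np.random.default_rng(0)
T, d = 6, 4
i = rng.uniform(0, 1, (T, d))    # stand-in input gates
f = rng.uniform(0, 1, (T, d))    # stand-in forget gates
v = rng.normal(size=(T, d))      # stand-in value vectors

# recurrent form (identity activation): h_t = i_t * v_t + f_t * h_{t-1}
h = np.zeros(d)
for t in range(T):
    h = i[t] * v[t] + f[t] * h

# unrolled form: h_T = sum_k (prod_{j>k} f_j) * i_k * v_k
h_unrolled = sum(np.prod(f[k + 1:], axis=0) * i[k] * v[k] for k in range(T))

print(np.allclose(h, h_unrolled))  # True
```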
-------------------------------------------------------------------------------- /doc/code/utils/cycle.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def _zero_variables(variables, name=None): 11 | ops = [] 12 | 13 | for var in variables: 14 | with tf.device(var.device): 15 | op = var.assign(tf.zeros(var.shape.as_list())) 16 | ops.append(op) 17 | 18 | return tf.group(*ops, name=name or "zero_variables") 19 | 20 | 21 | def _replicate_variables(variables, device=None, suffix="Replica"): 22 | new_vars = [] 23 | 24 | for var in variables: 25 | device = device or var.device 26 | with tf.device(device): 27 | name = var.op.name + "/{}".format(suffix) 28 | new_vars.append(tf.Variable(tf.zeros(var.shape.as_list()), 29 | name=name, trainable=False)) 30 | 31 | return new_vars 32 | 33 | 34 | def _collect_gradients(gradients, variables): 35 | ops = [] 36 | 37 | for grad, var in zip(gradients, variables): 38 | if isinstance(grad, tf.Tensor): 39 | ops.append(tf.assign_add(var, grad)) 40 | else: 41 | ops.append(tf.scatter_add(var, grad.indices, grad.values)) 42 | 43 | return tf.group(*ops, name="collect_gradients") 44 | 45 | 46 | def create_train_op(named_scalars, grads_and_vars, optimizer, global_step, params): 47 | gradients = [item[0] for item in grads_and_vars] 48 | variables = [item[1] for item in grads_and_vars] 49 | 50 | if params.update_cycle == 1: 51 | zero_variables_op = tf.no_op("zero_variables") 52 | collect_op = tf.no_op("collect_op") 53 | else: 54 | named_vars = {} 55 | for name in named_scalars: 56 | named_var = tf.Variable(tf.zeros([]), 57 | name="{}/CTrainOpReplica".format(name), 58 | trainable=False) 59 | named_vars[name] = named_var 60 | count_var = tf.Variable(tf.zeros([]), name="count/CTrainOpReplica", 61 | trainable=False) 62 | slot_variables = _replicate_variables(variables, suffix="CTrainOpReplica") 63 | zero_variables_op = _zero_variables( 64 | slot_variables + [count_var] + named_vars.values()) 65 | 66 | collect_ops = [] 67 | # collect gradients 68 | collect_grads_op = _collect_gradients(gradients, slot_variables) 69 | collect_ops.append(collect_grads_op) 70 | 71 | # collect other scalars 72 | for name in named_scalars: 73 | scalar = named_scalars[name] 74 | named_var = named_vars[name] 75 | collect_op = tf.assign_add(named_var, scalar) 76 | collect_ops.append(collect_op) 77 | # collect counting variable 78 | collect_count_op = tf.assign_add(count_var, 1.0) 79 | collect_ops.append(collect_count_op) 80 | 81 | collect_op = tf.group(*collect_ops, name="collect_op") 82 | scale = 1.0 / (tf.to_float(count_var) + 1.0) 83 | gradients = [scale * (g + s) 84 | for (g, s) in zip(gradients, slot_variables)] 85 | 86 | for name in named_scalars: 87 | named_scalars[name] = scale * ( 88 | named_scalars[name] + named_vars[name]) 89 | 90 | global_norm = tf.global_norm(gradients) 91 | 92 | # Gradient clipping 93 | if isinstance(params.clip_grad_norm or None, float): 94 | gradients, _ = tf.clip_by_global_norm(gradients, 95 | params.clip_grad_norm, 96 | use_norm=global_norm) 97 | 98 | # Update variables 99 | grads_and_vars = list(zip(gradients, variables)) 100 | train_op = optimizer.apply_gradients(grads_and_vars, global_step) 101 | 102 | ops = { 103 | "zero_op": zero_variables_op, 104 | "collect_op": collect_op, 105 | "train_op": train_op 106 | } 107 | 108 | # apply ema 109 | 
if params.ema_decay > 0.: 110 | tf.logging.info('Using Exp Moving Average to train the model with decay {}.'.format(params.ema_decay)) 111 | ema = tf.train.ExponentialMovingAverage(decay=params.ema_decay, num_updates=global_step) 112 | ema_op = ema.apply(variables) 113 | with tf.control_dependencies([ops['train_op']]): 114 | ops['train_op'] = tf.group(ema_op) 115 | bck_vars = _replicate_variables(variables, suffix="CTrainOpBackUpReplica") 116 | 117 | ops['ema_backup_op'] = tf.group(*(tf.assign(bck, var.read_value()) 118 | for bck, var in zip(bck_vars, variables))) 119 | ops['ema_restore_op'] = tf.group(*(tf.assign(var, bck.read_value()) 120 | for bck, var in zip(bck_vars, variables))) 121 | ops['ema_assign_op'] = tf.group(*(tf.assign(var, ema.average(var).read_value()) 122 | for var in variables)) 123 | 124 | ret = named_scalars 125 | ret.update({ 126 | "gradient_norm": global_norm, 127 | }) 128 | 129 | return ret, ops 130 | -------------------------------------------------------------------------------- /nli/code/utils/cycle.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def _zero_variables(variables, name=None): 11 | ops = [] 12 | 13 | for var in variables: 14 | with tf.device(var.device): 15 | op = var.assign(tf.zeros(var.shape.as_list())) 16 | ops.append(op) 17 | 18 | return tf.group(*ops, name=name or "zero_variables") 19 | 20 | 21 | def _replicate_variables(variables, device=None, suffix="Replica"): 22 | new_vars = [] 23 | 24 | for var in variables: 25 | device = device or var.device 26 | with tf.device(device): 27 | name = var.op.name + "/{}".format(suffix) 28 | new_vars.append(tf.Variable(tf.zeros(var.shape.as_list()), 29 | name=name, trainable=False)) 30 | 31 | return new_vars 32 | 33 | 34 | def _collect_gradients(gradients, variables): 35 | ops = [] 36 | 37 | for grad, var in zip(gradients, variables): 38 | if isinstance(grad, tf.Tensor): 39 | ops.append(tf.assign_add(var, grad)) 40 | else: 41 | ops.append(tf.scatter_add(var, grad.indices, grad.values)) 42 | 43 | return tf.group(*ops, name="collect_gradients") 44 | 45 | 46 | def create_train_op(named_scalars, grads_and_vars, optimizer, global_step, params): 47 | gradients = [item[0] for item in grads_and_vars] 48 | variables = [item[1] for item in grads_and_vars] 49 | 50 | if params.update_cycle == 1: 51 | zero_variables_op = tf.no_op("zero_variables") 52 | collect_op = tf.no_op("collect_op") 53 | else: 54 | named_vars = {} 55 | for name in named_scalars: 56 | named_var = tf.Variable(tf.zeros([]), 57 | name="{}/CTrainOpReplica".format(name), 58 | trainable=False) 59 | named_vars[name] = named_var 60 | count_var = tf.Variable(tf.zeros([]), name="count/CTrainOpReplica", 61 | trainable=False) 62 | slot_variables = _replicate_variables(variables, suffix="CTrainOpReplica") 63 | zero_variables_op = _zero_variables( 64 | slot_variables + [count_var] + named_vars.values()) 65 | 66 | collect_ops = [] 67 | # collect gradients 68 | collect_grads_op = _collect_gradients(gradients, slot_variables) 69 | collect_ops.append(collect_grads_op) 70 | 71 | # collect other scalars 72 | for name in named_scalars: 73 | scalar = named_scalars[name] 74 | named_var = named_vars[name] 75 | collect_op = tf.assign_add(named_var, scalar) 76 | collect_ops.append(collect_op) 77 | # collect counting variable 78 | collect_count_op = 
tf.assign_add(count_var, 1.0) 79 | collect_ops.append(collect_count_op) 80 | 81 | collect_op = tf.group(*collect_ops, name="collect_op") 82 | scale = 1.0 / (tf.to_float(count_var) + 1.0) 83 | gradients = [scale * (g + s) 84 | for (g, s) in zip(gradients, slot_variables)] 85 | 86 | for name in named_scalars: 87 | named_scalars[name] = scale * ( 88 | named_scalars[name] + named_vars[name]) 89 | 90 | global_norm = tf.global_norm(gradients) 91 | 92 | # Gradient clipping 93 | if isinstance(params.clip_grad_norm or None, float): 94 | gradients, _ = tf.clip_by_global_norm(gradients, 95 | params.clip_grad_norm, 96 | use_norm=global_norm) 97 | 98 | # Update variables 99 | grads_and_vars = list(zip(gradients, variables)) 100 | train_op = optimizer.apply_gradients(grads_and_vars, global_step) 101 | 102 | ops = { 103 | "zero_op": zero_variables_op, 104 | "collect_op": collect_op, 105 | "train_op": train_op 106 | } 107 | 108 | # apply ema 109 | if params.ema_decay > 0.: 110 | tf.logging.info('Using Exp Moving Average to train the model with decay {}.'.format(params.ema_decay)) 111 | ema = tf.train.ExponentialMovingAverage(decay=params.ema_decay, num_updates=global_step) 112 | ema_op = ema.apply(variables) 113 | with tf.control_dependencies([ops['train_op']]): 114 | ops['train_op'] = tf.group(ema_op) 115 | bck_vars = _replicate_variables(variables, suffix="CTrainOpBackUpReplica") 116 | 117 | ops['ema_backup_op'] = tf.group(*(tf.assign(bck, var.read_value()) 118 | for bck, var in zip(bck_vars, variables))) 119 | ops['ema_restore_op'] = tf.group(*(tf.assign(var, bck.read_value()) 120 | for bck, var in zip(bck_vars, variables))) 121 | ops['ema_assign_op'] = tf.group(*(tf.assign(var, ema.average(var).read_value()) 122 | for var in variables)) 123 | 124 | ret = named_scalars 125 | ret.update({ 126 | "gradient_norm": global_norm, 127 | }) 128 | 129 | return ret, ops 130 | -------------------------------------------------------------------------------- /nli/code/evalu.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import time 8 | import json 9 | import tensorflow as tf 10 | 11 | from utils import queuer 12 | 13 | 14 | def decoding(sprobs, samples, params, mask=None): 15 | """Generate decoded sequence from seqs""" 16 | if mask is None: 17 | mask = [1.] * len(sprobs) 18 | 19 | flat_sprobs = [] 20 | for _sprobs, _m in zip(sprobs, mask): 21 | if _m < 1.: continue 22 | 23 | for start_prob in _sprobs: 24 | flat_sprobs.append(start_prob) 25 | 26 | assert len(flat_sprobs) == len(samples), 'Decoding length mismatch!' 
27 | 28 | results = [] 29 | 30 | for (idx, sample), pred in zip(samples, flat_sprobs): 31 | gold_label = sample[0] 32 | pred_label = pred 33 | 34 | results.append({ 35 | 'pred_answer': int(pred_label), 36 | 'sample_id': idx, 37 | 'gold_answer': gold_label 38 | }) 39 | 40 | return results 41 | 42 | 43 | def predict(session, features, 44 | out_pred, dataset, params, train=True): 45 | """Performing decoding with existing information""" 46 | results = [] 47 | 48 | batcher = dataset.batcher(params.eval_batch_size, 49 | buffer_size=params.buffer_size, 50 | shuffle=False, train=train) 51 | eval_queue = queuer.EnQueuer(batcher, 52 | multiprocessing=params.data_multiprocessing, 53 | random_seed=params.random_seed) 54 | eval_queue.start(workers=params.nthreads, 55 | max_queue_size=params.max_queue_size) 56 | 57 | def _predict_one_batch(data_on_gpu): 58 | feed_dicts = {} 59 | flat_raw_data = [] 60 | for fidx, data in enumerate(data_on_gpu): 61 | # define feed_dict 62 | feed_dict = { 63 | features[fidx]["p"]: data['p_token_ids'], 64 | features[fidx]["h"]: data['h_token_ids'], 65 | features[fidx]["l"]: data['l_id'], 66 | } 67 | if params.use_char: 68 | feed_dict[features[fidx]["pc"]] = data['p_char_ids'] 69 | feed_dict[features[fidx]["hc"]] = data['h_char_ids'] 70 | 71 | if params.enable_bert: 72 | feed_dict[features[fidx]["ps"]] = data['p_subword_ids'] 73 | feed_dict[features[fidx]["hs"]] = data['h_subword_ids'] 74 | feed_dict[features[fidx]["pb"]] = data['p_subword_back'] 75 | feed_dict[features[fidx]["hb"]] = data['h_subword_back'] 76 | 77 | feed_dicts.update(feed_dict) 78 | flat_raw_data.extend(data['raw']) 79 | 80 | # pick up valid outputs 81 | data_size = len(data_on_gpu) 82 | valid_out_pred = out_pred[:data_size] 83 | 84 | decode_spred = session.run( 85 | valid_out_pred, feed_dict=feed_dicts) 86 | 87 | predictions = decoding( 88 | decode_spred, flat_raw_data, params 89 | ) 90 | 91 | return predictions 92 | 93 | very_begin_time = time.time() 94 | data_on_gpu = [] 95 | for bidx, data in enumerate(eval_queue.get()): 96 | 97 | data_on_gpu.append(data) 98 | # when using multiple gpus, wait until enough data samples are collected 99 | if len(params.gpus) > 0 and len(data_on_gpu) < len(params.gpus): 100 | continue 101 | 102 | start_time = time.time() 103 | predictions = _predict_one_batch(data_on_gpu) 104 | data_on_gpu = [] 105 | results.extend(predictions) 106 | 107 | tf.logging.info( 108 | "Decoding Batch {} using {:.3f} s, translating {} " 109 | "sentences using {:.3f} s in total".format( 110 | bidx, time.time() - start_time, 111 | len(results), time.time() - very_begin_time 112 | ) 113 | ) 114 | 115 | eval_queue.stop() 116 | 117 | if len(data_on_gpu) > 0: 118 | start_time = time.time() 119 | predictions = _predict_one_batch(data_on_gpu) 120 | results.extend(predictions) 121 | 122 | tf.logging.info( 123 | "Decoding Batch {} using {:.3f} s, translating {} " 124 | "sentences using {:.3f} s in total".format( 125 | 'final', time.time() - start_time, 126 | len(results), time.time() - very_begin_time 127 | ) 128 | ) 129 | 130 | return results 131 | 132 | 133 | def eval_metric(results, params): 134 | """Accuracy evaluation""" 135 | 136 | crr_cnt, total_cnt = 0, 0 137 | 138 | for result in results: 139 | total_cnt += 1 140 | 141 | p = result['pred_answer'] 142 | g = result['gold_answer'] 143 | 144 | if p == g: 145 | crr_cnt += 1 146 | 147 | return crr_cnt * 100.
/ total_cnt 148 | 149 | 150 | def dump_predictions(results, output): 151 | """save predictions""" 152 | with tf.gfile.Open(output, 'w') as writer: 153 | for sample in results: 154 | writer.write(json.dumps(sample) + "\n") 155 | tf.logging.info("Saving predictions into {}".format(output)) 156 | -------------------------------------------------------------------------------- /nli/code/vocab.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import json 8 | import argparse 9 | import numpy as np 10 | 11 | 12 | class Vocab(object): 13 | def __init__(self, lower=False, vocab_file=None): 14 | self.word2id = {} 15 | self.id2word = {} 16 | self.word2count = {} 17 | 18 | self.pad_sym = "<pad>" 19 | self.eos_sym = "<eos>" 20 | self.unk_sym = "<unk>" 21 | 22 | self.lower = lower 23 | 24 | self.insert(self.pad_sym) 25 | self.insert(self.unk_sym) 26 | self.insert(self.eos_sym) 27 | 28 | if vocab_file is not None: 29 | self.load_vocab(vocab_file) 30 | 31 | def insert(self, token): 32 | token = token if not self.lower else token.lower() 33 | if token not in self.word2id: 34 | index = len(self.word2id) 35 | self.word2id[token] = index 36 | self.id2word[index] = token 37 | 38 | self.word2count[token] = 0 39 | self.word2count[token] += 1 40 | 41 | def size(self): 42 | return len(self.word2id) 43 | 44 | def load_vocab(self, vocab_file): 45 | with open(vocab_file, 'r') as reader: 46 | for token in reader: 47 | self.insert(token.strip()) 48 | 49 | def get_token(self, id): 50 | if id in self.id2word: 51 | return self.id2word[id] 52 | return self.unk_sym 53 | 54 | def get_id(self, token): 55 | token = token if not self.lower else token.lower() 56 | if token in self.word2id: 57 | return self.word2id[token] 58 | return self.word2id[self.unk_sym] 59 | 60 | def sort_vocab(self, least_freq=-1): 61 | sorted_word2count = sorted( 62 | self.word2count.items(), key=lambda x: - x[1]) 63 | self.word2id, self.id2word, self.word2count = {}, {}, {} 64 | self.insert(self.pad_sym) 65 | self.insert(self.unk_sym) 66 | self.insert(self.eos_sym) 67 | for word, freq in sorted_word2count: 68 | if least_freq > 0: 69 | if freq <= least_freq: 70 | continue 71 | self.insert(word) 72 | 73 | def save_vocab(self, vocab_file): 74 | with open(vocab_file, 'w') as writer: 75 | for id in range(self.size()): 76 | writer.write(self.id2word[id].encode("utf-8") + "\n") 77 | 78 | def to_id(self, tokens, append_eos=True): 79 | if not append_eos: 80 | return [self.get_id(token) for token in tokens] 81 | else: 82 | return [self.get_id(token) for token in 83 | tokens + [self.eos_sym]] 84 | 85 | def to_tokens(self, ids): 86 | return [self.get_token(id) for id in ids] 87 | 88 | def eos(self): 89 | return self.get_id(self.eos_sym) 90 | 91 | def pad(self): 92 | return self.get_id(self.pad_sym) 93 | 94 | 95 | if __name__ == "__main__": 96 | parser = argparse.ArgumentParser('Vocabulary Preparation') 97 | parser.add_argument('--char', action='store_true', help='build char-level vocabulary') 98 | parser.add_argument('--lower', action='store_true', help='lower-case datasets') 99 | parser.add_argument('--embeddings', type=str, default='no', help='pre-trained word embedding path') 100 | parser.add_argument('inputs', type=str, help='the input file path, separate with comma') 101 | parser.add_argument('output', type=str, help='the output file name') 102 | 103 | args = parser.parse_args() 104 | 105 | vocab = Vocab(lower=args.lower) 106 | for data_file in args.inputs.split(','): 107 | with open(data_file, 'r') as reader: 108 | for text in reader: 109 | tokens = text.strip().split() 110 | 111 | for token in tokens: 112 | if not args.char: 113 | vocab.insert(token) 114 | else: 115 | for char in list(token): 116 | vocab.insert(char) 117 | 118 | vocab.sort_vocab(least_freq=3 if args.char else -1) 119 | 120 | # process the vocabulary with pretrained-embeddings 121 | if args.embeddings != "no": 122 | embed_tokens = {} 123 | embed_size = None 124 | with open(args.embeddings, 'r') as reader: 125 | for line in reader: 126 | segs = line.strip().split(' ') 127 | 128 | token = segs[0] 129 | # Not used in our training data, pass 130 | if token not in vocab.word2id: 131 | continue 132 | embed_tokens[token] = list(map(float, segs[1:])) 133 | 134 | if embed_size is None: 135 | embed_size = len(segs) - 1 136 | 137 | vocab = Vocab(lower=args.lower) 138 | for token in embed_tokens: 139 | vocab.insert(token) 140 | 141 | # load embeddings 142 | embeddings = np.zeros([len(embed_tokens), embed_size]) 143 | for token in embed_tokens: 144 | # 3: the special symbols 145 | embeddings[vocab.get_id(token) - 3] = embed_tokens[token] 146 | np.savez(args.output + ".npz", data=embeddings) 147 | 148 | vocab.save_vocab(args.output) 149 | 150 | print("Loading {} tokens from {}".format(vocab.size(), args.inputs)) 151 | -------------------------------------------------------------------------------- /doc/code/vocab.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import os 8 | import argparse 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | import sys 13 | reload(sys) 14 | sys.setdefaultencoding('utf-8') 15 | 16 | from bert.tokenization import BasicTokenizer as Tokenizer 17 | 18 | 19 | class Vocab(object): 20 | def __init__(self, lower=False, vocab_file=None): 21 | self.lower = lower 22 | 23 | self.word2id = {} 24 | self.id2word = {} 25 | self.word2count = {} 26 | 27 | self.pad_sym = "<pad>" 28 | self.eos_sym = "<eos>" 29 | self.unk_sym = "<unk>" 30 | 31 | self.clean() 32 | 33 | self.pretrained_embedding = None 34 | 35 | if vocab_file is not None: 36 | self.load_vocab(vocab_file) 37 | 38 | if os.path.exists(vocab_file + ".npz"): 39 | pretrain_embedding = np.load(vocab_file + ".npz")['data'] 40 | self.pretrained_embedding = pretrain_embedding 41 | 42 | def clean(self): 43 | self.word2id = {} 44 | self.id2word = {} 45 | self.word2count = {} 46 | 47 | self.insert(self.pad_sym) 48 | self.insert(self.unk_sym) 49 | self.insert(self.eos_sym) 50 | 51 | def insert(self, token): 52 | token = token if not self.lower else token.lower() 53 | if token not in self.word2id: 54 | index = len(self.word2id) 55 | self.word2id[token] = index 56 | self.id2word[index] = token 57 | 58 | self.word2count[token] = 0 59 | self.word2count[token] += 1 60 | 61 | def size(self): 62 | return len(self.word2id) 63 | 64 | def load_vocab(self, vocab_file): 65 | with open(vocab_file, 'r') as reader: 66 | for token in reader: 67 | self.insert(token.strip()) 68 | 69 | def get_token(self, id): 70 | if id in self.id2word: 71 | return self.id2word[id] 72 | return self.unk_sym 73 | 74 | def get_id(self, token): 75 | token = token if not self.lower else token.lower() 76 | if token in self.word2id: 77 | return self.word2id[token] 78 | return self.word2id[self.unk_sym] 79 | 80 | def
sort_vocab(self, least_freq=-1): 81 | sorted_word2count = sorted( 82 | self.word2count.items(), key=lambda x: - x[1]) 83 | self.clean() 84 | for word, freq in sorted_word2count: 85 | if least_freq > 0: 86 | if freq <= least_freq: 87 | continue 88 | self.insert(word) 89 | 90 | def save_vocab(self, vocab_file): 91 | with open(vocab_file, 'w') as writer: 92 | for id in range(self.size()): 93 | writer.write(self.id2word[id].encode("utf-8") + "\n") 94 | 95 | np.savez(vocab_file + ".npz", data=self.pretrained_embedding) 96 | 97 | def to_id(self, tokens, append_eos=True): 98 | if not append_eos: 99 | return [self.get_id(token) for token in tokens] 100 | else: 101 | return [self.get_id(token) for token in 102 | tokens + [self.eos_sym]] 103 | 104 | def to_tokens(self, ids): 105 | return [self.get_token(id) for id in ids] 106 | 107 | def eos(self): 108 | return self.get_id(self.eos_sym) 109 | 110 | def pad(self): 111 | return self.get_id(self.pad_sym) 112 | 113 | def make_vocab(self, data_set, use_char=False, embedding_path=None): 114 | tf.logging.info("Starting Reading Data in {} Manner".format(use_char)) 115 | tokenizer = Tokenizer(do_lower_case=False) 116 | 117 | for data_iter in [data_set.get_train_data(), 118 | data_set.get_dev_data(), 119 | data_set.get_test_data()]: 120 | for sample in data_iter: 121 | label, document = sample 122 | 123 | tokens = tokenizer.tokenize(document) 124 | for token in tokens: 125 | if not use_char: 126 | self.insert(token) 127 | else: 128 | for char in list(token): 129 | self.insert(char) 130 | 131 | tf.logging.info("Data Loading Over, Starting Sorted") 132 | self.sort_vocab(least_freq=3 if use_char else -1) 133 | 134 | # process the vocabulary with pretrained-embeddings 135 | if embedding_path is not None: 136 | tf.logging.info("Pretrained Word Embedding Loading") 137 | embed_tokens = {} 138 | embed_size = None 139 | with open(embedding_path, 'r') as reader: 140 | for line in reader: 141 | segs = line.strip().split(' ') 142 | 143 | token = segs[0] 144 | # Not used in our training data, pass 145 | if token not in self.word2id: 146 | continue 147 | embed_tokens[token] = list(map(float, segs[1:])) 148 | 149 | if embed_size is None: 150 | embed_size = len(segs) - 1 151 | 152 | self.clean() 153 | for token in embed_tokens: 154 | self.insert(token) 155 | 156 | # load embeddings 157 | embeddings = np.zeros([len(embed_tokens), embed_size]) 158 | for token in embed_tokens: 159 | # 3: the special symbols 160 | embeddings[self.get_id(token) - 3] = embed_tokens[token] 161 | 162 | self.pretrained_embedding = embeddings 163 | 164 | tf.logging.info("Vocabulary Loading Finished") 165 | -------------------------------------------------------------------------------- /nli/README.md: -------------------------------------------------------------------------------- 1 | ## Natural Language Inference 2 | 3 | The dataset is the Stanford Natural Language Inference (SNLI) corpus, which we treat as a three-way classification task. 4 | We use an encoder-attention-decoder architecture and stack two additional biRNN layers on top of the final sequence representation. 5 | Both the GloVe word embedding and a character embedding are used for word-level representation. 6 | Main experimental results are summarized below.
| Model | #Params | Base ACC | Base Time | +LN ACC | +LN Time | +BERT ACC | +BERT Time | +LN+BERT ACC | +LN+BERT Time |
|-------|---------|----------|-----------|---------|----------|-----------|------------|--------------|---------------|
| Rocktaschel et al. (2016) | 250K | 83.50 | - | - | - | - | - | - | - |
| This work: LSTM | 8.36M | 84.27 | 0.262 | 86.03 | 0.432 | 89.95 | 0.544 | 90.49 | 0.696 |
| This work: GRU | 6.41M | 85.71 | 0.245 | 86.05 | 0.419 | 90.29 | 0.529 | 90.10 | 0.695 |
| This work: ATR | 2.87M | 84.88 | 0.210 | 85.81 | 0.307 | 90.00 | 0.494 | 90.28 | 0.580 |
| This work: SRU | 5.48M | 84.28 | 0.258 | 85.32 | 0.283 | 89.98 | 0.543 | 90.09 | 0.555 |
| This work: LRN | 4.25M | 84.88 | 0.209 | 85.06 | 0.223 | 89.98 | 0.488 | 89.93 | 0.506 |
101 | 102 | *LN*: layer normalization; *Time*: time in seconds per training batch, measured from 1k training steps. 103 | 104 | ## Requirement 105 | tensorflow >= 1.8.1 106 | 107 | ## How to Run? 108 | 109 | - download and preprocess dataset 110 | 111 | - The dataset link: https://nlp.stanford.edu/projects/snli/ 112 | - Prepare separate data files: 113 | 114 | We provide a simple processing script `convert_to_plain.py` in the `scripts` folder. By calling: 115 | ``` 116 | python convert_to_plain.py snli_1.0/[ds].txt 117 | ``` 118 | you can get the `*.p, *.q, *.l` files referenced in `config.py`. *[ds]* indicates `snli_1.0_train.txt`, 119 | `snli_1.0_dev.txt` and `snli_1.0_test.txt`. We only preserve `'entailment'`, `'neutral'` and `'contradiction'` instances; 120 | others are dropped. 121 | 122 | - Prepare embedding and vocabulary 123 | 124 | Download the [pre-trained GloVe embedding](http://nlp.stanford.edu/data/glove.840B.300d.zip). Then prepare 125 | the character and word vocabularies using `vocab.py` as follows: 126 | ``` 127 | # word embedding & vocabulary 128 | python vocab.py --embeddings [path-to-glove-embedding] train.p,train.q,dev.p,dev.q,test.p,test.q word_vocab 129 | # char embedding 130 | python vocab.py --char train.p,train.q,dev.p,dev.q,test.p,test.q char_vocab 131 | ``` 132 | 133 | - Download the pre-trained BERT embedding (if you plan to work with BERT) 134 | 135 | - training and evaluation 136 | 137 | - Train the model as follows: 138 | ``` 139 | # configure your cuda library if necessary 140 | export CUDA_ROOT=XXX 141 | export PATH=$CUDA_ROOT/bin:$PATH 142 | export LD_LIBRARY_PATH=$CUDA_ROOT/lib64:$LD_LIBRARY_PATH 143 | 144 | # LRN 145 | python code/run.py --mode train --config config.py --parameters=gpus=[0],cell="lrn",layer_norm=False,output_dir="train_no_ln" >& log.noln 146 | # LRN + LN 147 | python code/run.py --mode train --config config.py --parameters=gpus=[0],cell="lrn",layer_norm=True,output_dir="train_ln" >& log.ln 148 | # LRN + BERT 149 | python code/run.py --mode train --config config_bert.py --parameters=gpus=[0],cell="lrn",layer_norm=False,output_dir="train_no_ln_bert" >& log.noln.bert 150 | # LRN + LN + BERT 151 | python code/run.py --mode train --config config_bert.py --parameters=gpus=[0],cell="lrn",layer_norm=True,output_dir="train_ln_bert" >& log.ln.bert 152 | ``` 153 | Other hyperparameter settings are available in the given config.py. 154 | 155 | - Test the model as follows: 156 | ``` 157 | # LRN 158 | python code/run.py --mode test --config config.py --parameters=gpus=[0],cell="lrn",layer_norm=False,output_dir="train_no_ln/best",test_output="out.noln" >& log.noln.test 159 | # LRN + LN 160 | python code/run.py --mode test --config config.py --parameters=gpus=[0],cell="lrn",layer_norm=True,output_dir="train_ln/best",test_output="out.ln" >& log.ln.test 161 | # LRN + BERT 162 | python code/run.py --mode test --config config_bert.py --parameters=gpus=[0],cell="lrn",layer_norm=False,output_dir="train_no_ln_bert/best",test_output="out.noln.bert" >& log.noln.bert.test 163 | # LRN + LN + BERT 164 | python code/run.py --mode test --config config_bert.py --parameters=gpus=[0],cell="lrn",layer_norm=True,output_dir="train_ln_bert/best",test_output="out.ln.bert" >& log.ln.bert.test 165 | ``` 166 | 167 | ## Credits 168 | 169 | Source code structure is adapted from [zero](https://github.com/bzhangGo/zero). --------------------------------------------------------------------------------