├── .gitignore ├── LICENSE.md ├── MANIFEST.in ├── NEWS.md ├── README.md ├── bin ├── nmtpy ├── nmtpy-build-vocab ├── nmtpy-ckpt-info ├── nmtpy-coco-metrics └── nmtpy-install-extra ├── doc ├── Makefile ├── _static │ └── img │ │ └── logo.png ├── conf.py ├── datasets.rst ├── index.rst ├── intro │ ├── 00_installation.rst │ ├── 01_implementing_a_model.rst │ └── 02_configuring_an_experiment.rst ├── make.bat ├── models.rst ├── requirements.txt └── vocabulary.rst ├── environment.yml ├── examples └── v4.0.0 │ ├── mmt │ ├── README.md │ ├── mmt-task-en-fr-encdecinit.conf │ ├── mmt-task-en-fr-multimodalatt.conf │ └── mmt-task-en-fr-nmt.conf │ └── speech │ ├── README.md │ ├── asr-bilstmp-char.conf │ ├── asr-bilstmp-s1k.conf │ └── scripts │ ├── prepare.sh │ └── word2char ├── ipynb └── att.ipynb ├── nmtpytorch ├── __init__.py ├── cleanup.py ├── cocoeval │ ├── README.md │ ├── __init__.py │ ├── bleu │ │ ├── LICENSE.bleu │ │ ├── __init__.py │ │ ├── bleu.py │ │ └── bleu_scorer.py │ ├── cider │ │ ├── __init__.py │ │ ├── cider.py │ │ └── cider_scorer.py │ ├── meteor │ │ ├── __init__.py │ │ └── meteor.py │ └── rouge │ │ ├── __init__.py │ │ └── rouge.py ├── config.py ├── datasets │ ├── __init__.py │ ├── collate.py │ ├── imagefolder.py │ ├── kaldi.py │ ├── label.py │ ├── multimodal.py │ ├── npy.py │ ├── numpy_sequence.py │ ├── shelve.py │ └── text.py ├── evaluator.py ├── layers │ ├── __init__.py │ ├── argselect.py │ ├── attention │ │ ├── __init__.py │ │ ├── co.py │ │ ├── dot.py │ │ ├── hierarchical.py │ │ ├── mhco.py │ │ ├── mlp.py │ │ ├── scaled_dot.py │ │ └── uniform.py │ ├── decoders │ │ ├── __init__.py │ │ ├── conditional.py │ │ ├── conditionalmm.py │ │ ├── multisourceconditional.py │ │ ├── simplegru.py │ │ ├── switchinggru.py │ │ ├── vector.py │ │ └── xu.py │ ├── embedding │ │ ├── __init__.py │ │ └── pembedding.py │ ├── encoders │ │ ├── __init__.py │ │ ├── bilstmp.py │ │ ├── image.py │ │ ├── multimodal_bilstmp.py │ │ ├── multimodal_text.py │ │ └── text.py │ ├── ff.py │ ├── flatten.py │ ├── fusion.py │ ├── max_margin.py │ ├── pool.py │ ├── rnninit.py │ ├── seq_conv.py │ └── transformers │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── embedding.py │ │ ├── encoder.py │ │ ├── positionwise_ff.py │ │ └── residual_lnorm.py ├── lib │ └── multi-bleu.perl ├── logger.py ├── mainloop.py ├── metrics │ ├── __init__.py │ ├── cer.py │ ├── meteor.py │ ├── metric.py │ ├── multibleu.py │ ├── rouge.py │ ├── sacrebleu.py │ └── wer.py ├── models │ ├── __init__.py │ ├── asr.py │ ├── attentive_mmt.py │ ├── multimodal_asr.py │ ├── nmt.py │ ├── simple_mmt.py │ ├── stale │ │ ├── README.md │ │ ├── acapt.py │ │ ├── nli.py │ │ └── sat.py │ └── tfnmt.py ├── monitor.py ├── optimizer.py ├── samplers │ ├── __init__.py │ ├── approx.py │ └── bucket.py ├── tester.py ├── translator.py ├── utils │ ├── __init__.py │ ├── data.py │ ├── device.py │ ├── filterchain.py │ ├── io.py │ ├── kaldi.py │ ├── misc.py │ ├── ml_metrics.py │ ├── nn.py │ ├── tensorboard.py │ └── topology.py └── vocabulary.py ├── scripts ├── README.md ├── create-pretrained-embs ├── dump-attention.py └── package.sh ├── setup.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | nmtpytorch/lib/data/*gz 3 | nmtpytorch.egg-info 4 | .cache 5 | build/ 6 | dist/ 7 | doc/_build/ 8 | ipynb/.ipynb_checkpoints 9 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## MIT License 2 | 3 | 
Copyright (c) 2017 - Le Mans University - Language and Speech Technology (LST) Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -- 24 | 25 | **nmtpytorch** includes code from the following projects, which have their own licenses: 26 | 27 | - `multi-bleu.perl` from [mosesdecoder](https://github.com/moses-smt/mosesdecoder) [[LGPL-2.1](https://github.com/moses-smt/mosesdecoder/blob/master/COPYING)] 28 | - `pycocoevalcap` from [coco-caption](https://github.com/tylin/coco-caption) [[BSD-2-Clause](https://github.com/tylin/coco-caption/blob/master/license.txt)] 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include nmtpytorch/lib/multi-bleu.perl 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![nmtpytorch](https://github.com/lium-lst/nmtpytorch/blob/master/doc/_static/img/logo.png?raw=true "nmtpytorch") 2 | 3 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 4 | [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/) 5 | 6 | # Note 7 | 8 | This project is not actively maintained, so issues opened here are unlikely to be addressed in a timely manner. If you are interested, there is a recent fork of this repository called [pysimt](https://github.com/ImperialNLP/pysimt) which includes Transformer-based architectures as well. 9 | 10 | # Overview 11 | `nmtpytorch` allows training of various end-to-end neural architectures including 12 | but not limited to neural machine translation, image captioning and automatic 13 | speech recognition systems. The initial codebase was in `Theano` and was 14 | inspired by the famous [dl4mt-tutorial](https://github.com/nyu-dl/dl4mt-tutorial) 15 | codebase.
16 | 17 | `nmtpytorch` received valuable contributions from the [Grounded Sequence-to-sequence Transduction Team](https://github.com/srvk/jsalt-2018-grounded-s2s) 18 | of the *Frederick Jelinek Memorial Summer Workshop 2018*: 19 | 20 | Loic Barrault, Ozan Caglayan, Amanda Duarte, Desmond Elliott, Spandana Gella, Nils Holzenberger, 21 | Chirag Lala, Jasmine (Sun Jae) Lee, Jindřich Libovický, Pranava Madhyastha, 22 | Florian Metze, Karl Mulligan, Alissa Ostapenko, Shruti Palaskar, Ramon Sanabria, Lucia Specia and Josiah Wang. 23 | 24 | If you use **nmtpytorch**, you may want to cite the following [paper](https://ufal.mff.cuni.cz/pbml/109/art-caglayan-et-al.pdf): 25 | ``` 26 | @article{nmtpy2017, 27 | author = {Ozan Caglayan and 28 | Mercedes Garc\'{i}a-Mart\'{i}nez and 29 | Adrien Bardet and 30 | Walid Aransa and 31 | Fethi Bougares and 32 | Lo\"{i}c Barrault}, 33 | title = {NMTPY: A Flexible Toolkit for Advanced Neural Machine Translation Systems}, 34 | journal = {Prague Bull. Math. Linguistics}, 35 | volume = {109}, 36 | pages = {15--28}, 37 | year = {2017}, 38 | url = {https://ufal.mff.cuni.cz/pbml/109/art-caglayan-et-al.pdf}, 39 | doi = {10.1515/pralin-2017-0035}, 40 | timestamp = {Tue, 12 Sep 2017 10:01:08 +0100} 41 | } 42 | ``` 43 | 44 | ## Installation 45 | 46 | You may want to install NVIDIA's [Apex](https://github.com/NVIDIA/apex) 47 | extensions. As of February 2020, we only monkey-patch `nn.LayerNorm` 48 | with Apex's implementation if the library is installed and found. 49 | 50 | ### pip 51 | 52 | You can install `nmtpytorch` from `PyPI` using `pip` (or `pip3` depending on your 53 | operating system and environment): 54 | 55 | ``` 56 | $ pip install nmtpytorch 57 | ``` 58 | 59 | ### conda 60 | 61 | We provide an `environment.yml` file in the repository that you can use to create 62 | a ready-to-use Anaconda environment for `nmtpytorch`: 63 | 64 | ``` 65 | $ conda update --all 66 | $ git clone https://github.com/lium-lst/nmtpytorch.git 67 | $ conda env create -f nmtpytorch/environment.yml 68 | ``` 69 | 70 | **IMPORTANT:** After installing `nmtpytorch`, you **need** to run `nmtpy-install-extra` 71 | to download METEOR-related files into your `${HOME}/.nmtpy` folder. 72 | This step is only required once. 73 | 74 | ### Development Mode 75 | 76 | For continuous development and testing, it is sufficient to run `python setup.py develop` 77 | in the root folder of your Git checkout. From then on, all modifications to the source 78 | tree are taken into account directly, without requiring reinstallation. 79 | 80 | ## Documentation 81 | 82 | We currently only provide some preliminary documentation in our [wiki](https://github.com/lium-lst/nmtpytorch/wiki). 83 | 84 | ## Release Notes 85 | 86 | See [NEWS.md](NEWS.md).
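The `bin/nmtpy-build-vocab` script that follows dumps vocabularies as JSON dictionaries whose values are `"index count"` strings: special tokens come first with a count of 0, followed by the real tokens in decreasing order of frequency (see `freqs_to_dict()` below). As an illustration, a tiny vocabulary file could look like the following sketch — the tokens, indices and counts here are made up:

```python
import json

# Hypothetical nmtpy-build-vocab output for a tiny corpus.
vocab = {
    "<pad>": "0 0", "<bos>": "1 0", "<eos>": "2 0", "<unk>": "3 0",
    "the": "4 1021",  # most frequent real token gets the next free index
    "a": "5 730",
}
print(json.dumps(vocab, ensure_ascii=False, indent=2))
```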
87 | -------------------------------------------------------------------------------- /bin/nmtpy-build-vocab: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import json 6 | import pathlib 7 | import argparse 8 | from collections import OrderedDict 9 | 10 | import numpy as np 11 | 12 | from nmtpytorch.vocabulary import Vocabulary 13 | from nmtpytorch.utils.misc import pbar 14 | 15 | 16 | def freqs_to_dict(token_freqs, min_freq=0, max_items=0, exclude_symbols=False): 17 | # Get list of tokens 18 | tokens = list(token_freqs.keys()) 19 | 20 | # Collect their frequencies in a numpy array 21 | freqs = np.array(list(token_freqs.values())) 22 | 23 | tokendict = OrderedDict() 24 | if not exclude_symbols: 25 | for key, value in Vocabulary.TOKENS.items(): 26 | # Second value is the count information 27 | tokendict[key] = "{} 0".format(value) 28 | 29 | # Sort in descending order of frequency 30 | sorted_idx = np.argsort(freqs) 31 | if min_freq > 0: 32 | sorted_tokens = [(tokens[ii], freqs[ii]) for ii in sorted_idx[::-1] 33 | if freqs[ii] >= min_freq] 34 | else: 35 | sorted_tokens = [(tokens[ii], freqs[ii]) for ii in sorted_idx[::-1]] 36 | 37 | if max_items > 0: 38 | sorted_tokens = sorted_tokens[:max_items] 39 | 40 | # Start inserting from index offset 41 | offset = len(tokendict) 42 | for iidx, (token, freq) in enumerate(sorted_tokens): 43 | tokendict[token] = '{} {}'.format(iidx + offset, int(freq)) 44 | 45 | return tokendict 46 | 47 | 48 | def get_freqs(filename, cumul_dict=None): 49 | # We'll first count frequencies 50 | if cumul_dict is not None: 51 | # Let's accumulate frequencies 52 | token_freqs = cumul_dict 53 | else: 54 | token_freqs = OrderedDict() 55 | 56 | print("Reading file %s" % filename) 57 | with open(filename) as fhandle: 58 | for line in pbar(fhandle, unit='lines'): 59 | line = line.strip() 60 | if line: 61 | # Collect frequencies 62 | for word in line.split(): 63 | if word not in token_freqs: 64 | token_freqs[word] = 0 65 | token_freqs[word] += 1 66 | 67 | # Remove already available special tokens 68 | for key in Vocabulary.TOKENS: 69 | if key in token_freqs: 70 | print('Removing ', key) 71 | del token_freqs[key] 72 | 73 | return token_freqs 74 | 75 | 76 | def write_dict(fname, vocab): 77 | print("Dumping vocabulary (%d tokens) to %s..." 
% (len(vocab), fname)) 78 | with open(fname, 'w') as fhandle: 79 | json.dump(vocab, fhandle, ensure_ascii=False, indent=2) 80 | 81 | 82 | def main(): 83 | parser = argparse.ArgumentParser(prog='build-vocab') 84 | parser.add_argument('-o', '--output-dir', type=str, default='.', 85 | help='Output directory') 86 | parser.add_argument('-s', '--single', type=str, default=None, 87 | help='Name of the combined vocabulary file') 88 | parser.add_argument('-m', '--min-freq', type=int, default=0, 89 | help='Filter out tokens occurring < m times') 90 | parser.add_argument('-M', '--max-items', type=int, default=0, 91 | help='Keep the final vocabulary size less than this') 92 | parser.add_argument('-x', '--exclude-symbols', action='store_true', 93 | help='Do not add special <bos>, <eos>, <pad>, <unk>') 94 | parser.add_argument('files', type=str, nargs='+', 95 | help='Sentence files') 96 | args = parser.parse_args() 97 | 98 | if args.exclude_symbols: 99 | print('Warning: -x does not create vocabularies compatible ' 100 | 'with many of nmtpytorch\'s models.') 101 | 102 | output_dir = pathlib.Path(args.output_dir).expanduser() 103 | 104 | # In case it is needed 105 | all_freqs = OrderedDict() 106 | 107 | for filename in args.files: 108 | filename = pathlib.Path(filename).expanduser() 109 | suffix = ".vocab{}".format(filename.suffix) 110 | vocab_fname = filename.stem 111 | 112 | if args.single: 113 | # Get cumulative frequencies 114 | all_freqs = get_freqs(filename, all_freqs) 115 | 116 | else: 117 | # Get frequencies 118 | freqs = get_freqs(filename) 119 | # Build dictionary from frequencies 120 | tokendict = freqs_to_dict( 121 | freqs, args.min_freq, args.max_items, args.exclude_symbols) 122 | 123 | if args.min_freq > 0: 124 | vocab_fname += "-min%d" % args.min_freq 125 | if args.max_items > 0: 126 | vocab_fname += "-max%dtokens" % args.max_items 127 | vocab_fname = str((output_dir / vocab_fname)) + suffix 128 | write_dict(vocab_fname, tokendict) 129 | 130 | if args.single: 131 | vocab_fname = pathlib.Path(args.single) 132 | tokendict = freqs_to_dict( 133 | all_freqs, args.min_freq, args.max_items, args.exclude_symbols) 134 | write_dict(vocab_fname, tokendict) 135 | 136 | 137 | if __name__ == '__main__': 138 | sys.exit(main()) 139 | -------------------------------------------------------------------------------- /bin/nmtpy-ckpt-info: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import torch 5 | 6 | from nmtpytorch.config import Options 7 | from nmtpytorch.metrics import Evaluator 8 | from nmtpytorch.utils.misc import load_pt_file 9 | 10 | if __name__ == '__main__': 11 | try: 12 | pt_file = sys.argv[1] 13 | except IndexError: 14 | print('Usage: {} <.ckpt file>'.format(sys.argv[0])) 15 | sys.exit(1) 16 | 17 | data = load_pt_file(pt_file) 18 | weights, history, opts = data['model'], data['history'], data['opts'] 19 | 20 | if not history: 21 | print('This is not a .ckpt file with history information.') 22 | sys.exit(1) 23 | 24 | opts = Options.from_dict(opts) 25 | 26 | early_metric = opts.train['eval_metrics'].split(',')[0] 27 | 28 | print('Checkpoint saved at epoch: {} update: {}'.format(history['ectr'], 29 | history['uctr'])) 30 | for i, loss in enumerate(history['epoch_losses']): 31 | print('- Epoch {:<3} loss: {:.3f}'.format(i + 1, loss)) 32 | 33 | print('- Did {} validations with early-stop metric "{}"'.format( 34 | history['ectr'], 35 | early_metric)) 36 | 37 | for metric, hist in history['evals'].items(): 38 | best_vctr, best_val = Evaluator.find_best(metric, hist)
39 | print('- Best {:<10} so far: {:.2f} (Validation {})'.format(metric, 40 | best_val, 41 | best_vctr)) 42 | -------------------------------------------------------------------------------- /bin/nmtpy-coco-metrics: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Computes BLEU, ROUGE, METEOR, and CIDEr scores using the COCO metrics scripts 6 | """ 7 | import pathlib 8 | import argparse 9 | from collections import OrderedDict 10 | 11 | # Script taken and adapted from Kelvin Xu's arctic-captions project 12 | # https://github.com/kelvinxu/arctic-captions 13 | 14 | from nmtpytorch.cocoeval import Bleu, Meteor, Cider, Rouge 15 | from nmtpytorch.utils.misc import get_meteor_jar 16 | 17 | 18 | def print_table(results, sort_by='METEOR'): 19 | cols = ['Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 20 | 'METEOR', 'CIDEr', 'ROUGE_L'] 21 | for col in cols: 22 | print('|{:^15}|'.format(col), end='') 23 | print() 24 | 25 | results = sorted(results.items(), key=lambda x: x[1][sort_by]) 26 | 27 | for sysname, result in results: 28 | if len(results) > 1: 29 | print(sysname) 30 | for col in cols: 31 | print('|{:^15,.3f}|'.format(result[col]), end='') 32 | print() 33 | 34 | 35 | if __name__ == '__main__': 36 | parser = argparse.ArgumentParser(prog='coco-metrics') 37 | 38 | parser.add_argument("-w", "--write", action='store_true', 39 | help='Create a .score file containing the results.') 40 | parser.add_argument("-l", "--language", default='en', 41 | help='Hypothesis language (default: en)') 42 | parser.add_argument("-r", "--refs", type=argparse.FileType('r'), 43 | help="Path to all the reference files", nargs='+') 44 | parser.add_argument("systems", type=str, 45 | help="Per-system hypothesis file(s)", nargs='+') 46 | 47 | args = parser.parse_args() 48 | 49 | # Check for METEOR 50 | get_meteor_jar() 51 | 52 | # List of scorers 53 | scorers = [ 54 | (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), 55 | (Meteor(args.language), ["METEOR"]), 56 | (Cider(), ["CIDEr"]), 57 | (Rouge(), ["ROUGE_L"]), 58 | ] 59 | 60 | results = OrderedDict() 61 | 62 | # Read multiple reference files 63 | raw_refs = [list(map(str.strip, r)) for r in zip(*args.refs)] 64 | refs = {idx: rr for idx, rr in enumerate(raw_refs)} 65 | 66 | # Ranking of multiple systems is possible 67 | for hypfile in args.systems: 68 | with open(hypfile) as f: 69 | # List of hypothesis sentences for this system 70 | hypo = {idx: [line.strip()] for (idx, line) in enumerate(f)} 71 | 72 | result = OrderedDict() 73 | 74 | for scorer, method in scorers: 75 | score, _ = scorer.compute_score(refs, hypo) 76 | if score: 77 | if not isinstance(score, list): 78 | score = [score] 79 | for m, s in zip(method, score): 80 | result[m] = float('%.3f' % s) 81 | 82 | if args.write: 83 | with open("%s.score" % hypfile, 'w') as f: 84 | f.write("%s\n" % result) 85 | results[str(pathlib.Path(hypfile))] = result 86 | 87 | print_table(results) 88 | -------------------------------------------------------------------------------- /bin/nmtpy-install-extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | which java &> /dev/null 3 | if [[ "x$?" == "x1" ]]; then 4 | echo "'java' not found in PATH. You need to have a working JRE installation for METEOR." 5 | else 6 | echo "OK: Found 'java'." 7 | fi 8 | 9 | CACHE=${HOME}/.nmtpy 10 | METEOR=${CACHE}/meteor-data 11 | 12 | if [[ !
-d ${CACHE} ]]; then 13 | echo "Creating ${CACHE} folder..." 14 | mkdir -p ${CACHE} 15 | fi 16 | 17 | if [[ ! -d $METEOR ]]; then 18 | git clone https://github.com/ozancaglayan/meteor-1.5-data.git $METEOR 19 | pushd $METEOR 20 | ./recompress.sh 21 | popd 22 | fi 23 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/_static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lium-lst/nmtpytorch/fa31279aeb68ef1fdae9b8e7b6b331d134ad4c63/doc/_static/img/logo.png -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('../nmtpytorch')) 16 | 17 | #import pytorch_sphinx_theme 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'nmtpytorch' 23 | copyright = '2020, Ozan Caglayan' 24 | author = 'Ozan Caglayan' 25 | 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.autosummary', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.intersphinx', 37 | 'sphinx.ext.todo', 38 | 'sphinx.ext.coverage', 39 | 'sphinx.ext.napoleon', 40 | 'sphinx.ext.viewcode', 41 | #'sphinxcontrib.katex', 42 | 'sphinx.ext.autosectionlabel', 43 | #'javasphinx', 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['_templates'] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 
52 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 53 | 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = 'sphinx_rtd_theme' 61 | #html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] 62 | html_logo = '_static/img/logo.png' 63 | 64 | # Theme options are theme-specific and customize the look and feel of a theme 65 | # further. For a list of options available for each theme, see the 66 | # documentation. 67 | 68 | html_theme_options = { 69 | 'collapse_navigation': True, 70 | # 'pytorch_project': 'doc', 71 | # 'canonical_url': 'https://pytorch.org/docs/stable/', 72 | # 'display_version': True, 73 | 'logo_only': True, 74 | } 75 | 76 | # Add any paths that contain custom static files (such as style sheets) here, 77 | # relative to this directory. They are copied after the builtin static files, 78 | # so a file named "default.css" will overwrite the builtin "default.css". 79 | html_static_path = ['_static'] 80 | 81 | 82 | # -- Extension configuration ------------------------------------------------- -------------------------------------------------------------------------------- /doc/datasets.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Datasets 5 | ======================== 6 | 7 | .. automodule:: nmtpytorch.datasets 8 | .. currentmodule:: nmtpytorch.datasets 9 | 10 | 11 | TextDataset 12 | ------------------------ 13 | 14 | .. autoclass:: TextDataset 15 | :members: 16 | 17 | LabelDataset 18 | ------------------------ 19 | 20 | .. autoclass:: LabelDataset 21 | :members: 22 | 23 | ImageFolderDataset 24 | ------------------------ 25 | 26 | .. autoclass:: ImageFolderDataset 27 | :members: 28 | 29 | KeyedNPZDataset 30 | ------------------------ 31 | 32 | .. autoclass:: KeyedNPZDataset 33 | :members: 34 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. nmtpytorch documentation master file, created by 2 | sphinx-quickstart on Wed Jan 15 12:34:41 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | :github_url: https://github.com/lium-lst/nmtpytorch 7 | 8 | nmtpytorch documentation 9 | ======================== 10 | 11 | `nmtpytorch` is a framework around `PyTorch` with the objective of training 12 | complex sequence-to-sequence models. 13 | 14 | .. toctree:: 15 | :glob: 16 | :maxdepth: 1 17 | 18 | .. toctree:: 19 | :glob: 20 | :maxdepth: 1 21 | :caption: Introduction 22 | 23 | intro/* 24 | 25 | .. toctree:: 26 | :maxdepth: 1 27 | :caption: Datasets 28 | 29 | datasets 30 | 31 | .. toctree:: 32 | :maxdepth: 1 33 | :caption: Models 34 | 35 | models 36 | 37 | .. toctree:: 38 | :maxdepth: 1 39 | :caption: API Documentation 40 | 41 | vocabulary 42 | -------------------------------------------------------------------------------- /doc/intro/00_installation.rst: -------------------------------------------------------------------------------- 1 | .. 
role:: hidden 2 | :class: hidden-section 3 | 4 | Installation 5 | ============= 6 | 7 | bla bla bla 8 | 9 | -------------------------------------------------------------------------------- /doc/intro/01_implementing_a_model.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Implementing a model 5 | ===================== 6 | 7 | bla bla bla 8 | 9 | -------------------------------------------------------------------------------- /doc/intro/02_configuring_an_experiment.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Configuring an experiment 5 | ========================= 6 | 7 | bla bla bla 8 | 9 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/models.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Models 5 | ======================== 6 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | -------------------------------------------------------------------------------- /doc/vocabulary.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | nmtpytorch.vocabulary 5 | ======================== 6 | 7 | .. automodule:: nmtpytorch.vocabulary 8 | .. currentmodule:: nmtpytorch.vocabulary 9 | 10 | Vocabulary 11 | ---------- 12 | 13 | .. autoclass:: Vocabulary 14 | :members: 15 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: nmtpy 2 | 3 | dependencies: 4 | - python=3.7 5 | - cffi 6 | - cython 7 | - ipython 8 | - pyyaml 9 | - six 10 | - numpy 11 | - scikit-learn 12 | - tqdm 13 | - pillow 14 | - pip: 15 | - torch==1.4.0 16 | - torchvision==0.5.0 17 | - pytorch-ignite==0.3.0 18 | - sacrebleu>=1.2.9 19 | - editdistance==0.4 20 | - subword_nmt==0.3.5 21 | - ipdb 22 | - -e . 
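  # Note: '-e .' pip-installs the checkout itself in editable (development)
  # mode; this assumes the environment is created from the repository clone,
  # as described in the README.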
23 | -------------------------------------------------------------------------------- /examples/v4.0.0/mmt/README.md: -------------------------------------------------------------------------------- 1 | Examples 2 | -- 3 | 4 | Here you can find example configuration files that are tied to specific versions 5 | of nmtpytorch. You need to set the paths accordingly for the configurations 6 | to work correctly. 7 | 8 | ## Multimodal task (En->Fr) 9 | 10 | **NOTE:** These examples do not use BPE-segmented files; instead, they simply 11 | use word forms. 12 | 13 | The dataset files are suffixed with `lc.norm.tok` in these experiments, which 14 | means that Moses scripts were used to lowercase -> normalize-punctuation -> tokenize 15 | the corpora. Specifically for tokenization, we enable the `-a` option to aggressively 16 | split hyphens. The following pipeline should do the trick (Moses scripts should 17 | be in `$PATH` for the following to work as-is): 18 | 19 | ```bash 20 | for split in train val test_201*flickr; do 21 | for llang in en fr; do 22 | lowercase.perl < ${split}.${llang} | normalize-punctuation.perl -l $llang | \ 23 | tokenizer.perl -q -a -l $llang -threads 4 > ${split}.lc.norm.tok.${llang} 24 | done 25 | done 26 | ``` 27 | 28 | Next you need to run `nmtpy-build-vocab` on the `train.lc.norm.tok.*` files 29 | to construct the vocabularies. You should now be able to train the systems 30 | accordingly. 31 | 32 | **NOTE:** For multimodal systems, you may want to L2-normalize the feature files 33 | and save the normalized versions; see the [LIUM-CVC WMT18 paper](https://arxiv.org/abs/1809.00151): 34 | 35 | ```python 36 | x = np.load('foo.npy') 37 | np.save('foo-l2norm.npy', x / np.linalg.norm(x, axis=-1, keepdims=True)) 38 | ``` 39 | 40 | ### mmt-task-en-fr-nmt.conf 41 | 42 | A baseline NMT for the En->Fr language pair 43 | of Multi30K. You can download the Multi30K dataset from [here](https://github.com/multi30k/dataset). 44 | 45 | ### mmt-task-en-fr-encdecinit.conf 46 | 47 | - A baseline multimodal NMT for the En->Fr language pair of Multi30K. You need 48 | to have `.npy` feature files for image features in order to train this model. 49 | 50 | - A feature file should contain a tensor of shape `(n, feat_dim)` where `n` is the 51 | number of sentences in the split and `feat_dim` is the dimensionality of the features (a quick shape sanity check is sketched at the end of this file). 52 | 53 | - Depending on the features you use, you need to adjust the `feat_dim` option in the configuration file. 54 | 55 | - You can download the provided ResNet-50 feature files for the WMT18 shared task 56 | from [here](https://drive.google.com/drive/folders/1I2ufg3rTva3qeBkEc-xDpkESsGkYXgCf?usp=sharing). 57 | 58 | - The feature files for this model have `avgpool` in their filenames and the 59 | `feat_dim` is `2048`. 60 | 61 | ### mmt-task-en-fr-multimodalatt.conf 62 | 63 | A multimodal attentive NMT baseline replicating [this paper](https://arxiv.org/abs/1609.03976). 64 | You now need to use the convolutional feature files that can be downloaded from the same link above. 65 | 66 | - The feature files for this model have `res4frelu` in their filenames and the `feat_dim` is `1024`. 67 | 68 | #### More variants 69 | 70 | - You can switch to [hierarchical attention](https://arxiv.org/pdf/1704.06567.pdf) by 71 | changing `fusion_type: concat` to `fusion_type: hierarchical` in the `*multimodalatt.conf` 72 | file.
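Before training the multimodal configurations that follow, it may also help to sanity-check that each `.npy` feature file has exactly one row per sentence — a minimal sketch, assuming the file names used by the configs below:

```python
import numpy as np

feats = np.load('train-resnet50-avgpool.npy')   # expected shape: (n, 2048)
with open('train.lc.norm.tok.en') as f:
    n_sents = sum(1 for _ in f)
assert feats.shape[0] == n_sents, (feats.shape, n_sents)
```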
73 | -------------------------------------------------------------------------------- /examples/v4.0.0/mmt/mmt-task-en-fr-encdecinit.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 0 3 | model_type: MultimodalNMT 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: meteor,bleu,loss 8 | # Tokenization was done with -a parameter of moses tokenizer 9 | eval_filters: de-hyphen 10 | eval_beam: 12 11 | eval_batch_size: 32 12 | save_best_metrics: True 13 | eval_max_len: 100 14 | n_checkpoints: 0 15 | l2_reg: 1e-05 16 | lr_decay: plateau 17 | lr_decay_revert: False 18 | lr_decay_factor: 0.5 19 | lr_decay_patience: 2 20 | gclip: 1 21 | optimizer: adam 22 | lr: 0.0004 23 | batch_size: 64 24 | save_path: /path/to/experiment/folder 25 | tensorboard_dir: ${save_path}/tb_dir 26 | 27 | [model] 28 | att_type: mlp 29 | att_bottleneck: hid 30 | enc_dim: 320 31 | dec_dim: 320 32 | emb_dim: 200 33 | dropout_emb: 0.4 34 | dropout_ctx: 0.5 35 | dropout_out: 0.5 36 | n_encoders: 2 37 | tied_emb: 2way 38 | bucket_by: en 39 | max_len: None 40 | 41 | sampler_type: approximate 42 | sched_sampling: 0 43 | dec_init: zero 44 | bos_type: emb 45 | 46 | feat_fusion: encdecinit 47 | feat_dim: 2048 48 | feat_activ: tanh 49 | direction: en:Text, feats:Numpy -> fr:Text 50 | 51 | [data] 52 | tok_root: /path/to/tokenized/files/folder 53 | feats_root: /path/to/avgpooled/resnet/feature/files 54 | 55 | train_set: {'en': '${tok_root}/train.lc.norm.tok.en', 56 | 'feats': '${feats_root}/train-resnet50-avgpool.npy', 57 | 'fr': '${tok_root}/train.lc.norm.tok.fr'} 58 | 59 | val_set: {'en': '${tok_root}/val.lc.norm.tok.en', 60 | 'feats': '${feats_root}/val-resnet50-avgpool.npy', 61 | 'fr': '${tok_root}/val.lc.norm.tok.fr'} 62 | 63 | test_2016_flickr_set: {'en': '${tok_root}/test_2016_flickr.lc.norm.tok.en', 64 | 'feats': '${feats_root}/test_2016_flickr-resnet50-avgpool.npy', 65 | 'fr': '${tok_root}/test_2016_flickr.lc.norm.tok.fr'} 66 | 67 | test_2017_flickr_set: {'en': '${tok_root}/test_2017_flickr.lc.norm.tok.en', 68 | 'feats': '${feats_root}/test_2017_flickr-resnet50-avgpool.npy', 69 | 'fr': '${tok_root}/test_2017_flickr.lc.norm.tok.fr'} 70 | 71 | test_2018_flickr_set: {'en': '${tok_root}/test_2018_flickr.lc.norm.tok.en', 72 | 'feats': '${feats_root}/test_2018_flickr-resnet50-avgpool.npy'} 73 | 74 | [vocabulary] 75 | en: ${data:tok_root}/train.lc.norm.tok.vocab.en 76 | fr: ${data:tok_root}/train.lc.norm.tok.vocab.fr 77 | -------------------------------------------------------------------------------- /examples/v4.0.0/mmt/mmt-task-en-fr-multimodalatt.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 0 3 | model_type: AttentiveMNMTFeatures 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: meteor,bleu,loss 8 | # Tokenization was done with -a parameter of moses tokenizer 9 | eval_filters: de-hyphen 10 | eval_beam: 12 11 | eval_batch_size: 32 12 | save_best_metrics: True 13 | eval_max_len: 100 14 | n_checkpoints: 0 15 | l2_reg: 1e-05 16 | lr_decay: plateau 17 | lr_decay_revert: False 18 | lr_decay_factor: 0.5 19 | lr_decay_patience: 2 20 | gclip: 1 21 | optimizer: adam 22 | lr: 0.0004 23 | batch_size: 64 24 | save_path: /path/to/experiment/folder 25 | tensorboard_dir: ${save_path}/tb_dir 26 | 27 | [model] 28 | att_type: mlp 29 | att_bottleneck: hid 30 | enc_dim: 320 31 | dec_dim: 320 32 | emb_dim: 200 33 | dropout_emb: 0.4 34 | dropout_ctx: 0.5 35 | dropout_out: 0.5 36 | 
n_encoders: 2 37 | tied_emb: 2way 38 | bucket_by: en 39 | max_len: None 40 | 41 | sampler_type: approximate 42 | sched_sampling: 0 43 | dec_init: zero 44 | bos_type: emb 45 | 46 | fusion_type: concat 47 | n_channels: 1024 48 | direction: en:Text, image:Numpy -> fr:Text 49 | 50 | [data] 51 | tok_root: /path/to/tokenized/files/folder 52 | img_root: /path/to/res4f-relu/resnet/feature/files 53 | 54 | train_set: {'en': '${tok_root}/train.lc.norm.tok.en', 55 | 'image': '${img_root}/train-resnet50-res4f_relu.npy', 56 | 'fr': '${tok_root}/train.lc.norm.tok.fr'} 57 | 58 | val_set: {'en': '${tok_root}/val.lc.norm.tok.en', 59 | 'image': '${img_root}/val-resnet50-res4f_relu.npy', 60 | 'fr': '${tok_root}/val.lc.norm.tok.fr'} 61 | 62 | test_2016_flickr_set: {'en': '${tok_root}/test_2016_flickr.lc.norm.tok.en', 63 | 'image': '${img_root}/test_2016_flickr-resnet50-res4f_relu.npy', 64 | 'fr': '${tok_root}/test_2016_flickr.lc.norm.tok.fr'} 65 | 66 | test_2017_flickr_set: {'en': '${tok_root}/test_2017_flickr.lc.norm.tok.en', 67 | 'image': '${img_root}/test_2017_flickr-resnet50-res4f_relu.npy', 68 | 'fr': '${tok_root}/test_2017_flickr.lc.norm.tok.fr'} 69 | 70 | test_2018_flickr_set: {'en': '${tok_root}/test_2018_flickr.lc.norm.tok.en', 71 | 'image': '${img_root}/test_2018_flickr-resnet50-res4f_relu.npy'} 72 | 73 | [vocabulary] 74 | en: ${data:tok_root}/train.lc.norm.tok.vocab.en 75 | fr: ${data:tok_root}/train.lc.norm.tok.vocab.fr 76 | -------------------------------------------------------------------------------- /examples/v4.0.0/mmt/mmt-task-en-fr-nmt.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 0 3 | model_type: NMT 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: meteor,bleu,loss 8 | # Tokenization was done with -a parameter of moses tokenizer 9 | eval_filters: de-hyphen 10 | eval_beam: 12 11 | eval_batch_size: 32 12 | save_best_metrics: True 13 | eval_max_len: 100 14 | n_checkpoints: 0 15 | l2_reg: 1e-05 16 | lr_decay: plateau 17 | lr_decay_revert: False 18 | lr_decay_factor: 0.5 19 | lr_decay_patience: 2 20 | gclip: 1 21 | optimizer: adam 22 | lr: 0.0004 23 | batch_size: 64 24 | save_path: /path/to/experiment/folder 25 | tensorboard_dir: ${save_path}/tb_dir 26 | 27 | [model] 28 | att_type: mlp 29 | att_bottleneck: hid 30 | enc_dim: 320 31 | dec_dim: 320 32 | emb_dim: 200 33 | dropout_emb: 0.4 34 | dropout_ctx: 0.5 35 | dropout_out: 0.5 36 | n_encoders: 2 37 | tied_emb: 2way 38 | bucket_by: en 39 | max_len: None 40 | 41 | sampler_type: approximate 42 | sched_sampling: 0 43 | dec_init: zero 44 | bos_type: emb 45 | direction: en:Text -> fr:Text 46 | 47 | [data] 48 | tok_root: /path/to/tokenized/files/folder 49 | 50 | train_set: {'en': '${tok_root}/train.lc.norm.tok.en', 51 | 'fr': '${tok_root}/train.lc.norm.tok.fr'} 52 | 53 | val_set: {'en': '${tok_root}/val.lc.norm.tok.en', 54 | 'fr': '${tok_root}/val.lc.norm.tok.fr'} 55 | 56 | test_2016_flickr_set: {'en': '${tok_root}/test_2016_flickr.lc.norm.tok.en', 57 | 'fr': '${tok_root}/test_2016_flickr.lc.norm.tok.fr'} 58 | 59 | test_2017_flickr_set: {'en': '${tok_root}/test_2017_flickr.lc.norm.tok.en', 60 | 'fr': '${tok_root}/test_2017_flickr.lc.norm.tok.fr'} 61 | 62 | test_2018_flickr_set: {'en': '${tok_root}/test_2018_flickr.lc.norm.tok.en'} 63 | 64 | [vocabulary] 65 | en: ${data:tok_root}/train.lc.norm.tok.vocab.en 66 | fr: ${data:tok_root}/train.lc.norm.tok.vocab.fr 67 | -------------------------------------------------------------------------------- 
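A note on the `${...}` placeholders used throughout these `.conf` files: they follow configparser-style extended interpolation, where `${option}` resolves within the current section and `${section:option}` reaches across sections (e.g. `${data:tok_root}` under `[vocabulary]`). A self-contained sketch of that resolution rule — nmtpytorch's own `config.py` loader may differ in details:

```python
from configparser import ConfigParser, ExtendedInterpolation

cfg = ConfigParser(interpolation=ExtendedInterpolation())
cfg.read_string("""
[data]
tok_root = /path/to/tokenized/files/folder

[vocabulary]
en = ${data:tok_root}/train.lc.norm.tok.vocab.en
""")
print(cfg['vocabulary']['en'])
# -> /path/to/tokenized/files/folder/train.lc.norm.tok.vocab.en
```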
/examples/v4.0.0/speech/README.md: -------------------------------------------------------------------------------- 1 | Automatic Speech Recognition (ASR) 2 | -- 3 | 4 | Two example configuration files for character-level and subword-level 5 | ASR systems. These experiments make use of the [ASR](https://github.com/lium-lst/nmtpytorch/blob/master/nmtpytorch/models/asr.py) model from `nmtpytorch`. 6 | 7 | ## Preparing Kaldi features 8 | 9 | Right now nmtpytorch only supports Kaldi feature files, namely `.ark` and `.scp` 10 | files, along with a special folder structure. Let's assume that all speech-related 11 | files are under `~/data/swbd`: 12 | - Each train/test set split should have a corresponding subfolder with the following files: 13 | - feats.scp 14 | - cmvn.scp 15 | - text 16 | - utt2spk 17 | - The paths to `.ark` files in the `.scp` files should be valid paths. 18 | 19 | Once you have this folder hierarchy ready, you can modify the input and output folder paths in `scripts/prepare.sh` accordingly and launch the script. The script will create the uncompressed feature files in the format required by `nmtpytorch`. Specifically, the output folder hierarchy should look like this: 20 | 21 | ``` 22 | /tmp/data/swbd/ 23 | ├── eval2000_test 24 | │   ├── feats_local.ark 25 | │   ├── feats_local.scp 26 | │   └── segments.len 27 | ├── train_dev 28 | │   ├── feats_local.ark 29 | │   ├── feats_local.scp 30 | │   └── segments.len 31 | └── train_nodup 32 | ├── feats_local.ark 33 | ├── feats_local.scp 34 | └── segments.len 35 | ``` 36 | 37 | **NOTE:** Unlike Kaldi, we remove the utterance ID columns from the label files for `nmtpytorch`, so you need to make sure that the `text` files are in the **same order** as the `feats.scp` file. 38 | 39 | Now if you look at the provided configuration files, you will see that the speech modalities tagged with `en_speech` keys point to the folders listed above: 40 | 41 | ``` 42 | [data] 43 | root: /tmp/data/swbd 44 | 45 | train_set: {'en_speech': '${root}/train_nodup', 46 | 'en_text': '${root}/train_nodup/text.char.nmtpy'} 47 | 48 | val_set: {'en_speech': '${root}/train_dev', 49 | 'en_text': '${root}/train_dev/text.char.nmtpy'} 50 | 51 | eval2000_set: {'en_speech': '${root}/eval2000_test'} 52 | 53 | [vocabulary] 54 | en_text: ${data:root}/train_nodup/text.char.vocab.nmtpy 55 | ``` 56 | 57 | ### Adding label files and vocabularies 58 | 59 | The last files to prepare are the target-side transcript files tagged with `en_text` keys above. These are plain text files **without the utterance ID columns**. Each line corresponds to an utterance/segment and explicit spaces are defined with the `<s>` token. An example line should look like this: 60 | ``` 61 | y e a h <s> y e a h <s> w e l l <s> i - <s> i - <s> t h a t ' s <s> r i g h t <s> a n d <s> i t 62 | ``` 63 | 64 | **HINT:** You can use `scripts/word2char` to convert a word-level text file to the above format easily. 65 | 66 | On the other hand, a subword-level file prepared with the `subword-nmt` tool looks like this: 67 | ``` 68 | all right th@@ an@@ ks bye bye 69 | ``` 70 | 71 | Once you have the transcript files preprocessed this way, you can run `nmtpy-build-vocab` to create the vocabulary file using the training sentence file: 72 | 73 | ``` 74 | $ nmtpy-build-vocab <training file> 75 | ``` 76 | 77 | ### Configuration Files 78 | 79 | - `asr-bilstmp-char.conf:` Character-level ASR baseline that uses character error rate (CER) as the early-stopping metric. 80 | - `asr-bilstmp-s1k.conf:` BPE-level ASR baseline example.
Here the early-stopping metric is WER. To correctly compute the WER over non-BPE files, a post-processing filter is activated in the configuration file: `eval_filters: de-bpe` 81 | 82 | ### Launching Training 83 | See [this](https://github.com/lium-lst/nmtpytorch/wiki/Running-Experiments) 84 | 85 | ### Decoding Afterwards 86 | Once training is over, you can use the `nmtpy translate` command to decode arbitrary dev/test sets using beam search. For example, to decode the `eval2000` set defined in the above config, you can run: 87 | 88 | ``` 89 | # batch_size: 32 beam_size: 10 output file prefix: eval2000 90 | # last argument is the model checkpoint file 91 | CUDA_VISIBLE_DEVICES=0 nmtpy translate -s eval2000 -b 32 -k 10 -o eval2000 <model.ckpt> 92 | ``` 93 | -------------------------------------------------------------------------------- /examples/v4.0.0/speech/asr-bilstmp-char.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 72000 3 | model_type: ASR 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: cer,loss 8 | eval_beam: 5 9 | eval_batch_size: 16 10 | save_best_metrics: True 11 | eval_max_len: 400 12 | n_checkpoints: 0 13 | l2_reg: 0 14 | gclip: 1 15 | optimizer: adam 16 | lr: 0.0004 17 | lr_decay: plateau 18 | lr_decay_revert: False 19 | lr_decay_factor: 0.5 20 | lr_decay_patience: 2 21 | batch_size: 36 22 | save_path: /path/to/save/the/experiments 23 | tensorboard_dir: ${save_path}/tb 24 | 25 | [model] 26 | att_type: mlp 27 | att_bottleneck: hid 28 | feat_dim: 43 29 | enc_dim: 256 30 | proj_dim: 256 31 | emb_dim: 49 32 | dec_dim: 256 33 | dropout: 0.4 34 | # 6 encoder layers 35 | enc_layers: '1_1_2_2_1_1' 36 | tied_dec_embs: True 37 | dec_init: mean_ctx 38 | bucket_by: en_speech 39 | # Enough coverage @ 1500 40 | max_len: 1500 41 | 42 | direction: en_speech:Kaldi -> en_text:Text 43 | 44 | [data] 45 | root: /tmp/data/swbd 46 | 47 | train_set: {'en_speech': '${root}/train_nodup', 48 | 'en_text': '${root}/train_nodup/text.char.nmtpy'} 49 | 50 | val_set: {'en_speech': '${root}/train_dev', 51 | 'en_text': '${root}/train_dev/text.char.nmtpy'} 52 | 53 | eval2000_set: {'en_speech': '${root}/eval2000_test'} 54 | 55 | [vocabulary] 56 | en_text: ${data:root}/train_nodup/text.char.vocab.nmtpy 57 | -------------------------------------------------------------------------------- /examples/v4.0.0/speech/asr-bilstmp-s1k.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 72000 3 | model_type: ASR 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: wer,loss 8 | # this is a BPE model, so de-bpe is necessary for correct WER computation 9 | eval_filters: de-bpe 10 | eval_beam: 5 11 | eval_batch_size: 16 12 | save_best_metrics: True 13 | eval_max_len: 400 14 | n_checkpoints: 0 15 | l2_reg: 0 16 | gclip: 1 17 | optimizer: adam 18 | lr: 0.0004 19 | lr_decay: plateau 20 | lr_decay_revert: False 21 | lr_decay_factor: 0.5 22 | lr_decay_patience: 2 23 | batch_size: 36 24 | save_path: /path/to/save/the/experiments 25 | tensorboard_dir: ${save_path}/tb 26 | 27 | [model] 28 | att_type: mlp 29 | att_bottleneck: hid 30 | feat_dim: 43 31 | enc_dim: 256 32 | proj_dim: 256 33 | emb_dim: 256 34 | dec_dim: 256 35 | dropout: 0.4 36 | # 6 encoder layers 37 | enc_layers: '1_1_2_2_1_1' 38 | tied_dec_embs: True 39 | dec_init: mean_ctx 40 | bucket_by: en_speech 41 | # Enough coverage @ 1500 42 | max_len: 1500 43 | 44 | direction: en_speech:Kaldi -> en_text:Text 45 | 46 | [data] 47 | root:
/tmp/data/swbd 48 | 49 | train_set: {'en_speech': '${root}/train_nodup', 50 | 'en_text': '${root}/train_nodup/text.s1k.nmtpy'} 51 | 52 | val_set: {'en_speech': '${root}/train_dev', 53 | 'en_text': '${root}/train_dev/text.s1k.nmtpy'} 54 | 55 | eval2000_set: {'en_speech': '${root}/eval2000_test'} 56 | 57 | [vocabulary] 58 | en_text: ${data:root}/train_nodup/text.s1k.vocab.nmtpy 59 | -------------------------------------------------------------------------------- /examples/v4.0.0/speech/scripts/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ############################################ 4 | # Example preparation script for Switchboard 5 | ############################################ 6 | # Kaldi utilities `feat-to-len` and `copy-feats` should be in your $PATH 7 | 8 | # List split names here. These correspond to Kaldi-prepared subfolder 9 | # names as well. 10 | splits=( train_nodup train_dev eval2000_test ) 11 | 12 | # The root folder containing the split subfolders 13 | input_folder=~/data/swbd 14 | 15 | # The required hierarchy is: 16 | # ${input_folder}/split_name/ 17 | # - utt2spk 18 | # - text 19 | # - feats.scp 20 | # - cmvn.scp 21 | 22 | # Where to put the prepared nmtpy-ready files 23 | output_folder=/tmp/data/swbd 24 | 25 | # Create the folder 26 | mkdir -p $output_folder 27 | 28 | #################################################################### 29 | # REQUIREMENT CHECK 30 | # Make sure that the files are ordered (in sync) w.r.t. utterance IDs 31 | #################################################################### 32 | for split in "${splits[@]}"; do 33 | # Original .scp with valid paths to .ark files such as the following 34 | # sw02054-A_000204-000790 /path/to/ark/file:offset 35 | scp=${input_folder}/${split}/feats.scp 36 | 37 | # Transcription per line prefixed with utterance IDs as well 38 | # sw02054-A_000204-000790 so let me tell you a little bit ... 39 | txt=${input_folder}/${split}/text 40 | 41 | # NOTE: Make sure that the files are ordered (in sync) w.r.t. utterance IDs 42 | # Compare utterance IDs to make sure that they're ordered/aligned 43 | cmp -s <(cut -d' ' -f1 < $scp) <(cut -d' ' -f1 < $txt) || \ 44 | { echo "Error: [$split] feats.scp and text are not aligned"; exit 1; } 45 | done 46 | 47 | ############################### 48 | # Generate `segments.len` files 49 | ############################### 50 | for split in "${splits[@]}"; do 51 | mkdir -p $output_folder/${split} 52 | utt2spk="${input_folder}/${split}/utt2spk" 53 | cmvn="${input_folder}/${split}/cmvn.scp" 54 | scp="${input_folder}/${split}/feats.scp" 55 | seg=${output_folder}/${split}/segments.len 56 | 57 | if [[ ! -f $seg ]]; then 58 | # Extract frame counts 59 | echo "Extracting frame counts for $split" 60 | feat-to-len scp:$scp ark,t:- | cut -d' ' -f2 > $seg 61 | fi 62 | 63 | if [[ !
-f "${output_folder}/${split}/feats_local.ark" ]]; then 64 | feats_cmvn="ark,s,cs:apply-cmvn --norm-vars=true --utt2spk=ark:$utt2spk scp:$cmvn scp:$scp ark:- |" 65 | copy-feats "$feats_cmvn" ark,scp:`realpath $output_folder/${split}/feats_local.ark`,$output_folder/${split}/feats_local.scp & 66 | fi 67 | done 68 | 69 | # Wait for completion 70 | wait 71 | -------------------------------------------------------------------------------- /examples/v4.0.0/speech/scripts/word2char: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import argparse 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(prog='word2char') 7 | 8 | parser.add_argument('-i', '--has-ids', action='store_true', 9 | help='Enable if input file has segment IDs in first column.') 10 | parser.add_argument('-s', '--space', default='<s>', 11 | help='Placeholder token for explicit space characters.') 12 | 13 | # Parse arguments 14 | args = parser.parse_args() 15 | 16 | for line in sys.stdin: 17 | text = line.strip() 18 | if args.has_ids: 19 | sid, text = text.split(' ', 1) 20 | 21 | # After the character join, each original space becomes three 22 | # consecutive spaces; map those to the explicit space token. 23 | text = ' '.join(list(text)).replace('   ', ' {} '.format(args.space)) 24 | print(text.replace('[ n o i s e ]', '[noise]').replace( 25 | '[ v o c a l i z e d - n o i s e ]', '[vocalized-noise]').replace( 26 | '[ l a u g h t e r ]', '[laughter]')) 27 | -------------------------------------------------------------------------------- /nmtpytorch/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '4.0.0' 2 | -------------------------------------------------------------------------------- /nmtpytorch/cleanup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import signal 5 | import atexit 6 | import pathlib 7 | import traceback 8 | 9 | 10 | class Cleanup: 11 | def __init__(self): 12 | self.temp_files = set() 13 | self.processes = set() 14 | 15 | def register_tmp_file(self, tmp_file): 16 | """Add new temp file to global set.""" 17 | self.temp_files.add(pathlib.Path(tmp_file)) 18 | 19 | def register_proc(self, pid): 20 | """Add new process to global set.""" 21 | self.processes.add(pid) 22 | 23 | def unregister_proc(self, pid): 24 | """Remove given PID from global set.""" 25 | self.processes.remove(pid) 26 | 27 | def __call__(self): 28 | """Cleanup registered temp files and kill PIDs.""" 29 | for tmp_file in filter(lambda x: x.exists(), self.temp_files): 30 | tmp_file.unlink() 31 | 32 | for proc in self.processes: 33 | try: 34 | os.kill(proc, signal.SIGTERM) 35 | except ProcessLookupError: 36 | pass 37 | 38 | def __repr__(self): 39 | repr_ = "Cleanup Manager\n" 40 | if len(self.processes) > 0: 41 | repr_ += "Tracking Processes\n" 42 | for proc in self.processes: 43 | repr_ += " {}\n".format(proc) 44 | 45 | if len(self.temp_files) > 0: 46 | repr_ += "Tracking Temporary Files\n" 47 | for tmp_file in self.temp_files: 48 | repr_ += " {}\n".format(tmp_file) 49 | 50 | return repr_ 51 | 52 | @staticmethod 53 | def register_exception_handler(logger, quit_on_exception=False): 54 | """Set up the exception handler.""" 55 | 56 | def exception_handler(exctype, val, trace): 57 | """Let Python call this when an exception is uncaught.""" 58 | logger.info( 59 | ''.join(traceback.format_exception(exctype, val, trace))) 60 | 61 | def exception_handler_quits(exctype, val, trace): 62 | """Let Python call this when an exception is uncaught.""" 63 | logger.info(
64 | ''.join(traceback.format_exception(exctype, val, trace))) 65 | sys.exit(1) 66 | 67 | if quit_on_exception: 68 | sys.excepthook = exception_handler_quits 69 | else: 70 | sys.excepthook = exception_handler 71 | 72 | @staticmethod 73 | def register_handler(logger, _atexit=True, _signals=True, 74 | exception_quits=False): 75 | """Register atexit and signal handlers.""" 76 | if _atexit: 77 | # Register exit handler 78 | atexit.register(cleanup) 79 | 80 | if _signals: 81 | # Register SIGINT and SIGTERM 82 | signal.signal(signal.SIGINT, signal_handler) 83 | signal.signal(signal.SIGTERM, signal_handler) 84 | 85 | Cleanup.register_exception_handler(logger, exception_quits) 86 | 87 | 88 | # Create a global cleaner 89 | cleanup = Cleanup() 90 | 91 | 92 | def signal_handler(signum, frame): 93 | """Let Python call this when SIGINT or SIGTERM is caught.""" 94 | cleanup() 95 | sys.exit(0) 96 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/README.md: -------------------------------------------------------------------------------- 1 | pycocoevalcap 2 | --- 3 | 4 | This is a copy from 5 | https://github.com/tylin/coco-caption/tree/master/pycocoevalcap 6 | 7 | with Python 2 support dropped. 8 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | from .bleu.bleu import Bleu 3 | from .cider.cider import Cider 4 | from .rouge.rouge import Rouge 5 | from .meteor.meteor import Meteor 6 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/bleu/LICENSE.bleu: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE.
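The scorers in this `cocoeval` package share one interface: `compute_score(gts, res)` takes two dicts keyed by example id, where each `res` value is a single-hypothesis list and each `gts` value is a list of one or more references. A minimal usage sketch with made-up sentences (METEOR follows the same interface but additionally needs the external jar installed via `nmtpy-install-extra`):

```python
from nmtpytorch.cocoeval import Bleu, Rouge

gts = {0: ['a man rides a horse .', 'a person riding a horse .'],
       1: ['two dogs play in the snow .']}
res = {0: ['a man is riding a horse .'],
       1: ['two dogs are playing in the snow .']}

bleu, _ = Bleu(4).compute_score(gts, res)   # list: [Bleu_1 .. Bleu_4]
rouge, _ = Rouge().compute_score(gts, res)  # corpus-level ROUGE-L
```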
20 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/bleu/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/bleu/bleu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File Name : bleu.py 3 | # 4 | # Description : Wrapper for BLEU scorer. 5 | # 6 | # Creation Date : 06-01-2015 7 | # Last Modified : Thu 19 Mar 2015 09:13:28 PM PDT 8 | # Authors : Hao Fang and Tsung-Yi Lin 9 | 10 | from .bleu_scorer import BleuScorer 11 | 12 | 13 | class Bleu: 14 | def __init__(self, n=4): 15 | # by default, compute BLEU score up to 4-grams 16 | self._n = n 17 | self._hypo_for_image = {} 18 | self.ref_for_image = {} 19 | 20 | def compute_score(self, gts, res): 21 | 22 | bleu_scorer = BleuScorer(n=self._n) 23 | for id in sorted(gts.keys()): 24 | hypo = res[id] 25 | ref = gts[id] 26 | 27 | # Sanity check. 28 | assert isinstance(hypo, list) 29 | assert isinstance(ref, list) 30 | assert len(hypo) == 1 31 | assert len(ref) >= 1 32 | 33 | bleu_scorer += (hypo[0], ref) 34 | 35 | # score, scores = bleu_scorer.compute_score(option='shortest') 36 | # score, scores = bleu_scorer.compute_score(option='average',verbose=1) 37 | score, scores = bleu_scorer.compute_score(option='closest', verbose=0) 38 | 39 | # return (bleu, bleu_info) 40 | return score, scores 41 | 42 | def method(self): 43 | return "Bleu" 44 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/cider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/cider/cider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Filename: cider.py 3 | # 4 | # Description: Describes the class to compute the CIDEr 5 | # (Consensus-Based Image Description Evaluation) Metric 6 | # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) 7 | # 8 | # Creation Date: Sun Feb 8 14:16:54 2015 9 | # 10 | # Authors: Ramakrishna Vedantam and 11 | # Tsung-Yi Lin 12 | 13 | from .cider_scorer import CiderScorer 14 | 15 | 16 | class Cider: 17 | """Main class to compute the CIDEr metric.""" 18 | 19 | def __init__(self, test=None, refs=None, n=4, sigma=6.0): 20 | # set cider to sum over 1 to 4-grams 21 | self._n = n 22 | # set the standard deviation parameter for Gaussian penalty 23 | self._sigma = sigma 24 | 25 | def compute_score(self, gts, res): 26 | """Main function to compute CIDEr score 27 | 28 | Arguments: 29 | hypo_for_image (dict): dictionary with key <image> and 30 | value <tokenized hypothesis / candidate sentence> 31 | ref_for_image (dict): dictionary with key <image> and value <tokenized reference sentence> 32 | 33 | 34 | Returns: 35 | cider (float): computed CIDEr score for the corpus 36 | """ 37 | 38 | cider_scorer = CiderScorer(n=self._n, sigma=self._sigma) 39 | 40 | for id in sorted(gts.keys()): 41 | hypo = res[id] 42 | ref = gts[id] 43 | 44 | # Sanity check.
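# 'res' must map each id to a single-item hypothesis list, while 'gts' may
# hold several reference sentences per id; both share the same key set.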
45 | assert isinstance(hypo, list)
46 | assert isinstance(ref, list)
47 | assert len(hypo) == 1
48 | assert len(ref) > 0
49 |
50 | cider_scorer += (hypo[0], ref)
51 |
52 | (score, scores) = cider_scorer.compute_score()
53 |
54 | return score, scores
55 |
56 | def method(self):
57 | return "CIDEr"
58 |
--------------------------------------------------------------------------------
/nmtpytorch/cocoeval/meteor/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/nmtpytorch/cocoeval/meteor/meteor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Python wrapper for METEOR implementation, by Xinlei Chen
3 | # Acknowledge Michael Denkowski for the generous discussion and help
4 |
5 | import os
6 | import shutil
7 | import threading
8 | import subprocess
9 |
10 | from ...utils.misc import get_meteor_jar
11 |
12 |
13 | class Meteor:
14 | def __init__(self, language, norm=False):
15 | self.jar = str(get_meteor_jar())
16 | self.meteor_cmd = ['java', '-jar', '-Xmx2G', self.jar,
17 | '-', '-', '-stdio', '-l', language]
18 | self.env = os.environ
19 | self.env['LC_ALL'] = 'en_US.UTF-8'
20 |
21 | # Sanity check
22 | if shutil.which('java') is None:
23 | raise RuntimeError('METEOR requires java which is not installed.')
24 |
25 | if norm:
26 | self.meteor_cmd.append('-norm')
27 |
28 | self.meteor_p = subprocess.Popen(self.meteor_cmd,
29 | stdin=subprocess.PIPE,
30 | stdout=subprocess.PIPE,
31 | stderr=subprocess.PIPE,
32 | env=self.env,
33 | universal_newlines=True, bufsize=1)
34 | # Used to guarantee thread safety
35 | self.lock = threading.Lock()
36 |
37 | def method(self):
38 | return "METEOR"
39 |
40 | def compute_score(self, gts, res):
41 | imgIds = sorted(list(gts.keys()))
42 | scores = []
43 |
44 | eval_line = 'EVAL'
45 | self.lock.acquire()
46 | for i in imgIds:
47 | assert len(res[i]) == 1
48 |
49 | hypothesis_str = res[i][0].replace('|||', '').replace('  ', ' ')
50 | score_line = ' ||| '.join(
51 | ('SCORE', ' ||| '.join(gts[i]), hypothesis_str))
52 |
53 | # We obtained --> SCORE ||| reference 1 words |||
54 | # reference n words ||| hypothesis words
55 | self.meteor_p.stdin.write(score_line + '\n')
56 | stat = self.meteor_p.stdout.readline().strip()
57 | eval_line += ' ||| {}'.format(stat)
58 |
59 | # Send to METEOR
60 | self.meteor_p.stdin.write(eval_line + '\n')
61 |
62 | # Collect segment scores
63 | for i in range(len(imgIds)):
64 | score = float(self.meteor_p.stdout.readline().strip())
65 | scores.append(score)
66 |
67 | # Final score
68 | final_score = 100 * float(self.meteor_p.stdout.readline().strip())
69 | self.lock.release()
70 |
71 | return final_score, scores
72 |
73 | def __del__(self):
74 | self.lock.acquire()
75 | self.meteor_p.stdin.close()
76 | self.meteor_p.wait()
77 | self.lock.release()
78 |
--------------------------------------------------------------------------------
/nmtpytorch/cocoeval/rouge/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'vrama91'
2 |
--------------------------------------------------------------------------------
/nmtpytorch/cocoeval/rouge/rouge.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File Name : rouge.py
3 | #
4 | # Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)
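# (Editorial note) For a candidate of length m, a reference of length n
# and a longest common subsequence of length l, ROUGE-L uses P = l/m,
# R = l/n and F = (1 + beta^2) * P * R / (R + beta^2 * P); calc_score()
# below implements exactly this with beta = 1.2.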
5 | #
6 | # Creation Date : 2015-01-07 06:03
7 | # Author : Ramakrishna Vedantam
8 |
9 | import numpy as np
10 |
11 |
12 | def my_lcs(string, sub):
13 | """
14 | Calculates longest common subsequence for a pair of tokenized strings
15 | :param string : list of str : tokens from a string split using whitespace
16 | :param sub : list of str : shorter string, also split using whitespace
17 | :returns: length (int): length of the longest common subsequence
18 | between the two strings
19 |
20 | my_lcs only gives length of the longest common subsequence,
21 | not the actual LCS
22 | """
23 | if len(string) < len(sub):
24 | sub, string = string, sub
25 |
26 | lengths = [[0 for i in range(0, len(sub) + 1)] for j
27 | in range(0, len(string) + 1)]
28 |
29 | for j in range(1, len(sub) + 1):
30 | for i in range(1, len(string) + 1):
31 | if string[i - 1] == sub[j - 1]:
32 | lengths[i][j] = lengths[i - 1][j - 1] + 1
33 | else:
34 | lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])
35 |
36 | return lengths[len(string)][len(sub)]
37 |
38 |
39 | class Rouge:
40 | """Class for computing ROUGE-L score for a set of candidate sentences
41 | for the MS COCO test set."""
42 | def __init__(self):
43 | # vrama91: updated the value below based on discussion with Hovy
44 | self.beta = 1.2
45 |
46 | def calc_score(self, candidate, refs):
47 | """
48 | Compute ROUGE-L score given one candidate and references for an image
49 | :param candidate: list of str : single-element list holding the candidate sentence to be evaluated
50 | :param refs: list of str : COCO reference sentences for the particular
51 | image to be evaluated
52 | :returns score: float (ROUGE-L score for the candidate evaluated
53 | against references)
54 | """
55 | assert len(candidate) == 1
56 | assert len(refs) > 0
57 | prec = []
58 | rec = []
59 |
60 | # split into tokens
61 | token_c = candidate[0].split(" ")
62 |
63 | for reference in refs:
64 | # split into tokens
65 | token_r = reference.split(" ")
66 | # compute the longest common subsequence
67 | lcs = my_lcs(token_r, token_c)
68 | prec.append(lcs / float(len(token_c)))
69 | rec.append(lcs / float(len(token_r)))
70 |
71 | prec_max = max(prec)
72 | rec_max = max(rec)
73 |
74 | if prec_max != 0 and rec_max != 0:
75 | score = ((1 + self.beta**2) * prec_max * rec_max)
76 | score /= float(rec_max + self.beta ** 2 * prec_max)
77 | else:
78 | score = 0.0
79 | return score
80 |
81 | def compute_score(self, gts, res):
82 | """
83 | Computes Rouge-L score given a set of reference and candidate
84 | sentences for the dataset
85 |
86 | :param hypo_for_image: dict : candidate / test sentences with
87 | "image name" key and "tokenized sentences" as values
88 | :param ref_for_image: dict : reference MS-COCO sentences with
89 | "image name" key and "tokenized sentences" as values
90 | :returns: average_score: float (mean ROUGE-L score computed by
91 | averaging scores for all the images)
92 | """
93 | score = []
94 | for id in sorted(gts.keys()):
95 | hypo = res[id]
96 | ref = gts[id]
97 |
98 | score.append(self.calc_score(hypo, ref))
99 |
100 | # Sanity check.
101 | assert isinstance(hypo, list) 102 | assert isinstance(ref, list) 103 | assert len(hypo) == 1 104 | assert len(ref) > 0 105 | 106 | average_score = np.mean(np.array(score)) 107 | return average_score, np.array(score) 108 | 109 | def method(self): 110 | return "Rouge" 111 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # First the basic types 2 | from .npy import NumpyDataset 3 | from .kaldi import KaldiDataset 4 | from .imagefolder import ImageFolderDataset 5 | from .text import TextDataset 6 | from .numpy_sequence import NumpySequenceDataset 7 | from .label import LabelDataset 8 | from .shelve import ShelveDataset 9 | 10 | # Second the selector function 11 | def get_dataset(type_): 12 | return { 13 | 'numpy': NumpyDataset, 14 | 'numpysequence': NumpySequenceDataset, 15 | 'kaldi': KaldiDataset, 16 | 'imagefolder': ImageFolderDataset, 17 | 'text': TextDataset, 18 | 'label': LabelDataset, 19 | 'shelve': ShelveDataset, 20 | }[type_.lower()] 21 | 22 | 23 | # Should always be at the end 24 | from .multimodal import MultimodalDataset 25 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/collate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # This will eventually disappear as this only provides .size 4 | # which can be inferred if we guarantee that batch_dim is always at 5 | # a given position regardless of input/output feature/tensor types. 6 | 7 | 8 | class Batch(dict): 9 | """A custom dictionary representing a batch.""" 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | dim1s = set([x.size(1) for x in self.values()]) 13 | assert len(dim1s) == 1, \ 14 | "Incompatible batch dimension (1) between modalities." 15 | self.size = dim1s.pop() 16 | 17 | def device(self, device): 18 | self.update({k: v.to(device) for k, v in self.items()}) 19 | 20 | def __repr__(self): 21 | s = "Batch(size={})\n".format(self.size) 22 | for data_source, tensor in self.items(): 23 | s += " {:10s} -> {} - {}\n".format( 24 | str(data_source), tensor.shape, tensor.device) 25 | return s 26 | 27 | 28 | def get_collate(data_sources): 29 | """Returns a special collate_fn which will view the underlying data 30 | in terms of the given DataSource keys.""" 31 | 32 | def collate_fn(batch): 33 | return Batch( 34 | {ds: ds.torchify([elem[ds] for elem in batch]) for ds in data_sources}, 35 | ) 36 | 37 | return collate_fn 38 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/imagefolder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from functools import lru_cache 3 | from pathlib import Path 4 | 5 | from PIL import Image 6 | 7 | import torch 8 | from torch.utils import data 9 | from torchvision import transforms 10 | 11 | 12 | class ImageFolderDataset(data.Dataset): 13 | """A variant of torchvision.datasets.ImageFolder which drops support for 14 | target loading, i.e. this only loads images not attached to any other 15 | label. 16 | 17 | This class also makes use of ``lru_cache`` to cache an image file once 18 | opened to avoid repetitive disk access. 
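Images are read in the order given by ``index.txt``, which lists one
image path per line, relative to ``root``.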
19 | 20 | Arguments: 21 | root (str): The root folder that contains the images and index.txt 22 | resize (int, optional): An optional integer to be given to 23 | ``torchvision.transforms.Resize``. Default: ``None``. 24 | crop (int, optional): An optional integer to be given to 25 | ``torchvision.transforms.CenterCrop``. Default: ``None``. 26 | replicate(int, optional): Replicate the image names ``replicate`` 27 | times in order to process the same image ``replicate`` times 28 | if ``replicate`` sentences are available during training time. 29 | warmup(bool, optional): If ``True``, the images will be read once 30 | at the beginning to fill the cache. 31 | """ 32 | def __init__(self, root, resize=None, crop=None, 33 | replicate=1, warmup=False, **kwargs): 34 | self.root = Path(root).expanduser().resolve() 35 | self.replicate = replicate 36 | 37 | # Image list in dataset order 38 | self.index = self.root / 'index.txt' 39 | 40 | _transforms = [] 41 | if resize is not None: 42 | _transforms.append(transforms.Resize(resize)) 43 | if crop is not None: 44 | _transforms.append(transforms.CenterCrop(crop)) 45 | _transforms.append(transforms.ToTensor()) 46 | _transforms.append( 47 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 48 | std=[0.229, 0.224, 0.225])) 49 | self.transform = transforms.Compose(_transforms) 50 | 51 | if not self.index.exists(): 52 | raise(RuntimeError( 53 | "index.txt does not exist in {}".format(self.root))) 54 | 55 | self.image_files = [] 56 | with self.index.open() as f: 57 | for fname in f: 58 | fname = self.root / fname.strip() 59 | assert fname.exists(), "{} does not exist.".format(fname) 60 | self.image_files.append(str(fname)) 61 | 62 | # Setup reader 63 | self.read_image = lru_cache(maxsize=self.__len__())(self._read_image) 64 | 65 | if warmup: 66 | for idx in range(self.__len__()): 67 | self[idx] 68 | 69 | # Replicate the list if requested 70 | self.image_files = self.image_files * self.replicate 71 | 72 | def _read_image(self, fname): 73 | with open(fname, 'rb') as f: 74 | img = Image.open(f).convert('RGB') 75 | return self.transform(img) 76 | 77 | @staticmethod 78 | def to_torch(batch, **kwargs): 79 | return torch.stack(batch) 80 | 81 | def __getitem__(self, idx): 82 | return self.read_image(self.image_files[idx]) 83 | 84 | def __len__(self): 85 | return len(self.image_files) 86 | 87 | def __repr__(self): 88 | s = "{}(replicate={}) ({} samples)\n".format( 89 | self.__class__.__name__, self.replicate, self.__len__()) 90 | s += " {}\n".format(self.root) 91 | if self.transform: 92 | s += ' Transforms: {}\n'.format( 93 | self.transform.__repr__().replace('\n', '\n' + ' ')) 94 | return s 95 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/kaldi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pathlib import Path 3 | from tqdm import tqdm 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torch.nn.utils.rnn import pad_sequence 8 | 9 | from ..utils.kaldi import readMatrixShape, readMatrixByOffset 10 | 11 | # TODO 12 | # ---- 13 | # an lru_cache() decorated version of readMatrixByOffset() will make sure that 14 | # all the training data is cached into memory after 1 epoch. 15 | 16 | 17 | class KaldiDataset(Dataset): 18 | """A PyTorch dataset for Kaldi .scp/ark. 
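Each line of ``feats_local.scp`` is expected to follow Kaldi's pointer
format, i.e. ``uttid path/to/file.ark:offset``, which is how
``__init__`` below parses it.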
19 | 20 | Arguments: 21 | fname (str or Path): A string or ``pathlib.Path`` object for 22 | a folder that contains ``feats_local.scp`` and optionally a ``segments.len`` 23 | file containing segment lengths. 24 | """ 25 | 26 | def __init__(self, fname, **kwargs): 27 | self.data = [] 28 | self.lengths = [] 29 | self.root = Path(fname) 30 | self.scp_path = self.root / 'feats_local.scp' 31 | self.len_path = self.root / 'segments.len' 32 | 33 | if not self.scp_path.exists(): 34 | raise RuntimeError('{} does not exist.'.format(self.scp_path)) 35 | 36 | if self.len_path.exists(): 37 | read_lengths = False 38 | # Read lengths file 39 | with open(self.len_path) as f: 40 | for line in f: 41 | self.lengths.append(int(line.strip())) 42 | else: 43 | # Read them below (this is slow) 44 | read_lengths = True 45 | 46 | with open(self.scp_path) as scp_input_file: 47 | for line in tqdm(scp_input_file, unit='segments'): 48 | uttid, pointer = line.strip().split() 49 | arkfile, offset = pointer.rsplit(':', 1) 50 | offset = int(offset) 51 | self.data.append((arkfile, offset)) 52 | if read_lengths: 53 | with open(arkfile, "rb") as g: 54 | g.seek(offset) 55 | feat_len = readMatrixShape(g)[0] 56 | 57 | self.lengths.append(feat_len) 58 | 59 | # Set dataset size 60 | self.size = len(self.data) 61 | 62 | if self.size != len(self.lengths): 63 | raise RuntimeError("Dataset size and lengths size does not match.") 64 | 65 | @staticmethod 66 | def to_torch(batch, **kwargs): 67 | return pad_sequence( 68 | [torch.FloatTensor(x) for x in batch], batch_first=False) 69 | 70 | def __getitem__(self, idx): 71 | """Read segment features from the actual .ark file.""" 72 | return readMatrixByOffset(*self.data[idx]) 73 | 74 | def __len__(self): 75 | return self.size 76 | 77 | def __repr__(self): 78 | s = "{} '{}' ({} samples)\n".format( 79 | self.__class__.__name__, self.scp_path.name, self.__len__()) 80 | return s 81 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/label.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pathlib import Path 3 | 4 | import torch 5 | from torch.utils.data import Dataset 6 | 7 | from ..utils.data import read_sentences 8 | 9 | 10 | class LabelDataset(Dataset): 11 | r"""A PyTorch dataset that returns a single integer representing a category. 12 | 13 | Arguments: 14 | fname (str or Path): A string or ``pathlib.Path`` object giving 15 | space delimited attributes per sentence. 16 | vocab (Vocabulary): A ``Vocabulary`` instance for the labels. 
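Labels are mapped to their vocabulary ids as-is; no ``<bos>``/``<eos>``
markers are added (note the ``eos=False, bos=False`` arguments to
``read_sentences`` below).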
17 | """ 18 | 19 | def __init__(self, fname, vocab, **kwargs): 20 | self.path = Path(fname) 21 | self.vocab = vocab 22 | 23 | # Detect glob patterns 24 | self.fnames = sorted(self.path.parent.glob(self.path.name)) 25 | 26 | if len(self.fnames) == 0: 27 | raise RuntimeError('{} does not exist.'.format(self.path)) 28 | elif len(self.fnames) > 1: 29 | raise RuntimeError("Multiple source files not supported.") 30 | 31 | # Read the label strings and map them to vocabulary 32 | self.data, _ = read_sentences( 33 | self.fnames[0], self.vocab, eos=False, bos=False) 34 | 35 | # number of possible classes is the vocab size 36 | self.n_classes = len(self.vocab) 37 | 38 | # Dataset size 39 | self.size = len(self.data) 40 | 41 | @staticmethod 42 | def to_torch(batch, **kwargs): 43 | return torch.LongTensor(batch).t() 44 | 45 | def __getitem__(self, idx): 46 | return self.data[idx] 47 | 48 | def __len__(self): 49 | return self.size 50 | 51 | def __repr__(self): 52 | s = "{} '{}' ({} samples)\n".format( 53 | self.__class__.__name__, self.fnames[0].name, self.__len__()) 54 | return s 55 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/npy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | class NumpyDataset(Dataset): 10 | r"""A PyTorch dataset for Numpy .npy/npz serialized tensor files. The 11 | serialized tensor's first dimension should be the batch dimension. 12 | 13 | Arguments: 14 | fname (str or Path): A string or ``pathlib.Path`` object for 15 | the relevant numpy file. 16 | key (str, optional): If `fname` is `.npz` file, its relevant `key` 17 | will be fetched from the serialized object. 18 | order_file (str, None): If given, will be used to map sample indices 19 | to tensors using this list. Useful for tiled or repeated 20 | experiments. 21 | revert (bool, optional): If `True`, the data order will be reverted 22 | for adversarial/incongruent experiments during test-time. 23 | """ 24 | 25 | def __init__(self, fname, key=None, order_file=None, revert=False, **kwargs): 26 | self.path = Path(fname) 27 | if not self.path.exists(): 28 | raise RuntimeError('{} does not exist.'.format(self.path)) 29 | 30 | if self.path.suffix == '.npy': 31 | self.data = np.load(self.path) 32 | elif self.path.suffix == '.npz': 33 | assert key, "A key should be provided for .npz files." 
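# An .npz archive can hold several named arrays; fetch the requested one.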
34 | self.data = np.load(self.path)[key]
35 |
36 | if order_file:
37 | with open(order_file) as orf:
38 | self.order = [int(x) for x in orf.read().strip().split('\n')]
39 | else:
40 | self.order = list(range(self.data.shape[0]))
41 |
42 | if revert:
43 | self.order = self.order[::-1]
44 |
45 | # Dataset size
46 | self.size = len(self.order)
47 |
48 | @staticmethod
49 | def to_torch(batch, **kwargs):
50 | # NOTE: Assumes x.shape == (n, *)
51 | x = torch.from_numpy(np.array(batch, dtype='float32'))
52 | # Convert it to (t(=1 if fixed features), n, c)
53 | # By default we flatten h*w to first dim for interoperability
54 | # Models should further reshape the tensor for their needs
55 | return x.view(*x.size()[:2], -1).permute(2, 0, 1)
56 |
57 | def __getitem__(self, idx):
58 | return self.data[self.order[idx]]
59 |
60 | def __len__(self):
61 | return self.size
62 |
63 | def __repr__(self):
64 | s = "{} '{}' ({} samples)\n".format(
65 | self.__class__.__name__, self.path.name, self.__len__())
66 | return s
67 |
--------------------------------------------------------------------------------
/nmtpytorch/datasets/numpy_sequence.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from functools import lru_cache
3 | import numpy as np
4 | import torch
5 | from torch.utils.data import Dataset
6 | from ..utils.misc import pbar
7 |
8 |
9 | class NumpySequenceDataset(Dataset):
10 | """Read a sequence of numpy arrays.
11 |
12 | Arguments:
13 | fname (str or Path): Path to a list of paths to Numpy `.npy` files
14 | where each file contains an array with shape `(n_features, feat_dim)`.
15 | If the lines are in `<path>:<length>` format, additional length
16 | information will be used for bucketing. If the file itself is
17 | a `.npy` file, it will be treated as an array of numpy objects.
18 | For cases where all features are the same length, you should use
19 | `NumpyDataset`.
20 | cache (bool, optional): Whether the accessed files will be cached
21 | in memory or not.
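Example list file (paths hypothetical): one ``feats/utt1.npy`` per
line, or ``feats/utt1.npy:137`` when precomputed lengths are
available for bucketing.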
22 | """ 23 | 24 | def __init__(self, fname, cache=False, **kwargs): 25 | self.fname = fname 26 | self.data = [] 27 | self.lengths = [] 28 | self.has_lengths = False 29 | self.cache = cache 30 | 31 | if not self.fname: 32 | raise RuntimeError('{} does not exist.'.format(self.fname)) 33 | 34 | if str(self.fname).endswith('.npy'): 35 | # Loads the whole dataset at once 36 | self.data = np.load(self.fname) 37 | self.lengths = [x.shape[0] for x in self.data] 38 | self.has_lengths = True 39 | self._read = lambda x: x 40 | else: 41 | with open(self.fname) as f_list: 42 | # Detect file format and seek back 43 | self.has_lengths = ':' in f_list.readline() 44 | f_list.seek(0) 45 | for line in pbar(f_list, unit='sents'): 46 | if self.has_lengths: 47 | path, length = line.strip().split(':') 48 | self.lengths.append(int(length)) 49 | else: 50 | path = line.strip() 51 | self.data.append(path) 52 | 53 | if self.cache: 54 | self._read = lru_cache(maxsize=len(self.data))(self._read_tensor) 55 | else: 56 | self._read = self._read_tensor 57 | 58 | # Set dataset size 59 | self.size = len(self.data) 60 | 61 | def _read_tensor(self, fname): 62 | """Reads the .npy file.""" 63 | return np.load(fname) 64 | 65 | def __getitem__(self, idx): 66 | # Each item is (t, feat_dim) 67 | return self._read(self.data[idx]) 68 | 69 | @staticmethod 70 | def to_torch(batch, **kwargs): 71 | # List of (t, feat_dim) 72 | max_len = max(x.shape[0] for x in batch) 73 | width = batch[0].shape[1] 74 | padded = [np.zeros((max_len, width)) for _ in batch] 75 | for pad, x in zip(padded, batch): 76 | pad[:x.shape[0]] = x 77 | # padded is (n_samples, t, feat_dim) 78 | # return (n, f, t) for compatibility with the other input sources 79 | return torch.from_numpy( 80 | np.array(padded, dtype='float32')).transpose(1, 2) 81 | 82 | def __len__(self): 83 | return self.size 84 | 85 | def __repr__(self): 86 | s = "{} (has_lengths={}) ({} samples)\n".format( 87 | self.__class__.__name__, self.has_lengths, self.__len__()) 88 | s += " {}\n".format(self.fname) 89 | return s 90 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/shelve.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import shelve 3 | from pathlib import Path 4 | 5 | from sklearn import preprocessing 6 | import numpy as np 7 | from torch.utils.data import Dataset 8 | 9 | from ..utils.data import pad_video_sequence 10 | 11 | 12 | class ShelveDataset(Dataset): 13 | r"""A PyTorch dataset for Shelve serialized tensor files. The 14 | serialized tensor's first dimension should be the batch dimension. 15 | 16 | Arguments: 17 | fname (str or Path): A string or ``pathlib.Path`` object for 18 | the relevant .shelve file. 19 | norm_and_scale: True or False: Should we normalise and scale 20 | the image features? 21 | """ 22 | 23 | def __init__(self, fname, key=None, norm_and_scale=False, **kwargs): 24 | self.path = Path('{}.dat'.format(fname)) 25 | if not self.path.exists(): 26 | raise RuntimeError('{} does not exist.'.format(self.path)) 27 | 28 | self.data = shelve.open(str(fname.resolve())) 29 | self.norm_and_scale = norm_and_scale 30 | 31 | # Dataset size 32 | self.size = len(self.data) 33 | 34 | # Stores the lengths of the input video sequences to enable bucketing 35 | self.lengths = self.read_sequence_lengths() 36 | 37 | def read_sequence_lengths(self): 38 | '''Returns an array with the number of video feature vectors 39 | stored for each image. 
TODO: This is expensive and a slow
40 | way to start the process.'''
41 | lengths = []
42 | for x in self.data:
43 | lengths.append(len(self.data[str(x)]))
44 | return lengths
45 |
46 | @staticmethod
47 | def to_torch(batch, **kwargs):
48 | ''' Pad the video sequence, if necessary.
49 | Transposes the video sequence to conform to the RNN expected inputs:
50 | n_samples x timesteps x feats -> timesteps x n_samples x feats
51 | '''
52 | batch = pad_video_sequence(batch)
53 | batch = batch.transpose(0, 1)
54 | return batch
55 |
56 | def __getitem__(self, idx):
57 | if self.norm_and_scale:
58 | feats = self.data[str(idx)]
59 | feats = preprocessing.normalize(feats)
60 | return feats
61 | else:
62 | return np.array(self.data[str(idx)])
63 |
64 | def __len__(self):
65 | return self.size
66 |
67 | def __repr__(self):
68 | s = "{} '{}' ({} samples)\n".format(
69 | self.__class__.__name__, self.path.name, self.__len__())
70 | return s
71 |
--------------------------------------------------------------------------------
/nmtpytorch/datasets/text.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | from pathlib import Path
4 |
5 | import torch
6 |
7 | from torch.utils.data import Dataset
8 | from torch.nn.utils.rnn import pad_sequence
9 |
10 | from ..utils.data import read_sentences
11 |
12 | logger = logging.getLogger('nmtpytorch')
13 |
14 |
15 | class TextDataset(Dataset):
16 | r"""A PyTorch dataset for sentences.
17 |
18 | Arguments:
19 | fname (str or Path): A string or ``pathlib.Path`` object giving
20 | the corpus.
21 | vocab (Vocabulary): A ``Vocabulary`` instance for the given corpus.
22 | bos (bool, optional): If ``True``, a special beginning-of-sentence
23 | ``<bos>`` marker will be prepended to sentences.
24 | """
25 |
26 | def __init__(self, fname, vocab, bos=False, eos=True, **kwargs):
27 | self.path = Path(fname)
28 | self.vocab = vocab
29 | self.bos = bos
30 | self.eos = eos
31 |
32 | # Detect glob patterns
33 | self.fnames = sorted(self.path.parent.glob(self.path.name))
34 |
35 | if len(self.fnames) == 0:
36 | raise RuntimeError('{} does not exist.'.format(self.path))
37 | elif len(self.fnames) > 1:
38 | logger.info('Multiple files found, using first: {}'.format(self.fnames[0]))
39 |
40 | # Read the sentences and map them to vocabulary
41 | self.data, self.lengths = read_sentences(
42 | self.fnames[0], self.vocab, bos=self.bos, eos=self.eos)
43 |
44 | # Dataset size
45 | self.size = len(self.data)
46 |
47 | @staticmethod
48 | def to_torch(batch, **kwargs):
49 | return pad_sequence(
50 | [torch.tensor(b, dtype=torch.long) for b in batch], batch_first=False)
51 |
52 | def __getitem__(self, idx):
53 | return self.data[idx]
54 |
55 | def __len__(self):
56 | return self.size
57 |
58 | def __repr__(self):
59 | s = "{} '{}' ({} sentences)".format(
60 | self.__class__.__name__, self.fnames[0].name, self.__len__())
61 | return s
62 |
--------------------------------------------------------------------------------
/nmtpytorch/evaluator.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from collections import OrderedDict
3 |
4 | from .
import metrics 5 | from .utils.filterchain import FilterChain 6 | from .utils.misc import get_language 7 | 8 | 9 | class Evaluator: 10 | def __init__(self, refs, beam_metrics, filters=''): 11 | # metrics: list of upper-case beam-search metrics 12 | self.kwargs = {} 13 | self.scorers = OrderedDict() 14 | self.refs = list(refs.parent.glob(refs.name)) 15 | self.language = get_language(self.refs[0]) 16 | if self.language is None: 17 | # Fallback to en (this is only relevant for METEOR) 18 | self.language = 'en' 19 | 20 | self.filter = lambda s: s 21 | if filters: 22 | self.filter = FilterChain(filters) 23 | self.refs = self.filter(refs) 24 | 25 | assert len(self.refs) > 0, "Number of reference files == 0" 26 | 27 | for metric in sorted(beam_metrics): 28 | self.kwargs[metric] = {'language': self.language} 29 | self.scorers[metric] = getattr(metrics, metric + 'Scorer')() 30 | 31 | def score(self, hyps): 32 | """hyps is a list of hypotheses as they come out from decoder.""" 33 | assert isinstance(hyps, list), "hyps should be a list." 34 | 35 | # Post-process if requested 36 | hyps = self.filter(hyps) 37 | 38 | results = [] 39 | for key, scorer in self.scorers.items(): 40 | results.append( 41 | scorer.compute(self.refs, hyps, **self.kwargs[key])) 42 | return results 43 | -------------------------------------------------------------------------------- /nmtpytorch/layers/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import apex 3 | LayerNorm = apex.normalization.FusedLayerNorm 4 | except ImportError as ie: 5 | import torch 6 | LayerNorm = torch.nn.LayerNorm 7 | 8 | # Basic layers 9 | from .ff import FF 10 | from .fusion import Fusion 11 | from .flatten import Flatten 12 | from .argselect import ArgSelect 13 | from .pool import Pool 14 | from .seq_conv import SequenceConvolution 15 | from .rnninit import RNNInitializer 16 | from .max_margin import MaxMargin 17 | 18 | # Embedding variants 19 | from .embedding import * 20 | 21 | # Attention layers 22 | from .attention import * 23 | 24 | # Encoder layers 25 | from .encoders import * 26 | 27 | # Decoder layers 28 | from .decoders import * 29 | -------------------------------------------------------------------------------- /nmtpytorch/layers/argselect.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ArgSelect(torch.nn.Module): 5 | """Dummy layer that picks one of the returned values from mostly RNN-type 6 | `nn.Module` layers.""" 7 | def __init__(self, index): 8 | super().__init__() 9 | self.index = index 10 | 11 | def forward(self, x): 12 | return x[self.index] 13 | 14 | def __repr__(self): 15 | return "ArgSelect(index={})".format(self.index) 16 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp import MLPAttention 2 | from .dot import DotAttention 3 | from .hierarchical import HierarchicalAttention 4 | from .co import CoAttention 5 | from .mhco import MultiHeadCoAttention 6 | from .uniform import UniformAttention 7 | from .scaled_dot import ScaledDotAttention 8 | 9 | 10 | def get_attention(type_): 11 | return { 12 | 'mlp': MLPAttention, 13 | 'dot': DotAttention, 14 | 'hier': HierarchicalAttention, 15 | 'co': CoAttention, 16 | 'mhco': MultiHeadCoAttention, 17 | 'uniform': UniformAttention, 18 | 'scaled_dot': ScaledDotAttention, 19 | }[type_] 20 | 
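A minimal usage sketch for the factory above (all dimensions are hypothetical; `MLPAttention` takes `ctx_dim` and `hid_dim` as its first two arguments, as defined in `mlp.py`/`dot.py` below):

```python
import torch
from nmtpytorch.layers.attention import get_attention

# Build an MLP attention over 512-d annotations with a 256-d decoder state.
att = get_attention('mlp')(ctx_dim=512, hid_dim=256)

# S=7 source positions, B=32 samples: ctx is S*B*C, hid is T*B*H with T=1.
ctx = torch.rand(7, 32, 512)
hid = torch.rand(1, 32, 256)
alpha, z_t = att(hid, ctx)  # alpha: 7x32 scores, z_t: 32x256 context
```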
--------------------------------------------------------------------------------
/nmtpytorch/layers/attention/co.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn.functional as F
4 | from torch import nn
5 |
6 | from ...utils.nn import get_activation_fn
7 |
8 | # Code contributed by @jlibovicky
9 |
10 |
11 | class CoAttention(nn.Module):
12 | """Co-attention between two sequences.
13 |
14 | Uses one hidden layer to compute an affinity matrix between two sequences.
15 | This can then be normalized in two directions, which gives us 1->2 and 2->1
16 | attentions.
17 |
18 | The co-attention is computed using a single feed-forward layer as in
19 | Bahdanau's attention.
20 | """
21 | def __init__(self, ctx_1_dim, ctx_2_dim, bottleneck,
22 | att_activ='tanh', mlp_bias=False):
23 | super().__init__()
24 |
25 | self.mlp_hid = nn.Conv2d(ctx_1_dim + ctx_2_dim, bottleneck, 1)
26 | self.mlp_out = nn.Conv2d(bottleneck, 1, 1, bias=mlp_bias)
27 | self.activ = get_activation_fn(att_activ)
28 |
29 | self.project_1_to_2 = nn.Linear(ctx_1_dim + ctx_2_dim, bottleneck)
30 | self.project_2_to_1 = nn.Linear(ctx_1_dim + ctx_2_dim, bottleneck)
31 |
32 | def forward(self, ctx_1, ctx_2, ctx_1_mask=None, ctx_2_mask=None):
33 | if ctx_2_mask is not None:
34 | ctx_2_neg_mask = (1. - ctx_2_mask.transpose(0, 1).unsqueeze(1)) * -1e12
35 |
36 | ctx_1_len = ctx_1.size(0)
37 | ctx_2_len = ctx_2.size(0)
38 | b_ctx_1 = ctx_1.permute(1, 2, 0).unsqueeze(3).repeat(1, 1, 1, ctx_2_len)
39 | b_ctx_2 = ctx_2.permute(1, 2, 0).unsqueeze(2).repeat(1, 1, ctx_1_len, 1)
40 |
41 | catted = torch.cat([b_ctx_1, b_ctx_2], dim=1)
42 | hidden = self.activ(self.mlp_hid(catted))
43 | affinity_matrix = self.mlp_out(hidden).squeeze(1)
44 | if ctx_1_mask is not None:
45 | ctx_1_neg_mask = (1. - ctx_1_mask.transpose(0, 1).unsqueeze(2)) * -1e12
46 | affinity_matrix += ctx_1_neg_mask
47 |
48 | if ctx_2_mask is not None:
49 | ctx_2_neg_mask = (1.
- ctx_2_mask.transpose(0, 1).unsqueeze(1)) * -1e12 50 | affinity_matrix += ctx_2_neg_mask 51 | 52 | dist_1_to_2 = F.softmax(affinity_matrix, dim=2) 53 | context_1_to_2 = ctx_1.permute(1, 2, 0).matmul(dist_1_to_2).permute(2, 0, 1) 54 | seq_1_to_2 = self.activ( 55 | self.project_1_to_2(torch.cat([ctx_2, context_1_to_2], dim=-1))) 56 | 57 | dist_2_to_1 = F.softmax(affinity_matrix, dim=1).transpose(1, 2) 58 | context_2_to_1 = ctx_2.permute(1, 2, 0).matmul(dist_2_to_1).permute(2, 0, 1) 59 | seq_2_to_1 = self.activ( 60 | self.project_2_to_1(torch.cat([ctx_1, context_2_to_1], dim=-1))) 61 | 62 | return seq_2_to_1, seq_1_to_2 63 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/dot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from ...utils.nn import get_activation_fn 7 | 8 | 9 | class DotAttention(nn.Module): 10 | """Attention layer with dot product.""" 11 | def __init__(self, ctx_dim, hid_dim, att_bottleneck='ctx', 12 | transform_ctx=True, att_activ='tanh', temp=1., ctx2hid=True, 13 | mlp_bias=None): 14 | # NOTE: 15 | # mlp_bias here to not break models that pass mlp_bias to all types 16 | # of attentions 17 | super().__init__() 18 | 19 | self.ctx_dim = ctx_dim 20 | self.hid_dim = hid_dim 21 | self._ctx2hid = ctx2hid 22 | self.temperature = temp 23 | self.activ = get_activation_fn(att_activ) 24 | 25 | # The common dimensionality for inner formulation 26 | if isinstance(att_bottleneck, int): 27 | self.mid_dim = att_bottleneck 28 | else: 29 | self.mid_dim = getattr(self, '{}_dim'.format(att_bottleneck)) 30 | 31 | # Adaptor from RNN's hidden dim to mid_dim 32 | self.hid2ctx = nn.Linear(self.hid_dim, self.mid_dim, bias=False) 33 | 34 | if transform_ctx or self.mid_dim != self.ctx_dim: 35 | # Additional context projection within same dimensionality 36 | self.ctx2ctx = nn.Linear(self.ctx_dim, self.mid_dim, bias=False) 37 | else: 38 | self.ctx2ctx = lambda x: x 39 | 40 | if self._ctx2hid: 41 | # ctx2hid: final transformation from ctx to hid 42 | self.ctx2hid = nn.Linear(self.ctx_dim, self.hid_dim, bias=False) 43 | else: 44 | self.ctx2hid = lambda x: x 45 | 46 | def forward(self, hid, ctx, ctx_mask=None): 47 | r"""Computes attention probabilities and final context using 48 | decoder's hidden state and source annotations. 49 | 50 | Arguments: 51 | hid(Tensor): A set of decoder hidden states of shape `T*B*H` 52 | where `T` == 1, `B` is batch dim and `H` is hidden state dim. 53 | ctx(Tensor): A set of annotations of shape `S*B*C` where `S` 54 | is the source timestep dim, `B` is batch dim and `C` 55 | is annotation dim. 56 | ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes 57 | in the padded positions. 58 | 59 | Returns: 60 | scores(Tensor): A tensor of shape `S*B` containing normalized 61 | attention scores for each position and sample. 62 | z_t(Tensor): A tensor of shape `B*H` containing the final 63 | attended context vector for this target decoding timestep. 64 | 65 | Notes: 66 | This will only work when `T==1` for now. 
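For example, with `S=7`, `B=32`, `C=512` and `H=256`: `hid` is
`1x32x256`, `ctx` is `7x32x512`, and the returned `alpha` and `z_t`
are `7x32` and `32x256` respectively.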
67 | """ 68 | # SxBxC 69 | ctx_ = self.ctx2ctx(ctx) 70 | # TxBxC 71 | hid_ = self.hid2ctx(hid) 72 | 73 | # shuffle dims to prepare for batch mat-mult -> SxB 74 | scores = torch.bmm(hid_.permute(1, 0, 2), ctx_.permute(1, 2, 0)).div( 75 | self.temperature).squeeze(1).t() 76 | 77 | # Normalize attention scores correctly -> S*B 78 | if ctx_mask is not None: 79 | # Mask out padded positions with -inf so that they get 0 attention 80 | scores.masked_fill_((1 - ctx_mask).bool(), -1e8) 81 | 82 | alpha = F.softmax(scores, dim=0) 83 | 84 | # Transform final context vector to H for further decoders 85 | return alpha, self.ctx2hid((alpha.unsqueeze(-1) * ctx).sum(0)) 86 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/hierarchical.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | from torch import nn 4 | 5 | from ...utils.nn import get_activation_fn 6 | 7 | 8 | # Libovický, J., & Helcl, J. (2017). Attention Strategies for Multi-Source 9 | # Sequence-to-Sequence Learning. In Proceedings of the 55th Annual Meeting of 10 | # the Association for Computational Linguistics (Volume 2: Short Papers) 11 | # (Vol. 2, pp. 196-202). [Code contributed by @jlibovicky] 12 | 13 | 14 | class HierarchicalAttention(nn.Module): 15 | """Hierarchical attention over multiple modalities.""" 16 | def __init__(self, ctx_dims, hid_dim, mid_dim, att_activ='tanh'): 17 | super().__init__() 18 | 19 | self.activ = get_activation_fn(att_activ) 20 | self.ctx_dims = ctx_dims 21 | self.hid_dim = hid_dim 22 | self.mid_dim = mid_dim 23 | 24 | self.ctx_projs = nn.ModuleList([ 25 | nn.Linear(dim, mid_dim, bias=False) for dim in self.ctx_dims]) 26 | self.dec_proj = nn.Linear(hid_dim, mid_dim, bias=True) 27 | self.mlp = nn.Linear(self.mid_dim, 1, bias=False) 28 | 29 | def forward(self, contexts, hid): 30 | dec_state_proj = self.dec_proj(hid) 31 | ctx_projected = torch.cat([ 32 | p(ctx).unsqueeze(0) for p, ctx 33 | in zip(self.ctx_projs, contexts)], dim=0) 34 | energies = self.mlp(self.activ(dec_state_proj + ctx_projected)) 35 | att_dist = nn.functional.softmax(energies, dim=0) 36 | 37 | ctxs_cat = torch.cat([c.unsqueeze(0) for c in contexts]) 38 | joint_context = (att_dist * ctxs_cat).sum(0) 39 | 40 | return att_dist, joint_context 41 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/mhco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | # Code contributed by @jlibovicky 7 | 8 | 9 | class MultiHeadCoAttention(nn.Module): 10 | """Generalization of multi-head attention for co-attention.""" 11 | 12 | def __init__(self, ctx_1_dim, ctx_2_dim, bottleneck, head_count, dropout=0.1): 13 | assert bottleneck % head_count == 0 14 | self.dim_per_head = bottleneck // head_count 15 | self.model_dim = bottleneck 16 | 17 | super().__init__() 18 | self.head_count = head_count 19 | 20 | self.linear_keys_1 = nn.Linear(ctx_1_dim, 21 | head_count * self.dim_per_head) 22 | self.linear_values_1 = nn.Linear(ctx_1_dim, 23 | head_count * self.dim_per_head) 24 | self.linear_keys_2 = nn.Linear(ctx_2_dim, 25 | head_count * self.dim_per_head) 26 | self.linear_values_2 = nn.Linear(ctx_2_dim, 27 | head_count * self.dim_per_head) 28 | 29 | self.final_1_to_2_linear = nn.Linear(bottleneck, bottleneck) 30 | 
self.final_2_to_1_linear = nn.Linear(bottleneck, bottleneck)
31 | self.project_1_to_2 = nn.Linear(ctx_1_dim + ctx_2_dim, bottleneck)
32 | self.project_2_to_1 = nn.Linear(ctx_1_dim + ctx_2_dim, bottleneck)
# (Editorial fix) forward() below calls self.activ, which was never
# defined in this __init__; tanh is assumed here, mirroring the default
# att_activ='tanh' of the companion CoAttention layer in co.py.
self.activ = torch.tanh
33 |
34 | def forward(self, ctx_1, ctx_2, ctx_1_mask=None, ctx_2_mask=None):
35 | """Computes the context vector and the attention vectors."""
36 |
37 | def shape(x, length):
38 | """ projection """
39 | return x.view(
40 | length, batch_size, head_count, dim_per_head).permute(1, 2, 0, 3)
41 |
42 | def unshape(x, length):
43 | """ compute context """
44 | return x.transpose(1, 2).contiguous().view(
45 | batch_size, length, head_count * dim_per_head).transpose(0, 1)
46 |
47 | batch_size = ctx_1.size(1)
48 | assert batch_size == ctx_2.size(1)
49 | dim_per_head = self.dim_per_head
50 | head_count = self.head_count
51 | ctx_1_len = ctx_1.size(0)
52 | ctx_2_len = ctx_2.size(0)
53 |
54 | # 1) Project keys and values for both contexts.
55 | key_1_up = shape(self.linear_keys_1(ctx_1), ctx_1_len)
56 | value_1_up = shape(self.linear_values_1(ctx_1), ctx_1_len)
57 | key_2_up = shape(self.linear_keys_2(ctx_2), ctx_2_len)
58 | value_2_up = shape(self.linear_values_2(ctx_2), ctx_2_len)
59 |
60 | scores = torch.matmul(key_2_up, key_1_up.transpose(2, 3))
61 |
62 | if ctx_1_mask is not None:
63 | mask = ctx_1_mask.t().unsqueeze(2).unsqueeze(3).expand_as(scores)
64 | scores = scores.masked_fill(mask.bool(), -1e18)
65 | if ctx_2_mask is not None:
66 | mask = ctx_2_mask.t().unsqueeze(1).unsqueeze(3).expand_as(scores)
67 | scores = scores.masked_fill(mask.bool(), -1e18)
68 |
69 | # 2) Normalize scores and compute context vectors (note: the `dropout`
70 | # argument is currently unused, so no attention dropout is applied).
70 | dist_1_to_2 = F.softmax(scores, dim=2)
71 | context_1_to_2 = unshape(torch.matmul(dist_1_to_2, value_1_up), ctx_2_len)
72 | context_1_to_2 = self.final_1_to_2_linear(context_1_to_2)
73 | seq_1_to_2 = self.activ(
74 | self.project_1_to_2(torch.cat([ctx_2, context_1_to_2], dim=-1)))
75 |
76 | dist_2_to_1 = F.softmax(scores, dim=1)
77 | context_2_to_1 = unshape(
78 | torch.matmul(dist_2_to_1.transpose(2, 3), value_2_up), ctx_1_len)
79 | context_2_to_1 = self.final_2_to_1_linear(context_2_to_1)
80 | seq_2_to_1 = self.activ(
81 | self.project_2_to_1(torch.cat([ctx_1, context_2_to_1], dim=-1)))
82 |
83 | return seq_2_to_1, seq_1_to_2
84 |
--------------------------------------------------------------------------------
/nmtpytorch/layers/attention/mlp.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn.functional as F
4 | from torch import nn
5 |
6 | from .dot import DotAttention
7 |
8 |
9 | class MLPAttention(DotAttention):
10 | """Attention layer with feed-forward layer."""
11 | def __init__(self, ctx_dim, hid_dim, att_bottleneck='ctx',
12 | transform_ctx=True, att_activ='tanh',
13 | mlp_bias=False, temp=1., ctx2hid=True):
14 | super().__init__(ctx_dim, hid_dim, att_bottleneck, transform_ctx,
15 | att_activ, temp, ctx2hid)
16 |
17 | if mlp_bias:
18 | self.bias = nn.Parameter(torch.Tensor(self.mid_dim))
19 | self.bias.data.zero_()
20 | else:
21 | self.register_parameter('bias', None)
22 |
23 | self.mlp = nn.Linear(self.mid_dim, 1, bias=False)
24 |
25 | def forward(self, hid, ctx, ctx_mask=None):
26 | r"""Computes attention probabilities and final context using
27 | decoder's hidden state and source annotations.
28 |
29 | Arguments:
30 | hid(Tensor): A set of decoder hidden states of shape `T*B*H`
31 | where `T` == 1, `B` is batch dim and `H` is hidden state dim.
32 | ctx(Tensor): A set of annotations of shape `S*B*C` where `S` 33 | is the source timestep dim, `B` is batch dim and `C` 34 | is annotation dim. 35 | ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes 36 | in the padded positions. 37 | 38 | Returns: 39 | scores(Tensor): A tensor of shape `S*B` containing normalized 40 | attention scores for each position and sample. 41 | z_t(Tensor): A tensor of shape `B*H` containing the final 42 | attended context vector for this target decoding timestep. 43 | 44 | Notes: 45 | This will only work when `T==1` for now. 46 | """ 47 | # inner_sum -> SxBxC + TxBxC 48 | inner_sum = self.ctx2ctx(ctx) + self.hid2ctx(hid) 49 | 50 | if self.bias is not None: 51 | inner_sum.add_(self.bias) 52 | 53 | # Compute scores- > SxB 54 | scores = self.mlp( 55 | self.activ(inner_sum)).div(self.temperature).squeeze(-1) 56 | 57 | # Normalize attention scores correctly -> S*B 58 | if ctx_mask is not None: 59 | # Mask out padded positions with -inf so that they get 0 attention 60 | scores.masked_fill_((1 - ctx_mask).bool(), -1e8) 61 | 62 | alpha = F.softmax(scores, dim=0) 63 | 64 | # Transform final context vector to H for further decoders 65 | return alpha, self.ctx2hid((alpha.unsqueeze(-1) * ctx).sum(0)) 66 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/scaled_dot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class ScaledDotAttention(torch.nn.Module): 8 | """Scaled Dot-product attention from `Attention is all you need`. 9 | 10 | Arguments: 11 | 12 | Input: 13 | 14 | Output: 15 | """ 16 | 17 | def __init__(self, model_dim, n_heads, causal=False): 18 | super().__init__() 19 | self.model_dim = model_dim 20 | self.n_heads = n_heads 21 | self.causal = causal 22 | 23 | #self.k_dim = self.model_dim / self.n_heads 24 | #self.v_dim = self.model_dim / self.n_heads 25 | 26 | # Efficient linear projections for all heads 27 | self.lin_k = torch.nn.Linear( 28 | self.model_dim, self.model_dim, bias=False) 29 | self.lin_q = torch.nn.Linear( 30 | self.model_dim, self.model_dim, bias=False) 31 | self.lin_v = torch.nn.Linear( 32 | self.model_dim, self.model_dim, bias=False) 33 | 34 | # Final output layer is independent of number of heads 35 | self.lin_o = torch.nn.Linear( 36 | self.model_dim, self.model_dim, bias=False) 37 | 38 | self.scale = math.sqrt(self.model_dim / self.n_heads) 39 | 40 | def forward(self, inputs): 41 | """Scaled dot-product attention forward-pass 42 | 43 | :param inputs: dictionary with query, key, value and mask tensors 44 | the shape of the tensors are (tstep, bsize, dim) except for the 45 | mask which is (tstep, bsize) 46 | 47 | :return: foo 48 | """ 49 | q, k, v, mask = inputs 50 | # q is the query, v is the actual inputs and k is v's representation 51 | # for self attention q=v=k 52 | # for cross attention q != (v=k) 53 | # Project keys, queries and values --> (bsize, tstep, dim) 54 | tstep, bsize = mask.shape 55 | head_view = (tstep, bsize, self.n_heads, -1) 56 | # qp: (bsize, head, tstep, dim) 57 | # vp: (bsize, head, tstep, dim) 58 | # kp: (bsize, head, dim, tstep) 59 | qp = self.lin_q(q).view(*head_view).permute(1, 2, 0, 3) 60 | vp = self.lin_v(v).view(*head_view).permute(1, 2, 0, 3) 61 | kp = self.lin_k(k).view(*head_view).permute(1, 2, 3, 0) 62 | 63 | # z: (bsize, head, tstep, tstep) 64 | z = torch.matmul(qp, kp).div(self.scale).softmax(dim=-1) 65 | out = torch.matmul(z, 
vp).permute(2, 0, 1, 3).reshape_as(v) 66 | return (v, out, mask) 67 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/uniform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | 4 | 5 | class UniformAttention(torch.nn.Module): 6 | """A dummy non-parametric attention layer that applies uniform weights.""" 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def forward(self, hid, ctx, ctx_mask=None): 11 | alpha = torch.ones(*ctx.shape[:2], device=ctx.device).div(ctx.shape[0]) 12 | wctx = (alpha.unsqueeze(-1) * ctx).sum(0) 13 | return alpha, wctx 14 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .conditional import ConditionalDecoder 2 | from .simplegru import SimpleGRUDecoder 3 | from .conditionalmm import ConditionalMMDecoder 4 | from .multisourceconditional import MultiSourceConditionalDecoder 5 | from .xu import XuDecoder 6 | from .switchinggru import SwitchingGRUDecoder 7 | from .vector import VectorDecoder 8 | 9 | 10 | def get_decoder(type_): 11 | """Only expose ones with compatible __init__() arguments for now.""" 12 | return { 13 | 'cond': ConditionalDecoder, 14 | 'simplegru': SimpleGRUDecoder, 15 | 'condmm': ConditionalMMDecoder, 16 | 'vector': VectorDecoder, 17 | }[type_] 18 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/conditionalmm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch.nn.functional as F 3 | 4 | from ...utils.nn import get_rnn_hidden_state 5 | from ..attention import HierarchicalAttention, UniformAttention, get_attention 6 | from .. import Fusion 7 | from . 
import ConditionalDecoder
8 |
9 |
10 | class ConditionalMMDecoder(ConditionalDecoder):
11 | """A conditional multimodal decoder with multimodal attention."""
12 | def __init__(self, fusion_type='concat', fusion_activ=None,
13 | aux_ctx_name='image', mm_att_type='md-dd',
14 | persistent_dump=False, **kwargs):
15 | super().__init__(**kwargs)
16 | self.aux_ctx_name = aux_ctx_name
17 | self.mm_att_type = mm_att_type
18 | self.persistent_dump = persistent_dump
19 |
20 | if self.mm_att_type == 'uniform':
21 | # Dummy uniform attention
22 | self.shared_dec_state = False
23 | self.shared_att_mlp = False
24 | else:
25 | # Parse attention type
26 | att_str = sorted(self.mm_att_type.lower().split('-'))
27 | assert len(att_str) == 2 and att_str[0][0] == 'd' and att_str[1][0] == 'm', \
28 | "att_type should be m[d|i]-d[d|i]"
29 | # Independent decoder state means shared dec state
30 | self.shared_dec_state = att_str[0][1] == 'i'
31 |
32 | # Independent modality means sharing the mlp in the MLP attention
33 | self.shared_att_mlp = att_str[1][1] == 'i'
34 |
35 | # Sanity check
36 | if self.shared_att_mlp and self.att_type != 'mlp':
37 | raise Exception("Shared attention requires MLP attention.")
38 |
39 | # Define (context) fusion operator
40 | self.fusion_type = fusion_type
41 | if fusion_type == "hierarchical":
42 | self.fusion = HierarchicalAttention(
43 | [self.hidden_size, self.hidden_size],
44 | self.hidden_size, self.hidden_size)
45 | else:
46 | if self.att_ctx2hid:
47 | # Old behaviour
48 | fusion_inp_size = 2 * self.hidden_size
49 | else:
50 | fusion_inp_sizes = list(self.ctx_size_dict.values())
51 | if fusion_type == 'concat':
52 | fusion_inp_size = sum(fusion_inp_sizes)
53 | else:
54 | fusion_inp_size = fusion_inp_sizes[0]
55 | self.fusion = Fusion(
56 | fusion_type, fusion_inp_size, self.hidden_size,
57 | fusion_activ=fusion_activ)
58 |
59 | # Rename textual attention layer
60 | self.txt_att = self.att
61 | del self.att
62 |
63 | if self.mm_att_type == 'uniform':
64 | self.img_att = UniformAttention()
65 | else:
66 | # Visual attention over convolutional feature maps
67 | Attention = get_attention(self.att_type)
68 | self.img_att = Attention(
69 | self.ctx_size_dict[self.aux_ctx_name], self.hidden_size,
70 | transform_ctx=self.transform_ctx, mlp_bias=self.mlp_bias,
71 | ctx2hid=self.att_ctx2hid,
72 | att_activ=self.att_activ,
73 | att_bottleneck=self.att_bottleneck)
74 |
75 | # Tune multimodal attention type
76 | if self.shared_att_mlp:
77 | # Modality independent
78 | self.txt_att.mlp.weight = self.img_att.mlp.weight
79 | self.txt_att.ctx2ctx.weight = self.img_att.ctx2ctx.weight
80 |
81 | if self.shared_dec_state:
82 | # Decoder independent
83 | self.txt_att.hid2ctx.weight = self.img_att.hid2ctx.weight
84 |
85 | def f_next(self, ctx_dict, y, h):
86 | # Get hidden states from the first decoder (purely cond.
on LM) 87 | h1_c1 = self.dec0(y, self._rnn_unpack_states(h)) 88 | h1 = get_rnn_hidden_state(h1_c1) 89 | 90 | # Apply attention 91 | self.txt_alpha_t, txt_z_t = self.txt_att( 92 | h1.unsqueeze(0), *ctx_dict[self.ctx_name]) 93 | self.img_alpha_t, img_z_t = self.img_att( 94 | h1.unsqueeze(0), *ctx_dict[self.aux_ctx_name]) 95 | # Save for reg loss terms 96 | self.history['alpha_img'].append(self.img_alpha_t.unsqueeze(0)) 97 | 98 | # Context will double dimensionality if fusion_type is concat 99 | # z_t should be compatible with hidden_size 100 | if self.fusion_type == "hierarchical": 101 | self.h_att, z_t = self.fusion([txt_z_t, img_z_t], h1.unsqueeze(0)) 102 | else: 103 | z_t = self.fusion(txt_z_t, img_z_t) 104 | 105 | if not self.training and self.persistent_dump: 106 | # For test-time activation debugging 107 | self.persistence['z_t'].append(z_t.t().cpu().numpy()) 108 | self.persistence['txt_z_t'].append(txt_z_t.t().cpu().numpy()) 109 | self.persistence['img_z_t'].append(img_z_t.t().cpu().numpy()) 110 | 111 | # Run second decoder (h1 is compatible now as it was returned by GRU) 112 | h2_c2 = self.dec1(z_t, h1_c1) 113 | h2 = get_rnn_hidden_state(h2_c2) 114 | 115 | # This is a bottleneck to avoid going from H to V directly 116 | logit = self.hid2out(self.out_merge_fn(h2, y, z_t)) 117 | 118 | # Apply dropout if any 119 | if self.dropout_out > 0: 120 | logit = self.do_out(logit) 121 | 122 | # Transform logit to T*B*V (V: vocab_size) 123 | # Compute log_softmax over token dim 124 | log_p = F.log_softmax(self.out2prob(logit), dim=-1) 125 | 126 | # Return log probs and new hidden states 127 | return log_p, self._rnn_pack_states(h2_c2) 128 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/multisourceconditional.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | from ...utils.nn import get_rnn_hidden_state 6 | from ..attention import get_attention, HierarchicalAttention 7 | from .. import Fusion 8 | from . import ConditionalDecoder 9 | 10 | 11 | class MultiSourceConditionalDecoder(ConditionalDecoder): 12 | """A conditional multimodal decoder with multimodal attention.""" 13 | def __init__(self, ctx_names, fusion_type='concat', **kwargs): 14 | super().__init__(**kwargs) 15 | 16 | # Define (context) fusion operator 17 | self.ctx_names = ctx_names 18 | self.fusion_type = fusion_type 19 | if fusion_type == "hierarchical": 20 | self.fusion = HierarchicalAttention( 21 | [self.hidden_size for _ in ctx_names], 22 | self.hidden_size, self.hidden_size) 23 | else: 24 | raise NotImplementedError("Concatenation and sum work only with two inputs now.") 25 | self.fusion = Fusion( 26 | fusion_type, len(ctx_names) * self.hidden_size, self.hidden_size) 27 | 28 | attns = [] 29 | for ctx_name in ctx_names: 30 | Attention = get_attention(self.att_type) 31 | attns.append(Attention( 32 | self.ctx_size_dict[ctx_name], self.hidden_size, 33 | transform_ctx=self.transform_ctx, mlp_bias=self.mlp_bias, 34 | att_activ=self.att_activ, 35 | att_bottleneck=self.att_bottleneck)) 36 | self.attns = nn.ModuleList(attns) 37 | 38 | def f_next(self, ctx_dict, y, h): 39 | # Get hidden states from the first decoder (purely cond. 
on LM) 40 | h1_c1 = self.dec0(y, self._rnn_unpack_states(h)) 41 | h1 = get_rnn_hidden_state(h1_c1) 42 | 43 | # Apply attention 44 | ctx_list = [att(h1.unsqueeze(0), *ctx_dict[name])[1] 45 | for att, name in zip(self.attns, self.ctx_names)] 46 | 47 | # Context will double dimensionality if fusion_type is concat 48 | # z_t should be compatible with hidden_size 49 | if self.fusion_type == "hierarchical": 50 | _, z_t = self.fusion(ctx_list, h1.unsqueeze(0)) 51 | else: 52 | z_t = self.fusion(ctx_list) 53 | 54 | # Run second decoder (h1 is compatible now as it was returned by GRU) 55 | h2_c2 = self.dec1(z_t, h1_c1) 56 | h2 = get_rnn_hidden_state(h2_c2) 57 | 58 | # This is a bottleneck to avoid going from H to V directly 59 | logit = self.hid2out(h2) 60 | 61 | # Apply dropout if any 62 | if self.dropout_out > 0: 63 | logit = self.do_out(logit) 64 | 65 | # Transform logit to T*B*V (V: vocab_size) 66 | # Compute log_softmax over token dim 67 | log_p = F.log_softmax(self.out2prob(logit), dim=-1) 68 | 69 | # Return log probs and new hidden states 70 | return log_p, self._rnn_pack_states(h2_c2) 71 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/simplegru.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | from .. import FF 6 | from . import ConditionalDecoder 7 | 8 | 9 | class SimpleGRUDecoder(ConditionalDecoder): 10 | """A simple GRU decoder with a single decoder layer. It has the same 11 | set of parameters as the parent class except `rnn_type`.""" 12 | def __init__(self, **kwargs): 13 | # Set rnn_type to GRU 14 | kwargs['rnn_type'] = 'gru' 15 | super().__init__(**kwargs) 16 | 17 | # Remove second GRU 18 | # Remove and replace hid2out since we now concatenate the 19 | # attention output and the hidden state 20 | del self.dec1, self.hid2out 21 | self.hid2out = FF(2 * self.hidden_size, 22 | self.input_size, bias_zero=True, activ='tanh') 23 | 24 | def f_next(self, ctx_dict, y, h): 25 | """Applies one timestep of recurrence.""" 26 | # Get hidden states from the first decoder (purely cond. on LM) 27 | h1 = self.dec0(y, h) 28 | 29 | # Apply attention 30 | alpha_t, z_t = self.att(h1.unsqueeze(0), *ctx_dict[self.ctx_name]) 31 | 32 | if not self.training: 33 | self.history['alpha_txt'].append(alpha_t) 34 | 35 | # Concatenate attented source and hidden state & project 36 | o = self.hid2out(torch.cat((h1, z_t), dim=-1)) 37 | 38 | # Apply dropout if any 39 | logit = self.do_out(o) if self.dropout_out > 0 else o 40 | 41 | # Transform logit to T*B*V (V: vocab_size) 42 | # Compute log_softmax over token dim 43 | log_p = F.log_softmax(self.out2prob(logit), dim=-1) 44 | 45 | # Return log probs and new hidden states 46 | return log_p, h1 47 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/switchinggru.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import defaultdict 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | 7 | from .. import FF 8 | from ...utils.device import DEVICE 9 | from ..attention import get_attention 10 | 11 | 12 | class SwitchingGRUDecoder(nn.Module): 13 | """A multi-source aware attention based decoder. 
-------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/switchinggru.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from collections import defaultdict
3 | import torch
4 | from torch import nn
5 | import torch.nn.functional as F
6 | 
7 | from .. import FF
8 | from ...utils.device import DEVICE
9 | from ..attention import get_attention
10 | 
11 | 
12 | class SwitchingGRUDecoder(nn.Module):
13 |     """A multi-source aware, attention-based decoder. During training,
14 |     this decoder is fed a single modality at a time, while during
15 |     inference one of the src->trg tasks is performed.
16 |     """
17 |     def __init__(self, input_size, hidden_size, modality_dict, n_vocab,
18 |                  tied_emb=False, dropout_out=0):
19 |         super().__init__()
20 | 
21 |         self.hidden_size = hidden_size
22 |         self.input_size = input_size
23 |         self.n_vocab = n_vocab
24 |         self.tied_emb = tied_emb
25 |         self.dropout_out = dropout_out
26 | 
27 |         # Will have N attentions for N possible input modalities
28 |         # dict: {en_speech: (encoding_size, att_type)}
29 |         atts = {}
30 |         for name, (enc_size, att_type) in modality_dict.items():
31 |             atts[name] = get_attention(att_type)(enc_size, self.hidden_size)
32 | 
33 |         self.atts = nn.ModuleDict(atts)
34 | 
35 |         # Create target embeddings
36 |         self.emb = nn.Embedding(self.n_vocab, self.input_size, padding_idx=0)
37 | 
38 |         # Create the two decoder layers; dec0 runs before the attention
39 |         self.dec0 = nn.GRUCell(self.input_size, self.hidden_size)
40 |         self.dec1 = nn.GRUCell(self.hidden_size, self.hidden_size)
41 | 
42 |         # Output dropout
43 |         if self.dropout_out > 0:
44 |             self.do_out = nn.Dropout(p=self.dropout_out)
45 | 
46 |         # Output bottleneck: maps hidden states to target emb dim
47 |         self.hid2out = FF(self.hidden_size, self.input_size,
48 |                           bias_zero=True, activ='tanh')
49 | 
50 |         # Final softmax
51 |         self.out2prob = FF(self.input_size, self.n_vocab)
52 | 
53 |         # Tie input embedding matrix and output embedding matrix
54 |         if self.tied_emb:
55 |             self.out2prob.weight = self.emb.weight
56 | 
57 |         # Final loss
58 |         self.nll_loss = nn.NLLLoss(reduction="sum", ignore_index=0)
59 | 
60 |     def f_init(self, sources):
61 |         """Returns the initial h_0 for the decoder. `sources` is not used
62 |         but passed for compatibility with beam search."""
63 |         self.history = defaultdict(list)
64 |         batch_size = next(iter(sources.values()))[0].shape[1]
65 |         # NOTE: Non-scatter aware, fix this
66 |         return torch.zeros(batch_size, self.hidden_size, device=DEVICE)
67 | 
68 |     def f_next(self, sources, y, h):
69 |         # Get hidden states from the first decoder (purely cond. on LM)
70 |         h_1 = self.dec0(y, h)
71 | 
72 |         # sources will always contain a single modality
73 |         assert len(sources) == 1
74 |         modality = list(sources.keys())[0]
75 | 
76 |         # Apply modality-specific attention
77 |         alpha_t, z_t = self.atts[modality](h_1.unsqueeze(0), *sources[modality])
78 |         self.history['alpha_{}'.format(modality)].append(alpha_t)
79 | 
80 |         # Run second decoder (h_1 is compatible now as it was returned by GRU)
81 |         h_2 = self.dec1(z_t, h_1)
82 | 
83 |         # This is a bottleneck to avoid going from H to V directly
84 |         logit = self.hid2out(h_2)
85 | 
86 |         # Apply dropout if any
87 |         if self.dropout_out > 0:
88 |             logit = self.do_out(logit)
89 | 
90 |         # Transform logit to T*B*V (V: vocab_size)
91 |         # Compute log_softmax over token dim
92 |         log_p = F.log_softmax(self.out2prob(logit), dim=-1)
93 | 
94 |         # Return log probs and new hidden states
95 |         return log_p, h_2
96 | 
97 |     def forward(self, sources, y):
98 |         """Computes the softmax outputs given source annotations `sources` and
99 |         ground-truth target token indices `y`. Only called during training.
100 | 
101 |         Arguments:
102 |             sources(dict): A dict mapping a single modality name to its
103 |                 (annotations, mask) encodings, compatible with the targets.
104 |             y(Tensor): A tensor of `T*B` containing ground-truth target
105 |                 token indices for the given batch.
106 |         """
107 | 
108 |         loss = 0.0
109 |         logps = None if self.training else torch.zeros(
110 |             y.shape[0] - 1, y.shape[1], self.n_vocab, device=y.device)
111 | 
112 |         # Convert token indices to embeddings -> T*B*E
113 |         y_emb = self.emb(y)
114 | 
115 |         # Get initial hidden state
116 |         h = self.f_init(sources)
117 | 
118 |         # -1: So that we skip the timestep where input is <eos>
119 |         for t in range(y_emb.shape[0] - 1):
120 |             log_p, h = self.f_next(sources, y_emb[t], h)
121 |             if not self.training:
122 |                 logps[t] = log_p.data
123 |             loss += self.nll_loss(log_p, y[t + 1])
124 | 
125 |         return {'loss': loss, 'logps': logps}
126 | 
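The `- 1` in the loop above implements standard teacher forcing: the decoder consumes `y[t]` and is scored against `y[t + 1]`, while `ignore_index=0` silences padded positions. A self-contained toy illustration of that shift (the vocabulary and all values are made up):

```python
import torch
from torch import nn

# Toy vocabulary: 0=<pad>, 1=<bos>, 2=<eos>, 3+=regular tokens
y = torch.tensor([[1, 1],     # t=0: <bos> for both samples
                  [4, 5],     # t=1
                  [2, 6],     # t=2: first sample ends early
                  [0, 2]])    # t=3: <pad> vs <eos>

nll = nn.NLLLoss(reduction='sum', ignore_index=0)
vocab_size = 8
log_p = torch.log_softmax(
    torch.randn(y.shape[0] - 1, y.shape[1], vocab_size), dim=-1)

loss = 0.0
for t in range(y.shape[0] - 1):
    # The input would be y[t]; the prediction is scored against y[t + 1]
    loss = loss + nll(log_p[t], y[t + 1])
print(loss)   # the <pad> position contributes nothing to the sum
```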
-------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/vector.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch.nn.functional as F
3 | 
4 | from ...utils.nn import get_rnn_hidden_state
5 | from . import ConditionalDecoder
6 | 
7 | # Decoder without attention that uses a single input vector.
8 | # Layer contributed by @loicbarrault
9 | 
10 | 
11 | class VectorDecoder(ConditionalDecoder):
12 |     """Single-layer RNN decoder using a fixed-size vector representation."""
13 |     def __init__(self, **kwargs):
14 |         # Disable attention
15 |         kwargs['att_type'] = None
16 |         super().__init__(**kwargs)
17 | 
18 |     def f_next(self, ctx_dict, y, h):
19 |         """Applies one timestep of recurrence."""
20 |         # Get hidden states from the decoder
21 |         h1_c1 = self.dec0(y, self._rnn_unpack_states(h))
22 |         h1 = get_rnn_hidden_state(h1_c1)
23 | 
24 |         # Project hidden state to embedding size
25 |         o = self.hid2out(h1)
26 | 
27 |         # Apply dropout if any
28 |         logit = self.do_out(o) if self.dropout_out > 0 else o
29 | 
30 |         # Transform logit to T*B*V (V: vocab_size)
31 |         # Compute log_softmax over token dim
32 |         log_p = F.log_softmax(self.out2prob(logit), dim=-1)
33 | 
34 |         # Return log probs and new hidden states
35 |         return log_p, self._rnn_pack_states(h1_c1)
36 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/embedding/__init__.py: --------------------------------------------------------------------------------
1 | from .pembedding import PEmbedding
2 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/embedding/pembedding.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from torch import nn
3 | 
4 | from .. import FF
5 | 
6 | 
7 | class PEmbedding(nn.Embedding):
8 |     """An extension to regular `nn.Embedding` with an output projection and dropout."""
9 |     def __init__(self, num_embeddings, embedding_dim, out_dim,
10 |                  activ='linear', dropout=0.0):
11 |         super().__init__(num_embeddings, embedding_dim, padding_idx=0)
12 |         self.proj = FF(embedding_dim, out_dim, activ=activ, bias=False)
13 |         self.do = nn.Dropout(dropout) if dropout > 0.0 else lambda x: x
14 | 
15 |     def forward(self, input):
16 |         # Embed through parent's forward, project, then (optionally) drop out
17 |         return self.do(self.proj(super().forward(input)))
18 | 
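A quick sketch of `PEmbedding` in isolation (all dimensions are made up): token indices are embedded, projected to `out_dim`, and optionally passed through dropout.

```python
import torch

# Minimal sketch: a 100-word vocabulary, 32-dim embeddings projected up
# to 64 with a tanh, and 10% dropout. Sizes are illustrative only.
emb = PEmbedding(num_embeddings=100, embedding_dim=32, out_dim=64,
                 activ='tanh', dropout=0.1)

tokens = torch.randint(1, 100, (7, 2))   # (tstep=7, bsize=2); index 0 is <pad>
out = emb(tokens)
print(out.shape)                         # torch.Size([7, 2, 64])
```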
-------------------------------------------------------------------------------- /nmtpytorch/layers/encoders/__init__.py: --------------------------------------------------------------------------------
1 | from .image import ImageEncoder
2 | from .text import TextEncoder
3 | from .bilstmp import BiLSTMp
4 | from .multimodal_text import MultimodalTextEncoder
5 | from .multimodal_bilstmp import MultimodalBiLSTMp
6 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/encoders/bilstmp.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | from torch import nn
4 | from torch.nn import functional as F
5 | 
6 | from ..ff import FF
7 | 
8 | logger = logging.getLogger('nmtpytorch')
9 | 
10 | 
11 | class BiLSTMp(nn.Module):
12 |     """A bidirectional LSTM encoder for speech features. A batch should
13 |     only contain samples that have the same sequence length.
14 | 
15 |     Arguments:
16 |         input_size (int): Input feature dimensionality.
17 |         hidden_size (int): LSTM hidden state dimensionality.
18 |         proj_size (int): Projection layer size.
19 |         proj_activ (str, optional): Non-linearity to apply to intermediate projection
20 |             layers. (Default: 'tanh')
21 |         layers (str): A '_' separated list of integers that defines the subsampling
22 |             factor for each LSTM.
23 |         dropout (float, optional): Use dropout (Default: 0.)
24 |     Input:
25 |         x (Tensor): A tensor of shape (n_timesteps, n_samples, n_feats)
26 |             that includes acoustic features of dimension ``n_feats`` per
27 |             each timestep (in the first dimension).
28 | 
29 |     Output:
30 |         hs (Tensor): A tensor of shape (n_timesteps, n_samples, hidden * 2)
31 |             that contains encoder hidden states for all timesteps.
32 |         mask (Tensor): `None` since this layer expects batches of equal-length sequences.
33 |     """
34 |     def __init__(self, input_size, hidden_size, proj_size, layers,
35 |                  proj_activ='tanh', dropout=0):
36 |         super().__init__()
37 | 
38 |         self.input_size = input_size
39 |         self.hidden_size = hidden_size
40 |         self.proj_size = proj_size
41 |         self.proj_activ = proj_activ
42 |         self.layers = [int(i) for i in layers.split('_')]
43 |         self.dropout = dropout
44 |         self.n_layers = len(self.layers)
45 | 
46 |         # Context size doubles due to concatenation of forward/backward states
47 |         self.ctx_size = self.hidden_size * 2
48 | 
49 |         # Padding tuple that appends one zero-frame at the end of the sequence
50 |         self.pad_tuple = (0, 0, 0, 0, 0, 1)
51 | 
52 |         # Projections and LSTMs
53 |         self.ffs = nn.ModuleList()
54 |         self.lstms = nn.ModuleList()
55 | 
56 |         if self.dropout > 0:
57 |             self.do = nn.Dropout(self.dropout)
58 | 
59 |         for i, ss_factor in enumerate(self.layers):
60 |             # Add LSTMs
61 |             self.lstms.append(nn.LSTM(
62 |                 self.input_size if i == 0 else self.hidden_size,
63 |                 self.hidden_size, bidirectional=True))
64 |             # Add non-linear bottlenecks
65 |             self.ffs.append(FF(
66 |                 self.ctx_size, self.proj_size, activ=self.proj_activ))
67 | 
68 |     def forward(self, x, **kwargs):
69 |         # Generate a mask to detect padded sequences
70 |         mask = x.ne(0).float().sum(2).ne(0).float()
71 | 
72 |         if mask.eq(0).nonzero().numel() > 0:
73 |             logger.info("WARNING: Non-homogeneous batch in BiLSTMp.")
74 | 
75 |         # Pad with zero
76 |         hs = F.pad(x, self.pad_tuple)
77 | 
78 |         for (ss_factor, f_lstm, f_ff) in zip(self.layers, self.lstms, self.ffs):
79 |             if ss_factor > 1:
80 |                 # Subsample by skipping timesteps
81 |                 hs = f_ff(f_lstm(hs[::ss_factor])[0])
82 |             else:
83 |                 hs = f_ff(f_lstm(hs)[0])
84 | 
85 |         if self.dropout > 0:
86 |             hs = self.do(hs)
87 | 
88 |         # No mask is returned as batch should contain same-length sequences
89 |         return hs, None
90 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/encoders/multimodal_bilstmp.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | 
4 | import torch
5 | from torch.nn import functional as F
6 | 
7 | from ..ff import FF
8 | 
9 | from . import BiLSTMp
10 | 
11 | logger = logging.getLogger('nmtpytorch')
12 | 
13 | 
14 | class MultimodalBiLSTMp(BiLSTMp):
15 |     """A bidirectional multimodal LSTM encoder for speech features.
16 | 
17 |     Arguments:
18 |         feat_size (int): Auxiliary feature dimensionality.
19 |         feat_fusion(str): Type of feature fusion: 'early_concat', 'early_sum',
20 |             'late_concat', 'late_sum', 'init'.
21 |         feat_activ(str): Type of non-linearity if any for feature projection
22 |             layer.
23 |         input_size (int): Input speech feature dimensionality.
24 |         hidden_size (int): LSTM hidden state dimensionality.
25 |         proj_size (int): Projection layer size.
26 |         proj_activ (str, optional): Non-linearity to apply to intermediate projection
27 |             layers. (Default: 'tanh')
28 |         layers (str): A '_' separated list of integers that defines the subsampling
29 |             factor for each LSTM.
30 |         dropout (float, optional): Use dropout (Default: 0.)
31 |     Input:
32 |         x (Tensor): A tensor of shape (n_timesteps, n_samples, n_feats)
33 |             that includes acoustic features of dimension ``n_feats`` per
34 |             each timestep (in the first dimension).
35 | 
36 |     Output:
37 |         hs (Tensor): A tensor of shape (n_timesteps, n_samples, hidden * 2)
38 |             that contains encoder hidden states for all timesteps.
39 |         mask (Tensor): `None` since this layer expects batches of equal-length sequences.
40 | """ 41 | 42 | def __init__(self, feat_size, feat_fusion, feat_activ=None, **kwargs): 43 | # Call BiLSTMp.__init__ first 44 | super().__init__(**kwargs) 45 | 46 | self.feat_size = feat_size 47 | self.feat_fusion = feat_fusion 48 | self.feat_activ = feat_activ 49 | 50 | # early_concat: x = layer([x; aux]) 51 | # layer: feat_size + input_size -> input_size 52 | if self.feat_fusion == 'early_concat': 53 | self.feat_layer = FF( 54 | self.feat_size + self.input_size, self.input_size, activ=self.feat_activ) 55 | # early_sum: x = x + layer(aux) 56 | # layer: feat_size -> input_size 57 | elif self.feat_fusion == 'early_sum': 58 | self.feat_layer = FF(self.feat_size, self.input_size, activ=self.feat_activ) 59 | # late_concat: hs = layer([hs; aux]) 60 | # layer: proj_size + feat_size -> proj_size 61 | elif self.feat_fusion == 'late_concat': 62 | self.feat_layer = FF( 63 | self.feat_size + self.proj_size, self.proj_size, activ=self.feat_activ) 64 | # late_sum: hs = hs + layer(aux) 65 | # layer: feat_size -> proj_size 66 | elif self.feat_fusion == 'late_sum': 67 | self.feat_layer = FF(self.feat_size, self.proj_size, activ=self.feat_activ) 68 | # init: Initialize all LSTMs 69 | elif self.feat_fusion == 'init': 70 | # Use single h_0/c_0 for all stacked layers and directions for a 71 | # consistent information source. 72 | self.ff_init_c0 = FF(self.feat_size, self.hidden_size, activ=self.feat_activ) 73 | self.ff_init_h0 = FF(self.feat_size, self.hidden_size, activ=self.feat_activ) 74 | 75 | def forward(self, x, **kwargs): 76 | # Generate a mask to detect padded sequences 77 | mask = x.ne(0).float().sum(2).ne(0).float() 78 | 79 | if mask.eq(0).nonzero().numel() > 0: 80 | logger.info("WARNING: Non-homogeneous batch in BiLSTMp.") 81 | 82 | # Get auxiliary input 83 | aux_x = kwargs['aux'] 84 | 85 | ############## 86 | # Encoder init 87 | ############## 88 | if self.feat_fusion == 'init': 89 | # Tile to 2xBxH for bidirectionality 90 | c_0_ = self.ff_init_c0(aux_x).repeat(2, 1, 1) 91 | h_0_ = self.ff_init_h0(aux_x).repeat(2, 1, 1) 92 | 93 | # Should be a tuple of (h, c) for each layer 94 | h_0s = [(h_0_, c_0_) for _ in range(self.n_layers)] 95 | else: 96 | # Dummy setup so that the below method calls are good 97 | h_0s = [None for _ in range(self.n_layers)] 98 | if self.feat_fusion == 'early_concat': 99 | x = self.feat_layer( 100 | torch.cat([x, aux_x.repeat(x.shape[0], 1, 1)], dim=-1)) 101 | elif self.feat_fusion == 'early_sum': 102 | x.add_(self.feat_layer(aux_x).unsqueeze(0)) 103 | 104 | # Pad with zero 105 | hs = F.pad(x, self.pad_tuple) 106 | 107 | ################### 108 | # LSTM + Proj block 109 | ################### 110 | for (ss_factor, f_lstm, f_ff, h_0) in zip(self.layers, self.lstms, self.ffs, h_0s): 111 | if ss_factor > 1: 112 | # Skip states 113 | hs = f_ff(f_lstm(hs[::ss_factor], hx=h_0)[0]) 114 | else: 115 | hs = f_ff(f_lstm(hs, hx=h_0)[0]) 116 | 117 | ############# 118 | # Late Fusion 119 | ############# 120 | if self.feat_fusion == 'late_concat': 121 | hs = self.feat_layer( 122 | torch.cat([hs, aux_x.repeat(hs.shape[0], 1, 1)], dim=-1)) 123 | elif self.feat_fusion == 'late_sum': 124 | hs = hs + self.feat_layer(aux_x).unsqueeze(0) 125 | 126 | if self.dropout > 0: 127 | hs = self.do(hs) 128 | 129 | # No mask is returned as batch should contain same-length sequences 130 | return hs, None 131 | -------------------------------------------------------------------------------- /nmtpytorch/layers/ff.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*-
2 | import math
3 | 
4 | import torch
5 | import torch.nn.functional as F
6 | from torch import nn
7 | 
8 | from ..utils.nn import get_activation_fn
9 | 
10 | 
11 | class FF(nn.Module):
12 |     """A feedforward layer with optional bias and activation support.
13 | 
14 |     Arguments:
15 |         in_features(int): Input dimensionality.
16 |         out_features(int): Output dimensionality.
17 |         bias(bool, optional): Enable/disable bias for the layer. (Default: True)
18 |         bias_zero(bool, optional): Start with a 0-vector bias. (Default: True)
19 |         activ(str, optional): A string like 'tanh' or 'relu' to define the
20 |             non-linearity type. `None` or `'linear'` is a linear layer (default).
21 |     """
22 | 
23 |     def __init__(self, in_features, out_features, bias=True,
24 |                  bias_zero=True, activ=None):
25 |         super().__init__()
26 |         self.in_features = in_features
27 |         self.out_features = out_features
28 |         self.use_bias = bias
29 |         self.bias_zero = bias_zero
30 |         self.activ_type = activ
31 |         if self.activ_type in (None, 'linear'):
32 |             self.activ_type = 'linear'
33 |         self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
34 |         self.activ = get_activation_fn(activ)
35 | 
36 |         if self.use_bias:
37 |             self.bias = nn.Parameter(torch.Tensor(out_features))
38 |         else:
39 |             self.register_parameter('bias', None)
40 | 
41 |         self.reset_parameters()
42 | 
43 |     def reset_parameters(self):
44 |         stdv = 1. / math.sqrt(self.weight.size(1))
45 |         self.weight.data.uniform_(-stdv, stdv)
46 |         if self.use_bias:
47 |             if self.bias_zero:
48 |                 self.bias.data.zero_()
49 |             else:
50 |                 self.bias.data.uniform_(-stdv, stdv)
51 | 
52 |     def forward(self, input):
53 |         return self.activ(F.linear(input, self.weight, self.bias))
54 | 
55 |     def __repr__(self):
56 |         repr_ = self.__class__.__name__ + '(' \
57 |             + 'in_features=' + str(self.in_features) \
58 |             + ', out_features=' + str(self.out_features) \
59 |             + ', activ=' + str(self.activ_type) \
60 |             + ', bias=' + str(self.use_bias)
61 |         if self.use_bias:
62 |             repr_ += ', bias_zero=' + str(self.bias_zero)
63 |         return repr_ + ')'
64 | 
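A small usage sketch for the `FF` layer above (sizes are made up); note how `bias_zero=True` leaves the bias at exactly zero after `reset_parameters()`.

```python
import torch

# Sketch: a 16 -> 8 projection with tanh and a zero-initialized bias.
ff = FF(16, 8, bias=True, bias_zero=True, activ='tanh')
print(ff)                     # FF(in_features=16, out_features=8, activ=tanh, ...)

x = torch.randn(4, 16)
print(ff(x).shape)            # torch.Size([4, 8])
print(ff.bias.abs().sum())    # tensor(0., ...) right after initialization
```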
-------------------------------------------------------------------------------- /nmtpytorch/layers/flatten.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class Flatten(torch.nn.Module):
5 |     """Flattens all dimensions of the input except the first (batch) axis."""
6 |     def __init__(self):
7 |         super().__init__()
8 | 
9 |     def forward(self, x):
10 |         return x.view(x.size(0), -1)
11 | 
12 |     def __repr__(self):
13 |         return "Flatten()"
14 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/fusion.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import operator
3 | from functools import reduce
4 | 
5 | import torch
6 | 
7 | from . import FF
8 | from ..utils.nn import get_activation_fn
9 | 
10 | 
11 | class Fusion(torch.nn.Module):
12 |     """A convenience fusion layer that merges an arbitrary number of inputs.
13 | 
14 |     Arguments:
15 |         fusion_type(str, optional): One of ``concat,sum,mul`` defining the
16 |             fusion operation. In the default setup of ``concat``, the
17 |             following two arguments should be provided to create a
18 |             ``Linear`` adaptor which will project the concatenated vector to
19 |             ``output_size``.
20 |         input_size(int, optional): The dimensionality of the concatenated
21 |             input. Only necessary if ``fusion_type==concat``.
22 |         output_size(int, optional): The output dimensionality of the
23 |             concatenation. Only necessary if ``fusion_type==concat``.
24 |         fusion_activ(str, optional): Non-linearity applied to the fused
25 |             output. (Default: None)
26 |     """
27 | 
28 |     def __init__(self, fusion_type='concat', input_size=None, output_size=None,
29 |                  fusion_activ=None):
30 |         super().__init__()
31 | 
32 |         self.fusion_type = fusion_type
33 |         self.fusion_activ = fusion_activ
34 |         self.forward = getattr(self, '_{}'.format(self.fusion_type))
35 |         self.activ = get_activation_fn(fusion_activ)
36 |         self.adaptor = lambda x: x
37 | 
38 |         if self.fusion_type == 'concat' or input_size != output_size:
39 |             self.adaptor = FF(input_size, output_size, bias=False, activ=None)
40 | 
41 |     def _sum(self, *inputs):
42 |         return self.activ(self.adaptor(reduce(operator.add, inputs)))
43 | 
44 |     def _mul(self, *inputs):
45 |         return self.activ(self.adaptor(reduce(operator.mul, inputs)))
46 | 
47 |     def _concat(self, *inputs):
48 |         return self.activ(self.adaptor(torch.cat(inputs, dim=-1)))
49 | 
50 |     def __repr__(self):
51 |         return "Fusion(type={}, adaptor={}, activ={})".format(
52 |             self.fusion_type,
53 |             getattr(self, 'adaptor') if hasattr(self, 'adaptor') else 'None',
54 |             self.fusion_activ)
55 | 
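How the fusion modes above differ in practice (a sketch with made-up sizes): `concat` always creates the `FF` adaptor, whereas `sum` and `mul` keep the identity adaptor when input and output sizes already match.

```python
import torch

# 'concat' of two 8-dim vectors needs the 16 -> 8 adaptor;
# 'sum' with matching sizes keeps the identity adaptor.
fuse_cat = Fusion('concat', input_size=16, output_size=8)
fuse_sum = Fusion('sum', input_size=8, output_size=8)

a, b = torch.randn(5, 8), torch.randn(5, 8)
print(fuse_cat(a, b).shape)   # torch.Size([5, 8])
print(fuse_sum(a, b).shape)   # torch.Size([5, 8])
```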
-------------------------------------------------------------------------------- /nmtpytorch/layers/max_margin.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | from torch import nn
4 | 
5 | # Layer contributed by @elliottd
6 | 
7 | 
8 | class MaxMargin(nn.Module):
9 |     """A max-margin layer for ranking-based loss functions."""
10 | 
11 |     def __init__(self, margin, max_violation=False):
12 |         super().__init__()
13 | 
14 |         assert margin > 0., "margin must be > 0."
15 | 
16 |         # Other arguments
17 |         self.margin = margin
18 |         self.max_violation = max_violation
19 | 
20 |     def forward(self, enc1, enc2):
21 |         """Computes the max-margin loss given a pair of rank-2
22 |         annotation matrices. The matrices must have the same number of
23 |         batches and the same number of feats.
24 | 
25 |         Arguments:
26 |             enc1(Tensor): A tensor of `B*feats` representing the
27 |                 annotation vectors of the first encoder.
28 |             enc2(Tensor): A tensor of `B*feats` representing the
29 |                 annotation vectors of the second encoder.
30 |         """
31 | 
32 |         assert enc1.shape == enc2.shape, \
33 |             "shapes must match: enc1 {} enc2 {}".format(enc1.shape, enc2.shape)
34 | 
35 |         enc1 = enc1 / enc1.norm(p=2, dim=1).unsqueeze(1)
36 |         enc2 = enc2 / enc2.norm(p=2, dim=1).unsqueeze(1)
37 |         loss = self.contrastive_loss(enc1, enc2)
38 | 
39 |         return {'loss': loss}
40 | 
41 |     def contrastive_loss(self, enc1, enc2):
42 |         if enc1.shape[0] == 1:
43 |             # The contrastive loss is undefined for a single-instance batch;
44 |             # return a small dummy loss of 1e-3 as a regularizer instead
45 |             return torch.tensor([1e-3], device=enc1.device)
46 | 
47 |         # compute enc1-enc2 score matrix
48 |         scores = self.cosine_sim(enc1, enc2)
49 |         diagonal = scores.diag().view(enc1.size(0), 1)
50 |         d1 = diagonal.expand_as(scores)
51 |         d2 = diagonal.t().expand_as(scores)
52 | 
53 |         cost_enc1 = (self.margin + scores - d2).clamp(min=0)
54 |         cost_enc2 = (self.margin + scores - d1).clamp(min=0)
55 | 
56 |         # clear diagonals
57 |         mask = torch.eye(scores.size(0), device=enc1.device) > .5
58 |         cost_enc2 = cost_enc2.masked_fill_(mask, 0)
59 |         cost_enc1 = cost_enc1.masked_fill_(mask, 0)
60 | 
61 |         # keep the maximum violating negative for each query
62 |         if self.max_violation:
63 |             cost_enc2 = cost_enc2.max(1)[0]
64 |             cost_enc1 = cost_enc1.max(0)[0]
65 |         denom = cost_enc1.shape[0]**2 - cost_enc1.shape[0]
66 |         return (cost_enc2 + cost_enc1).sum() / denom
67 | 
68 |     def cosine_sim(self, one, two):
69 |         '''Cosine similarity between all the first and second encoder pairs.'''
70 |         return one.mm(two.t())
71 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/pool.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class Pool(torch.nn.Module):
5 |     """A pool layer with mean/max/sum/last options."""
6 |     def __init__(self, op_type, pool_dim, keepdim=True):
7 |         super().__init__()
8 | 
9 |         self.op_type = op_type
10 |         self.pool_dim = pool_dim
11 |         self.keepdim = keepdim
12 |         assert self.op_type in ["last", "mean", "max", "sum"], \
13 |             "Pool() operation should be mean, max, sum or last."
14 | 
15 |         if self.op_type == 'last':
16 |             self.__pool_fn = lambda x: x.select(
17 |                 self.pool_dim, -1).unsqueeze(0)
18 |         else:
19 |             if self.op_type == 'max':
20 |                 self.__pool_fn = lambda x: torch.max(
21 |                     x, dim=self.pool_dim, keepdim=self.keepdim)[0]
22 |             elif self.op_type == 'mean':
23 |                 self.__pool_fn = lambda x: torch.mean(
24 |                     x, dim=self.pool_dim, keepdim=self.keepdim)
25 |             elif self.op_type == 'sum':
26 |                 self.__pool_fn = lambda x: torch.sum(
27 |                     x, dim=self.pool_dim, keepdim=self.keepdim)
28 | 
29 |     def forward(self, x):
30 |         return self.__pool_fn(x)
31 | 
32 |     def __repr__(self):
33 |         return "Pool(op_type={}, pool_dim={}, keepdim={})".format(
34 |             self.op_type, self.pool_dim, self.keepdim)
35 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/rnninit.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | from torch import nn
4 | 
5 | from . import FF
6 | 
7 | 
8 | class RNNInitializer(nn.Module):
9 |     """RNN initializer block for encoders and decoders.
10 | 
11 |     Arguments:
12 |         rnn_type(str): GRU or LSTM.
13 |         input_size(int): Input dimensionality of the feature vectors that'll
14 |             be used for initialization if ``method != zero``.
15 |         hidden_size(int): Output dimensionality, i.e. hidden size of the RNN
16 |             that will be initialized.
17 |         n_layers(int): Number of recurrent layers to be initialized.
18 |         data_source(str): The modality name to look for in the batch dictionary.
19 |         method(str): One of ``last_ctx|mean_ctx|feats|zero``.
20 |         activ(str, optional): The non-linearity to be used for all initializers
21 |             except 'zero'. Default is ``None`` i.e. no non-linearity.
22 | """ 23 | def __init__(self, rnn_type, input_size, hidden_size, n_layers, data_source, 24 | method, activ=None): 25 | super().__init__() 26 | self.rnn_type = rnn_type 27 | self.input_size = input_size 28 | self.hidden_size = hidden_size 29 | self.n_layers = n_layers 30 | self.data_source = data_source 31 | self.method = method 32 | self.activ = activ 33 | 34 | # Check for RNN 35 | assert self.rnn_type in ('GRU', 'LSTM'), \ 36 | "rnn_type '{}' is unknown.".format(self.rnn_type) 37 | 38 | assert self.method in ('mean_ctx', 'last_ctx', 'zero', 'feats'), \ 39 | "RNN init method '{}' is unknown.".format(self.method) 40 | 41 | # LSTMs have also the cell state so double the output size 42 | assert self.rnn_type == 'GRU', 'LSTM support not ready yet.' 43 | self.n_states = 1 if self.rnn_type == 'GRU' else 2 44 | 45 | if self.method in ('mean_ctx', 'last_ctx', 'feats'): 46 | self.ff = FF( 47 | self.input_size, self.hidden_size * self.n_layers, 48 | activ=self.activ) 49 | 50 | # Set the actual initializer depending on the method 51 | self._initializer = getattr(self, '_init_{}'.format(self.method)) 52 | 53 | def forward(self, ctx_dict): 54 | ctx, ctx_mask = ctx_dict[self.data_source] 55 | x = self._initializer(ctx, ctx_mask) 56 | return torch.stack(torch.split(x, self.hidden_size, dim=-1)) 57 | 58 | def _init_zero(self, ctx, mask): 59 | # h_0: (n_layers, batch_size, hidden_size) 60 | return torch.zeros( 61 | ctx.shape[1], self.hidden_size * self.n_layers, device=ctx.device) 62 | 63 | def _init_feats(self, ctx, mask): 64 | return self.ff(ctx) 65 | 66 | def _init_mean_ctx(self, ctx, mask): 67 | if mask is None: 68 | return self.ff(ctx.mean(0)) 69 | else: 70 | return self.ff(ctx.sum(0) / mask.sum(0).unsqueeze(1)) 71 | 72 | def _init_last_ctx(self, ctx, mask): 73 | if mask is None: 74 | return self.ff(ctx[-1]) 75 | else: 76 | # Fetch last timesteps 77 | last_tsteps = mask.sum(0).sub(1).long() 78 | return self.ff(ctx[last_tsteps, range(ctx.shape[1])]) 79 | 80 | def __repr__(self): 81 | return self.__class__.__name__ + '(' \ 82 | + 'in_features={}, '.format(self.input_size) \ 83 | + 'out_features={}, '.format(self.hidden_size) \ 84 | + 'activ={}, '.format(self.activ) \ 85 | + 'method={}'.format(self.method) + ')' 86 | -------------------------------------------------------------------------------- /nmtpytorch/layers/seq_conv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | 7 | # Code contributed by @jlibovicky 8 | 9 | 10 | class SequenceConvolution(nn.Module): 11 | """1D convolution with optional max-pooling. 12 | 13 | The layer applies 1D convolution of odd kernel size with output channel 14 | counts specified by a list of integers. Then, it optionally applies 1D 15 | max-pooling to reduce the sequence length. 
16 |     """
17 | 
18 |     def __init__(self, input_dim, filters, max_pool_stride=None, activation='relu'):
19 |         super().__init__()
20 |         self.max_pool_stride = max_pool_stride
21 | 
22 |         self.conv_proj = nn.ModuleList([
23 |             nn.Conv1d(in_channels=input_dim,
24 |                       out_channels=size,
25 |                       kernel_size=2 * k + 1,
26 |                       padding=k)
27 |             for k, size in enumerate(filters) if size > 0])
28 | 
29 |         if self.max_pool_stride is not None:
30 |             self.max_pool = nn.MaxPool1d(
31 |                 kernel_size=self.max_pool_stride,
32 |                 stride=self.max_pool_stride)
33 |         else:
34 |             self.max_pool = None
35 | 
36 |     def forward(self, x, mask):
37 |         conv_outputs = [conv(x.permute(1, 2, 0)) for conv in self.conv_proj]
38 |         conv_out = torch.cat(conv_outputs, dim=1)
39 | 
40 |         if self.max_pool is not None:
41 |             conv_len = conv_out.size(-1)
42 |             if conv_len < self.max_pool_stride:
43 |                 pad_size = self.max_pool_stride - conv_len
44 |                 conv_out = F.pad(conv_out, pad=[pad_size, pad_size])
45 |             max_pooled_data = self.max_pool(conv_out).permute(2, 0, 1)
46 |             max_pooled_mask = (self.max_pool(mask.t().unsqueeze(1)).squeeze(1).t()
47 |                                if mask is not None else None)
48 |             return max_pooled_data, max_pooled_mask
49 |         else:
50 |             return conv_out.permute(2, 0, 1), mask
51 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/__init__.py: --------------------------------------------------------------------------------
1 | from .residual_lnorm import ResidualLayerNorm
2 | from .positionwise_ff import PositionwiseFF
3 | from .embedding import TFEmbedding
4 | from .encoder import TFEncoder
5 | from .decoder import TFDecoder
6 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/decoder.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..attention import ScaledDotAttention
4 | from . import ResidualLayerNorm, PositionwiseFF
5 | 
6 | 
7 | class TFDecoder(torch.nn.Module):
8 |     """Decoder block for Transformer.
9 | 
10 |     Arguments:
11 |         model_dim(int): Model dimensionality.
12 |         ff_dim(int): Inner dimensionality of the position-wise feed-forward layers.
13 |         n_heads(int): Number of attention heads.
14 |         n_layers(int): Number of stacked decoder layers.
15 |     """
16 | 
17 |     def __init__(self, model_dim, ff_dim, n_heads, n_layers):
18 |         super().__init__()
19 |         self.model_dim = model_dim
20 |         self.ff_dim = ff_dim
21 |         self.n_heads = n_heads
22 |         self.n_layers = n_layers
23 |         blocks = []
24 | 
25 |         for _ in range(self.n_layers):
26 |             layers = torch.nn.Sequential(
27 |                 ScaledDotAttention(self.model_dim, self.n_heads, causal=True),
28 |                 ResidualLayerNorm(self.model_dim),
29 |                 PositionwiseFF(self.model_dim, self.ff_dim),
30 |                 ResidualLayerNorm(self.model_dim),
31 |             )
32 |             blocks.append(layers)
33 | 
34 |         self.blocks = torch.nn.ModuleList(blocks)
35 | 
36 |     def forward(self, x, mask=None, **kwargs):
37 |         """Forward-pass of the decoder block.
38 | 
39 |         :param x: input tensor, shape (tstep, bsize, model_dim)
40 |         :param mask: mask tensor for unavailable batch positions (tstep, bsize)
41 | 
42 |         :return: tuple of the transformed tensor and the mask
43 |         """
44 |         for block in self.blocks:
45 |             x, mask = block((x, x, x, mask))
46 |         return (x, mask)
47 | 
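The `causal=True` flag handed to `ScaledDotAttention` is what keeps the decoder autoregressive: position `t` may only attend to positions `<= t`. The attention layer's internals are not shown here, but the standard recipe it implies looks like the following standalone sketch (not the library's own code):

```python
import torch

tsteps = 5
# Upper-triangular mask: True marks "future" positions to be blocked
causal = torch.triu(torch.ones(tsteps, tsteps), diagonal=1).bool()

scores = torch.randn(tsteps, tsteps)               # raw attention logits
scores = scores.masked_fill(causal, float('-inf')) # block the future
probs = torch.softmax(scores, dim=-1)              # rows sum to 1; upper triangle is 0
print(probs)
```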
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/embedding.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class TFEmbedding(torch.nn.Embedding):
5 |     """Position-aware embeddings for Transformer models.
6 | 
7 |     Adapted from OpenNMT-py & the original `Attention is all you need` paper.
8 |     """
9 |     def __init__(self, num_embeddings, embedding_dim, max_len=1024, dropout=0.1):
10 |         self.num_embeddings = num_embeddings
11 |         self.embedding_dim = embedding_dim
12 |         self.max_len = max_len
13 |         self.dropout = dropout
14 | 
15 |         # pos_embs: (max_len, emb_dim)
16 |         pos_embs = torch.zeros(self.max_len, self.embedding_dim)
17 |         # pos: (max_len, 1)
18 |         pos = torch.arange(self.max_len).unsqueeze(1)
19 |         # divs: (emb_dim, ) wavelength denominators 10000^(i / dim)
20 |         divs = torch.pow(10000,
21 |                          torch.arange(self.embedding_dim).float().div(self.embedding_dim))
22 | 
23 |         pos_embs[:, 0::2] = torch.sin(pos / divs[0::2])
24 |         pos_embs[:, 1::2] = torch.cos(pos / divs[1::2])
25 |         # pos_embs: (max_len, 1, emb_dim)
26 |         pos_embs.unsqueeze_(1)
27 |         sqrt_dim = torch.scalar_tensor(self.embedding_dim).sqrt()
28 | 
29 |         # Call parent's init() first
30 |         super().__init__(num_embeddings, embedding_dim, padding_idx=0)
31 | 
32 |         # Register non-learnable params as buffers
33 |         self.register_buffer('pos_embs', pos_embs)
34 |         self.register_buffer('sqrt_dim', sqrt_dim)
35 |         # Create dropout layer
36 |         self.dropout_layer = torch.nn.Dropout(p=self.dropout)
37 | 
38 |     def forward(self, x):
39 |         # Get the embeddings from parent's forward first
40 |         embs = super().forward(x)
41 |         return self.dropout_layer(
42 |             embs.mul(self.sqrt_dim) + self.pos_embs[:embs.size(0)])
43 | 
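The buffer arithmetic in `TFEmbedding` is easier to see in isolation. The sketch below rebuilds the same sin/cos table with made-up dimensions; in `forward`, token embeddings are scaled by `sqrt(dim)` before this table is added.

```python
import torch

max_len, dim = 6, 8
pos = torch.arange(max_len).unsqueeze(1)                   # (max_len, 1)
divs = torch.pow(10000, torch.arange(dim).float() / dim)   # (dim,)

pe = torch.zeros(max_len, dim)
pe[:, 0::2] = torch.sin(pos / divs[0::2])   # even dims get sine
pe[:, 1::2] = torch.cos(pos / divs[1::2])   # odd dims get cosine

# Each row is the additive position signal for one timestep
print(pe.shape)   # torch.Size([6, 8])
```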
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/encoder.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..attention import ScaledDotAttention
4 | from . import ResidualLayerNorm, PositionwiseFF
5 | 
6 | 
7 | class TFEncoder(torch.nn.Module):
8 |     """Encoder block for Transformer.
9 | 
10 |     Arguments:
11 |         model_dim(int): Model dimensionality.
12 |         ff_dim(int): Inner dimensionality of the position-wise feed-forward layers.
13 |         n_heads(int): Number of attention heads.
14 |         n_layers(int): Number of stacked encoder layers.
15 |     """
16 | 
17 |     def __init__(self, model_dim, ff_dim, n_heads, n_layers):
18 |         super().__init__()
19 |         self.model_dim = model_dim
20 |         self.ff_dim = ff_dim
21 |         self.n_heads = n_heads
22 |         self.n_layers = n_layers
23 |         blocks = []
24 | 
25 |         for _ in range(self.n_layers):
26 |             layers = torch.nn.Sequential(
27 |                 ScaledDotAttention(self.model_dim, self.n_heads),
28 |                 ResidualLayerNorm(self.model_dim),
29 |                 PositionwiseFF(self.model_dim, self.ff_dim),
30 |                 ResidualLayerNorm(self.model_dim),
31 |             )
32 |             blocks.append(layers)
33 | 
34 |         self.blocks = torch.nn.ModuleList(blocks)
35 | 
36 |     def forward(self, x, mask=None, **kwargs):
37 |         """Forward-pass of the encoder block.
38 | 
39 |         :param x: input tensor, shape (tstep, bsize, model_dim)
40 |         :param mask: mask tensor for unavailable batch positions (tstep, bsize)
41 | 
42 |         :return: tuple of the transformed tensor and the mask
43 |         """
44 |         for block in self.blocks:
45 |             x, mask = block((x, x, x, mask))
46 |         return (x, mask)
47 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/positionwise_ff.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from .. import FF
4 | 
5 | 
6 | class PositionwiseFF(torch.nn.Module):
7 |     """Position-wise feed-forward layer.
8 | 
9 |     Arguments:
10 |         model_dim(int): Model dimensionality.
11 |         ff_dim(int): Inner projection dimensionality.
12 |         activ(str, optional): Inner non-linearity. (Default: 'relu')
13 | 
14 |     Input:
15 |         A tuple of the input tensor (tstep, bsize, model_dim) and its mask.
16 | 
17 |     Output:
18 |         A (x, FF(x), mask) tuple ready to be consumed by `ResidualLayerNorm`.
19 |     """
20 | 
21 |     def __init__(self, model_dim, ff_dim, activ='relu'):
22 |         super().__init__()
23 |         self.model_dim = model_dim
24 |         self.ff_dim = ff_dim
25 |         self.activ = activ
26 | 
27 |         # Create the layers
28 |         self.func = torch.nn.Sequential(
29 |             FF(self.model_dim, self.ff_dim, activ=self.activ),
30 |             FF(self.ff_dim, self.model_dim, activ=None),
31 |         )
32 | 
33 |     def forward(self, inputs):
34 |         x, mask = inputs
35 |         return (x, self.func(x), mask)
36 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/residual_lnorm.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from .. import LayerNorm
4 | 
5 | 
6 | class ResidualLayerNorm(torch.nn.Module):
7 |     """Residually connected Layer Normalization layer.
8 | 
9 |     Arguments:
10 |         model_dim(int): Model dimensionality.
11 |         affine(bool, optional): Learn elementwise affine parameters. (Default: True)
12 |         dropout(float, optional): Dropout rate for the sublayer output. (Default: 0.1)
13 | 
14 |     Input:
15 |         A tuple of (x, Sublayer(x), mask).
16 | 
17 |     Output:
18 |         A tuple of (LayerNorm(x + Dropout(Sublayer(x))), mask).
19 |     """
20 | 
21 |     def __init__(self, model_dim, affine=True, dropout=0.1):
22 |         super().__init__()
23 |         self.model_dim = model_dim
24 |         self.affine = affine
25 |         self.dropout = dropout
26 | 
27 |         self.norm = LayerNorm(self.model_dim, elementwise_affine=self.affine)
28 |         self.dropout_layer = torch.nn.Dropout(self.dropout)
29 | 
30 |     def forward(self, inputs):
31 |         # Unpack into `x` and `Sublayer(x)`
32 |         x, f_x, mask = inputs
33 |         return (self.norm(x + self.dropout_layer(f_x)), mask)
34 | 
-------------------------------------------------------------------------------- /nmtpytorch/logger.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pathlib
3 | import logging
4 | 
5 | from .cleanup import cleanup
6 | 
7 | 
8 | def setup(opts=None):
9 |     _format = '%(message)s'
10 | 
11 |     formatter = logging.Formatter(_format)
12 |     logger = logging.getLogger('nmtpytorch')
13 |     logger.setLevel(logging.DEBUG)
14 | 
15 |     con_handler = logging.StreamHandler()
16 |     con_handler.setFormatter(formatter)
17 |     logger.addHandler(con_handler)
18 | 
19 |     if opts is not None:
20 |         log_file = str(pathlib.Path(opts['save_path']) /
21 |                        opts['subfolder'] / opts['exp_id']) + '.log'
22 |         file_handler = logging.FileHandler(log_file, mode='w')
23 |         file_handler.setFormatter(formatter)
24 |         logger.addHandler(file_handler)
25 | 
26 |     cleanup.register_handler(logger)
27 |     return logger
28 | 
-------------------------------------------------------------------------------- /nmtpytorch/metrics/__init__.py: --------------------------------------------------------------------------------
1 | from .metric import Metric
2 | from .multibleu import BLEUScorer
3 | from .sacrebleu import SACREBLEUScorer
4 | from .meteor import METEORScorer
5 | from .wer import WERScorer
6 | from .cer import CERScorer
7 | from .rouge import ROUGEScorer
8 | 
9 | beam_metrics = ["BLEU", "SACREBLEU", "METEOR", "WER", "CER", "ROUGE"]
10 | 
11 | metric_info = {
12 |     'BLEU': 'max',
13 |     'SACREBLEU': 'max',
14 |     'METEOR': 'max',
15 |     'ROUGE': 'max',
16 |     'LOSS': 'min',
17 |     'WER': 'min',
18 |     'CER': 'min',
19 |     'ACC': 'max',
20 |     'RECALL': 'max',
21 |     'PRECISION': 'max',
22 |     'F1': 'max',
23 | }
24 | 
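Since `metric_info` records whether each metric improves upward (BLEU) or downward (WER), selection code can stay metric-agnostic. A small sketch with made-up score histories:

```python
# Direction-agnostic model selection on top of metric_info above;
# the per-epoch score lists are invented for illustration.
scores = {'BLEU': [31.2, 33.0, 32.5], 'WER': [18.4, 16.9, 17.3]}

for name, history in scores.items():
    pick = max if metric_info[name] == 'max' else min
    best_epoch = history.index(pick(history)) + 1
    print('{}: best = {} @ epoch {}'.format(name, pick(history), best_epoch))
# BLEU: best = 33.0 @ epoch 2
# WER: best = 16.9 @ epoch 2
```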
post-processing.""" 9 | def compute(self, refs, hyps, language=None, lowercase=False): 10 | if isinstance(hyps, str): 11 | # hyps is a file 12 | hyp_sents = open(hyps).read().strip().split('\n') 13 | elif isinstance(hyps, list): 14 | hyp_sents = hyps 15 | 16 | # refs is a list, take its first item 17 | with open(refs[0]) as f: 18 | ref_sents = f.read().strip().split('\n') 19 | 20 | assert len(hyp_sents) == len(ref_sents), "CER: # of sentences does not match." 21 | 22 | n_ref_chars = 0 23 | n_ref_tokens = 0 24 | dist_chars = 0 25 | dist_tokens = 0 26 | for hyp, ref in zip(hyp_sents, ref_sents): 27 | hyp_chars = hyp.split(' ') 28 | ref_chars = ref.split(' ') 29 | n_ref_chars += len(ref_chars) 30 | dist_chars += editdistance.eval(hyp_chars, ref_chars) 31 | 32 | # Convert char-based sentences to token-based ones 33 | hyp_tokens = hyp.replace(' ', '').replace('', ' ').strip().split(' ') 34 | ref_tokens = ref.replace(' ', '').replace('', ' ').strip().split(' ') 35 | n_ref_tokens += len(ref_tokens) 36 | dist_tokens += editdistance.eval(hyp_tokens, ref_tokens) 37 | 38 | cer = (100 * dist_chars) / n_ref_chars 39 | wer = (100 * dist_tokens) / n_ref_tokens 40 | 41 | verbose_score = "{:.3f}% (n_errors = {}, n_ref_chars = {}, WER = {:.3f}%)".format( 42 | cer, dist_chars, n_ref_chars, wer) 43 | 44 | return Metric('CER', cer, verbose_score, higher_better=False) 45 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/meteor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import shutil 4 | import pathlib 5 | import subprocess 6 | 7 | from ..utils.misc import listify, get_meteor_jar 8 | from .metric import Metric 9 | 10 | 11 | class METEORScorer: 12 | def __init__(self): 13 | self.jar = str(get_meteor_jar()) 14 | self.__cmdline = ["java", "-Xmx2G", "-jar", self.jar, 15 | "-", "-", "-stdio"] 16 | self.env = os.environ 17 | self.env['LC_ALL'] = 'en_US.UTF-8' 18 | 19 | # Sanity check 20 | if shutil.which('java') is None: 21 | raise RuntimeError('METEOR requires java which is not installed.') 22 | 23 | def compute(self, refs, hyps, language="auto"): 24 | cmdline = self.__cmdline[:] 25 | refs = listify(refs) 26 | 27 | if isinstance(hyps, str): 28 | # If file, open it for line reading 29 | hyps = open(hyps) 30 | 31 | if language == "auto": 32 | # Take the extension of the 1st reference file, e.g. 
".de" 33 | language = pathlib.Path(refs[0]).suffix[1:] 34 | 35 | cmdline.extend(["-l", language]) 36 | 37 | # Make reference files a list 38 | iters = [open(f) for f in refs] 39 | iters.append(hyps) 40 | 41 | # Run METEOR process 42 | proc = subprocess.Popen(cmdline, 43 | stdout=subprocess.PIPE, 44 | stdin=subprocess.PIPE, 45 | stderr=subprocess.PIPE, 46 | env=self.env, 47 | universal_newlines=True, bufsize=1) 48 | 49 | eval_line = 'EVAL' 50 | 51 | for line_ctr, lines in enumerate(zip(*iters)): 52 | lines = [l.rstrip('\n') for l in lines] 53 | refstr = " ||| ".join(lines[:-1]) 54 | line = "SCORE ||| " + refstr + " ||| " + lines[-1] 55 | 56 | proc.stdin.write(line + '\n') 57 | eval_line += ' ||| {}'.format(proc.stdout.readline().strip()) 58 | 59 | # Send EVAL line to METEOR 60 | proc.stdin.write(eval_line + '\n') 61 | 62 | # Dummy read segment scores 63 | for i in range(line_ctr + 1): 64 | proc.stdout.readline().strip() 65 | 66 | # Compute final METEOR 67 | try: 68 | score = float(proc.stdout.readline().strip()) 69 | score = Metric('METEOR', 100 * score) 70 | except Exception as e: 71 | score = Metric('METEOR', 0.0) 72 | finally: 73 | # Close METEOR process 74 | proc.stdin.close() 75 | proc.terminate() 76 | proc.kill() 77 | proc.wait(timeout=2) 78 | return score 79 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/metric.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from functools import total_ordering 3 | 4 | 5 | @total_ordering 6 | class Metric: 7 | """A Metric object to represent evaluation metrics. 8 | 9 | Arguments: 10 | name(str): A name for the metric that will be kept internally 11 | after upper-casing 12 | score(float): A floating point score 13 | detailed_score(str, optional): A custom, more detailed string 14 | representing the score given above (Default: "") 15 | higher_better(bool, optional): If ``False``, the smaller the better 16 | (Default: ``True``) 17 | """ 18 | 19 | def __init__(self, name, score, detailed_score="", higher_better=True): 20 | self.name = name.upper() 21 | self.score = score 22 | self.detailed_score = detailed_score 23 | self.higher_better = higher_better 24 | 25 | def __eq__(self, other): 26 | return self.score == other.score 27 | 28 | def __lt__(self, other): 29 | return self.score < other.score 30 | 31 | def __repr__(self): 32 | rhs = (self.detailed_score if self.detailed_score 33 | else "%.2f" % self.score) 34 | return self.name + ' = ' + rhs 35 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/multibleu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import subprocess 3 | import pkg_resources 4 | 5 | from ..utils.misc import listify 6 | from .metric import Metric 7 | 8 | BLEU_SCRIPT = pkg_resources.resource_filename('nmtpytorch', 9 | 'lib/multi-bleu.perl') 10 | 11 | 12 | class BLEUScorer: 13 | """BLEUScorer class.""" 14 | def __init__(self): 15 | # For multi-bleu.perl we give the reference(s) files as argv, 16 | # while the candidate translations are read from stdin. 
17 | self.__cmdline = [BLEU_SCRIPT] 18 | 19 | def compute(self, refs, hyps, language=None, lowercase=False): 20 | cmdline = self.__cmdline[:] 21 | 22 | if lowercase: 23 | cmdline.append("-lc") 24 | 25 | # Make reference files a list 26 | cmdline.extend(listify(refs)) 27 | 28 | if isinstance(hyps, str): 29 | hypstring = open(hyps).read().strip() 30 | elif isinstance(hyps, list): 31 | hypstring = "\n".join(hyps) 32 | 33 | score = subprocess.run(cmdline, stdout=subprocess.PIPE, 34 | input=hypstring, 35 | universal_newlines=True).stdout.splitlines() 36 | 37 | if len(score) == 0: 38 | return Metric('BLEU', 0, "0.0") 39 | else: 40 | score = score[0].strip() 41 | float_score = float(score.split()[2][:-1]) 42 | verbose_score = score.replace('BLEU = ', '') 43 | return Metric('BLEU', float_score, verbose_score) 44 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/rouge.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .metric import Metric 3 | from ..cocoeval import Rouge 4 | 5 | 6 | class ROUGEScorer: 7 | def compute(self, refs, hyps, language=None, lowercase=False): 8 | if isinstance(hyps, str): 9 | # hyps is a file 10 | hyp_sents = open(hyps).read().strip().split('\n') 11 | elif isinstance(hyps, list): 12 | hyp_sents = hyps 13 | 14 | # refs is a list, take its first item 15 | with open(refs[0]) as f: 16 | ref_sents = f.read().strip().split('\n') 17 | 18 | assert len(hyp_sents) == len(ref_sents), "ROUGE: # of sentences does not match." 19 | 20 | rouge_scorer = Rouge() 21 | 22 | rouge_sum = 0 23 | for hyp, ref in zip(hyp_sents, ref_sents): 24 | rouge_sum += rouge_scorer.calc_score([hyp], [ref]) 25 | 26 | score = (100 * rouge_sum) / len(hyp_sents) 27 | verbose_score = "{:.3f}".format(score) 28 | 29 | return Metric('ROUGE', score, verbose_score, higher_better=True) 30 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/sacrebleu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import subprocess 3 | 4 | from ..utils.misc import listify 5 | from .metric import Metric 6 | 7 | 8 | class SACREBLEUScorer: 9 | """SACREBLEUScorer class.""" 10 | def __init__(self): 11 | self.__cmdline = ["sacrebleu", "--short"] 12 | 13 | def compute(self, refs, hyps, language=None, lowercase=False): 14 | cmdline = self.__cmdline[:] 15 | 16 | if lowercase: 17 | cmdline.append("-lc") 18 | 19 | # Make reference files a list 20 | cmdline.extend(listify(refs)) 21 | 22 | if isinstance(hyps, str): 23 | hypstring = open(hyps).read().strip() 24 | elif isinstance(hyps, list): 25 | hypstring = "\n".join(hyps) 26 | 27 | score = subprocess.run(cmdline, stdout=subprocess.PIPE, 28 | input=hypstring, 29 | universal_newlines=True).stdout.splitlines() 30 | 31 | if len(score) == 0: 32 | return Metric('SACREBLEU', 0, "0.0") 33 | else: 34 | score = score[0].strip() 35 | float_score = float(score.split()[2]) 36 | verbose_score = ' '.join(score.split()[2:]) 37 | return Metric('SACREBLEU', float_score, verbose_score) 38 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/wer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import editdistance 3 | 4 | from .metric import Metric 5 | 6 | 7 | class WERScorer: 8 | def compute(self, refs, hyps, language=None, lowercase=False): 9 | if 
isinstance(hyps, str): 10 | # hyps is a file 11 | hyp_sents = open(hyps).read().strip().split('\n') 12 | elif isinstance(hyps, list): 13 | hyp_sents = hyps 14 | 15 | # refs is a list, take its first item 16 | with open(refs[0]) as f: 17 | ref_sents = f.read().strip().split('\n') 18 | 19 | assert len(hyp_sents) == len(ref_sents), "WER: # of sentences does not match." 20 | 21 | n_ref_tokens = 0 22 | dist = 0 23 | for hyp, ref in zip(hyp_sents, ref_sents): 24 | hyp_tokens = hyp.split(' ') 25 | ref_tokens = ref.split(' ') 26 | n_ref_tokens += len(ref_tokens) 27 | dist += editdistance.eval(hyp_tokens, ref_tokens) 28 | 29 | score = (100 * dist) / n_ref_tokens 30 | verbose_score = "{:.3f}% (n_errors = {}, n_ref_tokens = {})".format( 31 | score, dist, n_ref_tokens) 32 | 33 | return Metric('WER', score, verbose_score, higher_better=False) 34 | -------------------------------------------------------------------------------- /nmtpytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | ##### 2 | # NMT 3 | ##### 4 | from .nmt import NMT 5 | from .tfnmt import TransformerNMT 6 | 7 | ################ 8 | # Multimodal NMT 9 | ################ 10 | from .simple_mmt import SimpleMMT 11 | from .attentive_mmt import AttentiveMMT 12 | 13 | ############### 14 | # Speech models 15 | ############### 16 | from .asr import ASR 17 | from .multimodal_asr import MultimodalASR 18 | -------------------------------------------------------------------------------- /nmtpytorch/models/attentive_mmt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from torch import nn 5 | 6 | from ..datasets import MultimodalDataset 7 | from ..layers import ConditionalMMDecoder, TextEncoder, FF 8 | from .nmt import NMT 9 | 10 | logger = logging.getLogger('nmtpytorch') 11 | 12 | 13 | class AttentiveMMT(NMT): 14 | """An end-to-end sequence-to-sequence NMT model with visual attention over 15 | pre-extracted convolutional features. 16 | """ 17 | def set_defaults(self): 18 | # Set parent defaults 19 | super().set_defaults() 20 | self.defaults.update({ 21 | 'fusion_type': 'concat', # Multimodal context fusion (sum|mul|concat) 22 | 'fusion_activ': 'tanh', # Multimodal context non-linearity 23 | 'vis_activ': 'linear', # Visual feature transformation activ. 24 | 'n_channels': 2048, # depends on the features used 25 | 'mm_att_type': 'md-dd', # multimodal attention type 26 | # md: modality dep. 27 | # mi: modality indep. 28 | # dd: decoder state dep. 29 | # di: decoder state indep. 30 | 'out_logic': 'deep', # simple vs deep output 31 | 'persistent_dump': False, # To save activations during beam-search 32 | 'preatt': False, # Apply filtered attention 33 | 'preatt_activ': 'ReLU', # Activation for convatt block 34 | 'dropout_img': 0.0, # Dropout on image features 35 | }) 36 | 37 | def __init__(self, opts): 38 | super().__init__(opts) 39 | 40 | def setup(self, is_train=True): 41 | # Textual context dim 42 | txt_ctx_size = self.ctx_sizes[self.sl] 43 | 44 | # Add visual context transformation (sect. 
3.2 in paper) 45 | self.ff_img = FF( 46 | self.opts.model['n_channels'], txt_ctx_size, 47 | activ=self.opts.model['vis_activ']) 48 | 49 | self.dropout_img = nn.Dropout(self.opts.model['dropout_img']) 50 | 51 | # Add vis ctx size 52 | self.ctx_sizes['image'] = txt_ctx_size 53 | 54 | ######################## 55 | # Create Textual Encoder 56 | ######################## 57 | self.enc = TextEncoder( 58 | input_size=self.opts.model['emb_dim'], 59 | hidden_size=self.opts.model['enc_dim'], 60 | n_vocab=self.n_src_vocab, 61 | rnn_type=self.opts.model['enc_type'], 62 | dropout_emb=self.opts.model['dropout_emb'], 63 | dropout_ctx=self.opts.model['dropout_ctx'], 64 | dropout_rnn=self.opts.model['dropout_enc'], 65 | num_layers=self.opts.model['n_encoders'], 66 | emb_maxnorm=self.opts.model['emb_maxnorm'], 67 | emb_gradscale=self.opts.model['emb_gradscale']) 68 | 69 | # Create Decoder 70 | self.dec = ConditionalMMDecoder( 71 | input_size=self.opts.model['emb_dim'], 72 | hidden_size=self.opts.model['dec_dim'], 73 | n_vocab=self.n_trg_vocab, 74 | rnn_type=self.opts.model['dec_type'], 75 | ctx_size_dict=self.ctx_sizes, 76 | ctx_name=str(self.sl), 77 | fusion_type=self.opts.model['fusion_type'], 78 | fusion_activ=self.opts.model['fusion_activ'], 79 | tied_emb=self.opts.model['tied_emb'], 80 | dec_init=self.opts.model['dec_init'], 81 | att_type=self.opts.model['att_type'], 82 | mm_att_type=self.opts.model['mm_att_type'], 83 | out_logic=self.opts.model['out_logic'], 84 | att_activ=self.opts.model['att_activ'], 85 | transform_ctx=self.opts.model['att_transform_ctx'], 86 | att_ctx2hid=False, 87 | mlp_bias=self.opts.model['att_mlp_bias'], 88 | att_bottleneck=self.opts.model['att_bottleneck'], 89 | dropout_out=self.opts.model['dropout_out'], 90 | emb_maxnorm=self.opts.model['emb_maxnorm'], 91 | emb_gradscale=self.opts.model['emb_gradscale'], 92 | persistent_dump=self.opts.model['persistent_dump']) 93 | 94 | # Share encoder and decoder weights 95 | if self.opts.model['tied_emb'] == '3way': 96 | self.enc.emb.weight = self.dec.emb.weight 97 | 98 | def load_data(self, split, batch_size, mode='train'): 99 | """Loads the requested dataset split.""" 100 | dataset = MultimodalDataset( 101 | data=self.opts.data[split + '_set'], 102 | mode=mode, batch_size=batch_size, 103 | vocabs=self.vocabs, topology=self.topology, 104 | bucket_by=self.opts.model['bucket_by'], 105 | max_len=self.opts.model.get('max_len', None), 106 | order_file=self.opts.data[split + '_set'].get('ord', None)) 107 | logger.info(dataset) 108 | return dataset 109 | 110 | def encode(self, batch, **kwargs): 111 | # Transform the features to context dim 112 | feats = self.dropout_img(self.ff_img(batch['image'])) 113 | 114 | # Get source language encodings (S*B*C) 115 | text_encoding = self.enc(batch[self.sl]) 116 | 117 | return { 118 | str(self.sl): text_encoding, 119 | 'image': (feats, None), 120 | } 121 | -------------------------------------------------------------------------------- /nmtpytorch/models/stale/README.md: -------------------------------------------------------------------------------- 1 | Stale models 2 | --- 3 | 4 | This folder contains files from older/experimental models which may or 5 | may not work with the current code. They are merely here for reference. 
6 | 
-------------------------------------------------------------------------------- /nmtpytorch/models/tfnmt.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | 
4 | import torch
5 | from torch import nn
6 | 
7 | from ..layers.transformers import TFEmbedding, TFEncoder, TFDecoder
8 | 
9 | from . import NMT
10 | 
11 | logger = logging.getLogger('nmtpytorch')
12 | 
13 | 
14 | class TransformerNMT(NMT):
15 |     supports_beam_search = True
16 | 
17 |     def set_defaults(self):
18 |         self.defaults = {
19 |             'model_dim': 512,         # model_dim
20 |             'ff_dim': 2048,           # Positionwise FF inner dimension
21 |             'n_enc_layers': 6,        # Number of encoder layers
22 |             'n_dec_layers': 6,        # Number of decoder layers
23 |             'n_heads': 8,             # Number of attention heads
24 |             'direction': None,        # Network directionality, i.e. en->de
25 |             'max_len': None,          # Reject sentences longer than this w.r.t 'bucket_by' (None: no filtering)
26 |             'bucket_by': None,        # A key like 'en' to define w.r.t which dataset
27 |                                       # the batches will be sorted
28 |             'bucket_order': None,     # Curriculum: ascending/descending/None
29 |             'sampler_type': 'bucket', # bucket or approximate
30 |             'short_list': 0,          # Vocabulary short listing
31 |         }
32 | 
33 |     def __init__(self, opts):
34 |         super().__init__(opts)
35 | 
36 |     def reset_parameters(self):
37 |         for name, param in self.named_parameters():
38 |             # Skip 1-d biases and scalars
39 |             if param.requires_grad and param.dim() > 1:
40 |                 nn.init.kaiming_normal_(param.data)
41 |         # Reset padding embedding to 0
42 |         with torch.no_grad():
43 |             self.src_emb.weight.data[0].fill_(0)
44 |             self.trg_emb.weight.data[0].fill_(0)
45 | 
46 |     def setup(self, is_train=True):
47 |         """Sets up NN topology by creating the layers."""
48 |         # Create the embeddings
49 |         self.src_emb = TFEmbedding(self.n_src_vocab, self.opts.model['model_dim'])
50 |         self.trg_emb = TFEmbedding(self.n_trg_vocab, self.opts.model['model_dim'])
51 |         self.enc = TFEncoder(
52 |             self.opts.model['model_dim'], self.opts.model['ff_dim'],
53 |             self.opts.model['n_heads'], self.opts.model['n_enc_layers'])
54 |         self.dec = TFDecoder(
55 |             self.opts.model['model_dim'], self.opts.model['ff_dim'],
56 |             self.opts.model['n_heads'], self.opts.model['n_dec_layers'])
57 |         self.seq_loss = torch.nn.NLLLoss(reduction='sum', ignore_index=0)
58 | 
59 |     def encode(self, batch, **kwargs):
60 |         # mask: (tstep, bsize)
61 |         mask = batch[self.sl].ne(0).float()
62 | 
63 |         # embs: (tstep, bsize, dim)
64 |         embs = self.src_emb(batch[self.sl])
65 |         h, mask = self.enc(embs, mask=mask)
66 | 
67 |         d = {str(self.sl): (h, mask)}
68 |         return d
69 | 
70 |     def forward(self, batch, **kwargs):
71 |         # NOTE: The training pass below is an unfinished draft
72 |         enc = self.encode(batch)
73 | 
74 |         dec_input = batch[self.tl]
75 | 
76 |         # result = self.dec(self.encode(batch), batch[self.tl])
77 |         # result['n_items'] = torch.nonzero(batch[self.tl][1:]).shape[0]
78 |         # return result
79 | 
-------------------------------------------------------------------------------- /nmtpytorch/samplers/__init__.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from .bucket import BucketBatchSampler
3 | from .approx import ApproximateBucketBatchSampler
4 | 
5 | def get_sampler(type_):
6 |     return {
7 |         'bucket': BucketBatchSampler,
8 |         'approximate': ApproximateBucketBatchSampler,
9 |     }[type_.lower()]
10 | 
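A short sketch of the factory above: the lookup is case-insensitive thanks to `type_.lower()`, and the constructor arguments follow the `ApproximateBucketBatchSampler` signature documented below (the length list is made up):

```python
sort_lens = [12, 7, 12, 31, 7, 12]   # made-up per-sample sequence lengths

sampler_cls = get_sampler('APPROXIMATE')
print(sampler_cls.__name__)          # ApproximateBucketBatchSampler

sampler = sampler_cls(batch_size=2, sort_lens=sort_lens)
print(sampler.n_batches)             # number of batches it will yield
```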
coding: utf-8 -*- 2 | import math 3 | import logging 4 | from collections import defaultdict 5 | 6 | import numpy as np 7 | 8 | from ..utils.device import DEVICE_IDS 9 | from . import BucketBatchSampler 10 | 11 | logger = logging.getLogger('nmtpytorch') 12 | 13 | 14 | class ApproximateBucketBatchSampler(BucketBatchSampler): 15 | r"""Samples batch indices from sequence-length buckets efficiently 16 | with very little memory overhead. 17 | 18 | Different from `BucketBatchSampler`, this class bins data samples w.r.t 19 | lengths but does not guarantee that each bucket necessarily contains 20 | same-length sequences. Further padding/packing/masking should be done 21 | by detecting possible <pad> items in tensors. 22 | 23 | Arguments: 24 | batch_size (int): Size of mini-batch. 25 | sort_lens (list): List of source or target lengths corresponding to each 26 | item in the dataset. 27 | max_len (int, optional): A maximum sequence length that will be used 28 | to filter out very long sequences. ``None`` means no filtering. 29 | store_indices (bool, optional): If ``True``, indices that will unsort 30 | the dataset will be stored. This is used by beam search/inference. 31 | order (str, optional): Default is ``None``, i.e. buckets are shuffled. 32 | If ``ascending`` or ``descending``, will iterate w.r.t bucket 33 | lengths to implement length-based curriculum learning. 34 | """ 35 | 36 | def __init__(self, batch_size, sort_lens, max_len=None, 37 | store_indices=False, order=None): 38 | assert order in (None, 'ascending', 'descending'), \ 39 | "order should be None, 'ascending' or 'descending'" 40 | 41 | self.batch_size = batch_size 42 | self.max_len = max_len 43 | self.n_rejects = 0 44 | self.order = order 45 | self.store_indices = store_indices 46 | 47 | # Additional balancing logic for multi-GPU 48 | self.n_devices = len(DEVICE_IDS) if DEVICE_IDS else 1 49 | 50 | # Buckets: sort_lens -> list of sample indices 51 | self.buckets = defaultdict(list) 52 | 53 | # Pre-compute how many times a bucket will be sampled 54 | self.bucket_idxs = [] 55 | 56 | # Fill the buckets while optionally filtering out long sequences 57 | if self.max_len is not None: 58 | for idx, len_ in enumerate(sort_lens): 59 | if len_ <= self.max_len: 60 | self.buckets[len_].append(idx) 61 | else: 62 | self.n_rejects += 1 63 | logger.info('{} samples rejected because of length filtering @ {}'.format( 64 | self.n_rejects, self.max_len)) 65 | else: 66 | # No length filtering 67 | for idx, len_ in enumerate(sort_lens): 68 | self.buckets[len_].append(idx) 69 | 70 | ###################################### 71 | # Modified part compared to base class 72 | ###################################### 73 | ordered_idxs = [] 74 | min_bucket_size = self.batch_size * 5 75 | for length in sorted(self.buckets): 76 | ordered_idxs.extend(self.buckets[length]) 77 | 78 | # Reset buckets 79 | self.buckets = {} 80 | n_elems = len(ordered_idxs) 81 | 82 | # Bin sorted buckets approximately 83 | for idx, start in enumerate(range(0, n_elems, min_bucket_size)): 84 | self.buckets[idx] = ordered_idxs[start:start + min_bucket_size] 85 | 86 | # number of elems in the last bucket 87 | last_bucket_size = len(self.buckets[idx]) 88 | # number of elems in the last batch of last bucket 89 | last_batch_size = last_bucket_size % self.batch_size 90 | # how many to remove so that the last batch divides evenly 91 | # across the GPUs 92 | n_remove_from_last = last_batch_size % self.n_devices 93 | end_point = last_bucket_size - n_remove_from_last 94 | self.buckets[idx] = 
self.buckets[idx][:end_point] 95 | if n_remove_from_last > 0: 96 | logger.info('Removed {} samples to balance buckets.'.format( 97 | n_remove_from_last)) 98 | 99 | self.stats = {k: len(self.buckets[k]) for k in sorted(self.buckets)} 100 | 101 | for len_ in self.buckets: 102 | # Convert bucket to numpy array 103 | np_bucket = np.array(self.buckets[len_]) 104 | 105 | # How many batches will be done for this bucket? 106 | bucket_bs = np_bucket.size / self.batch_size 107 | idxs = [len_] * math.ceil(bucket_bs) 108 | 109 | self.buckets[len_] = np_bucket 110 | self.bucket_idxs.extend(idxs) 111 | 112 | # Convert to numpy array 113 | self.bucket_idxs = np.array(self.bucket_idxs) 114 | 115 | # Set number of batches 116 | self.n_batches = len(self.bucket_idxs) 117 | -------------------------------------------------------------------------------- /nmtpytorch/tester.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import time 3 | import logging 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from .utils.misc import load_pt_file, pbar 10 | from .utils.data import make_dataloader 11 | from .utils.device import DEVICE 12 | 13 | from . import models 14 | from .config import Options 15 | 16 | logger = logging.getLogger('nmtpytorch') 17 | 18 | 19 | class Tester: 20 | """Tester for models without beam-search.""" 21 | 22 | def __init__(self, **kwargs): 23 | # Store attributes directly. See bin/nmtpy for their list. 24 | self.__dict__.update(kwargs) 25 | 26 | # How many models? 27 | if len(self.models) > 1: 28 | raise RuntimeError("Test mode requires single model file.") 29 | 30 | self.model_file = self.models[0] 31 | 32 | # Disable gradient tracking 33 | torch.set_grad_enabled(False) 34 | 35 | data = load_pt_file(self.model_file) 36 | weights, _, opts = data['model'], data['history'], data['opts'] 37 | 38 | opts = Options.from_dict(opts, override_list=self.override) 39 | instance = getattr(models, opts.train['model_type'])(opts=opts) 40 | 41 | if instance.supports_beam_search: 42 | logger.info("Model supports beam-search by the way.") 43 | 44 | # Setup layers 45 | instance.setup(is_train=False) 46 | # Load weights 47 | instance.load_state_dict(weights, strict=False) 48 | # Move to device 49 | instance.to(DEVICE) 50 | # Switch to eval mode 51 | instance.train(False) 52 | 53 | self.instance = instance 54 | 55 | # Can be a comma separated list of hardcoded test splits 56 | if self.splits: 57 | logger.info('Will process "{}"'.format(self.splits)) 58 | self.splits = self.splits.split(',') 59 | elif self.source: 60 | # Split into key:value's and parse into dict 61 | input_dict = {} 62 | logger.info('Will process input configuration:') 63 | for data_source in self.source.split(','): 64 | key, path = data_source.split(':', 1) 65 | input_dict[key] = Path(path) 66 | logger.info(' {}: {}'.format(key, input_dict[key])) 67 | self.instance.opts.data['new_set'] = input_dict 68 | self.splits = ['new'] 69 | 70 | def extract_encodings(self, instance, split): 71 | """(Experimental) feature extraction mode.""" 72 | dataset = instance.load_data(split, self.batch_size, mode='eval') 73 | loader = make_dataloader(dataset) 74 | n_samples = len(dataset) 75 | feats = [] 76 | ord_feats = [] 77 | logger.info('Starting extraction') 78 | start = time.time() 79 | for batch in pbar(loader, unit='batch'): 80 | batch.device(DEVICE) 81 | out, _ = list(instance.encode(batch).values())[0] 82 | feats.append(out.data.cpu().transpose(0, 1)) 83 | for 
feat in feats: 84 | # this is a batch 85 | ord_feats.extend([f for f in feat]) 86 | idxs = zip(range(n_samples), loader.batch_sampler.orig_idxs) 87 | idxs = sorted(idxs, key=lambda x: x[1]) 88 | ord_feats = [ord_feats[i[0]].numpy() for i in idxs] 89 | np.save('{}_{}.encodings.npy'.format(self.model_file, split), ord_feats) 90 | up_time = time.time() - start 91 | logger.info('Took {:.3f} seconds'.format(up_time)) 92 | 93 | def test(self, instance, split): 94 | dataset = instance.load_data(split, self.batch_size, mode='eval') 95 | loader = make_dataloader(dataset) 96 | 97 | logger.info('Starting computation') 98 | start = time.time() 99 | results = instance.test_performance( 100 | loader, 101 | dump_file="{}.{}".format(self.model_file, split)) 102 | up_time = time.time() - start 103 | logger.info('Took {:.3f} seconds'.format(up_time)) 104 | return results 105 | 106 | def __call__(self): 107 | for input_ in self.splits: 108 | if self.mode == 'eval': 109 | results = self.test(self.instance, input_) 110 | for res in results: 111 | print(' {}: {:.5f}'.format(res.name, res.score)) 112 | elif self.mode == 'enc': 113 | self.extract_encodings(self.instance, input_) 114 | -------------------------------------------------------------------------------- /nmtpytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['misc', 'device', 'nn', 'data', 'io', 'topology'] 2 | -------------------------------------------------------------------------------- /nmtpytorch/utils/data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import logging 4 | from torch.utils.data import DataLoader 5 | import numpy as np 6 | 7 | from ..utils.misc import fopen, pbar 8 | 9 | logger = logging.getLogger('nmtpytorch') 10 | 11 | 12 | def sort_predictions(data_loader, results): 13 | """Recovers the dataset order when bucketing samplers are used.""" 14 | if getattr(data_loader.batch_sampler, 'store_indices', False): 15 | results = [results[i] for i, j in sorted( 16 | enumerate(data_loader.batch_sampler.orig_idxs), key=lambda k: k[1])] 17 | return results 18 | 19 | 20 | def make_dataloader(dataset, pin_memory=False, num_workers=0): 21 | if num_workers != 0: 22 | logger.info('Forcing num_workers to 0 since it fails with torch 0.4') 23 | num_workers = 0 24 | 25 | return DataLoader( 26 | dataset, batch_sampler=dataset.sampler, 27 | collate_fn=dataset.collate_fn, 28 | pin_memory=pin_memory, num_workers=num_workers) 29 | 30 | 31 | def sort_batch(seqbatch): 32 | """Sorts torch tensor of integer indices by decreasing order.""" 33 | # 0 is padding_idx 34 | omask = (seqbatch != 0).long() 35 | olens = omask.sum(0) 36 | slens, sidxs = torch.sort(olens, descending=True) 37 | oidxs = torch.sort(sidxs)[1] 38 | return (oidxs, sidxs, slens.data.tolist(), omask.float()) 39 | 40 | 41 | def pad_video_sequence(seqs): 42 | """ 43 | Pads video sequences with zero vectors for minibatch processing. 44 | (contributor: @elliottd) 45 | 46 | TODO: Can we write the for loop in a more compact format? 47 | """ 48 | lengths = [len(s) for s in seqs] 49 | # Get the desired size of the padding vector from the input seqs data 50 | feat_size = seqs[0].shape[1] 51 | max_len = max(lengths) 52 | tmp = [] 53 | for s, len_ in zip(seqs, lengths): 54 | if max_len - len_ == 0: 55 | tmp.append(s) 56 | else: 57 | inner_tmp = s 58 | for i in range(max_len - len_): 59 | inner_tmp = np.vstack((inner_tmp, (np.array([0.] 
* feat_size)))) 60 | tmp.append(inner_tmp) 61 | padded = np.array(tmp, dtype='float32') 62 | return torch.FloatTensor(torch.from_numpy(padded)) 63 | 64 | 65 | def convert_to_onehot(idxs, n_classes): 66 | """Returns a binary batch_size x n_classes one-hot tensor.""" 67 | out = torch.zeros(len(idxs), n_classes, device=idxs[0].device) 68 | for row, indices in zip(out, idxs): 69 | row.scatter_(0, indices, 1) 70 | return out 71 | 72 | 73 | def read_sentences(fname, vocab, bos=False, eos=True): 74 | lines = [] 75 | lens = [] 76 | with fopen(fname) as f: 77 | for idx, line in enumerate(pbar(f, unit='sents')): 78 | line = line.strip() 79 | 80 | # Empty lines will cause a lot of headaches, 81 | # get rid of them during preprocessing! 82 | assert line, "Empty line (%d) found in %s" % (idx + 1, fname) 83 | 84 | # Map and append 85 | seq = vocab.sent_to_idxs(line, explicit_bos=bos, explicit_eos=eos) 86 | lines.append(seq) 87 | lens.append(len(seq)) 88 | 89 | return lines, lens 90 | -------------------------------------------------------------------------------- /nmtpytorch/utils/device.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import shutil 4 | import subprocess 5 | 6 | import torch 7 | 8 | DEVICE = None 9 | DEVICE_IDS = None 10 | 11 | 12 | class DeviceManager: 13 | __errors = { 14 | 'BadDeviceFormat': 'Device can be cpu, gpu or [N]gpu, i.e. 2gpu', 15 | 'NoDevFiles': 'Make sure you requested a GPU resource from your cluster.', 16 | 'NoSMI': 'nvidia-smi is not installed. Are you on the correct node?', 17 | 'EnvVar': 'Please set CUDA_VISIBLE_DEVICES explicitly.', 18 | 'NoMultiGPU': 'Multi-GPU not supported for now.', 19 | 'NotEnoughGPU': 'You requested {} GPUs while you have access to only {}.', 20 | } 21 | 22 | def __init__(self, dev): 23 | self.dev = dev.lower() 24 | self.pid = os.getpid() 25 | self.req_cpu = False 26 | self.req_gpu = False 27 | self.req_n_gpu = 0 28 | self.req_multi_gpu = False 29 | self.nvidia_smi = False 30 | self.cuda_dev_ids = None 31 | 32 | if not re.match('(cpu|[0-9]{0,1}gpu)$', self.dev): 33 | raise RuntimeError(self.__errors['BadDeviceFormat']) 34 | 35 | if self.dev == 'cpu': 36 | self.req_cpu = True 37 | self.dev = torch.device('cpu') 38 | else: 39 | self.req_gpu = True 40 | if self.dev == 'gpu': 41 | self.req_n_gpu = 1 42 | else: 43 | self.req_n_gpu = int(self.dev[0]) 44 | 45 | self.req_multi_gpu = self.req_n_gpu > 1 46 | 47 | # What we have 48 | self.nvidia_smi = shutil.which('nvidia-smi') 49 | self.cuda_dev_ids = os.environ.get('CUDA_VISIBLE_DEVICES', None) 50 | 51 | if self.nvidia_smi is None: 52 | raise RuntimeError(self.__errors['NoSMI']) 53 | if self.cuda_dev_ids == "NoDevFiles": 54 | raise RuntimeError(self.__errors['NoDevFiles']) 55 | elif self.cuda_dev_ids is None: 56 | raise RuntimeError(self.__errors['EnvVar']) 57 | 58 | # How many GPUs do we have access to? 59 | self.cuda_dev_ids = [int(de) for de in self.cuda_dev_ids.split(',')] 60 | 61 | # FIXME: Remove this once DataParallel works. 
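        # Until DataParallel support lands, both requesting more than one GPU
        # and exposing more than one device via CUDA_VISIBLE_DEVICES are rejected.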
62 | if self.req_n_gpu > 1 or len(self.cuda_dev_ids) > 1: 63 | raise RuntimeError(self.__errors['NoMultiGPU']) 64 | 65 | if self.req_n_gpu > len(self.cuda_dev_ids): 66 | raise RuntimeError( 67 | self.__errors['NotEnoughGPU'].format( 68 | self.req_n_gpu, len(self.cuda_dev_ids))) 69 | else: 70 | self.cuda_dev_ids = self.cuda_dev_ids[:self.req_n_gpu] 71 | 72 | # Set master device (is always cuda:0 since we force env.var 73 | # restriction) 74 | self.dev = torch.device('cuda:0') 75 | 76 | global DEVICE, DEVICE_IDS 77 | DEVICE = self.dev 78 | DEVICE_IDS = self.cuda_dev_ids 79 | 80 | def get_cuda_mem_usage(self, name=True): 81 | if self.req_cpu: 82 | return None 83 | 84 | pr = subprocess.run([ 85 | self.nvidia_smi, 86 | "--query-compute-apps=pid,gpu_name,used_memory", 87 | "--format=csv,noheader"], stdout=subprocess.PIPE, universal_newlines=True) 88 | 89 | for line in pr.stdout.strip().split('\n'): 90 | pid, gpu_name, usage = line.split(',') 91 | if int(pid) == self.pid: 92 | if name: 93 | return '{} -> {}'.format(gpu_name.strip(), usage.strip()) 94 | return usage.strip() 95 | 96 | return 'N/A' 97 | 98 | def __repr__(self): 99 | if self.req_cpu: 100 | return "DeviceManager(dev='cpu')" 101 | return "DeviceManager({}, n_gpu={})".format(self.dev, self.req_n_gpu) 102 | -------------------------------------------------------------------------------- /nmtpytorch/utils/filterchain.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | from pathlib import Path 4 | 5 | from .misc import get_temp_file, fopen 6 | 7 | 8 | class FilterChain: 9 | """A sequential filter chain to post-process lists of tokens. 10 | 11 | Arguments: 12 | filters(list): A list of strings representing filters to apply. 13 | 14 | Available Filters: 15 | 'de-bpe': Stitches back subword units produced with apply_bpe 16 | 'de-spm': Stitches back sentence pieces produced with spm_encode 17 | 'de-segment': Converts <tag:morpheme> segmentations to normal form 18 | 'de-compound': Stitches back German compound splittings 19 | 'c2w': Stitches back space delimited characters to words. 20 | Necessary for word-level BLEU, etc. when using CharNMT. 21 | 'lower': Lowercase. 22 | 'upper': Uppercase. 23 | 'de-hyphen': De-hyphenate 'foo @-@ bar' constructs of Moses. 24 | 25 | """ 26 | FILTERS = { 27 | 'de-bpe': lambda s: s.replace("@@ ", "").replace("@@", ""), 28 | 'de-tag': lambda s: re.sub('<[a-zA-Z][a-zA-Z]>', '', s), 29 | # Decoder for Google sentencepiece 30 | # only for default params of spm_encode 31 | 'de-spm': lambda s: s.replace(" ", "").replace("\u2581", " ").strip(), 32 | # Converts segmentations of <tag:morpheme> to normal form 33 | 'de-segment': lambda s: re.sub(' *<.*?:(.*?)>', '\\1', s), 34 | # Space delim character sequence to non-tokenized normal word form 35 | 'c2w': lambda s: s.replace(' ', '').replace('<s>', ' ').strip(), 36 | # Filters out fillers from compound splitted sentences 37 | 'de-compound': lambda s: (s.replace(" @@ ", "").replace(" @@", "") 38 | .replace(" @", "").replace("@ ", "")), 39 | # de-hyphenate when -a given to Moses tokenizer 40 | 'de-hyphen': lambda s: re.sub(r'\s*@-@\s*', '-', s), 41 | 'lower': lambda s: s.lower(), 42 | 'upper': lambda s: s.upper(), 43 | } 44 | 45 | def __init__(self, filters): 46 | assert not set(filters).difference(set(self.FILTERS.keys())), \ 47 | "Unknown evaluation filter given." 
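        # Usage sketch: FilterChain(['de-bpe', 'lower']) first stitches
        # subword units back together, then lowercases each sentence;
        # filters are applied consecutively, in the order given.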
48 | self.filters = filters 49 | self._funcs = [self.FILTERS[k] for k in self.filters] 50 | 51 | def _apply(self, list_of_strs): 52 | """Applies filters consecutively on a list of sentences.""" 53 | for func in self._funcs: 54 | list_of_strs = [func(s) for s in list_of_strs] 55 | return list_of_strs 56 | 57 | def __call__(self, inp): 58 | """Applies the filterchain on a given input. 59 | 60 | Arguments: 61 | inp(pathlib.Path or list): If a `Path` given, temporary 62 | file(s) with filters applied are returned. The `Path` can 63 | also be a glob expression. Otherwise, a list with filtered 64 | sentences is returned. 65 | """ 66 | if isinstance(inp, Path): 67 | # Need to create copies of reference files with filters applied 68 | # and return their paths instead 69 | fnames = inp.parent.glob(inp.name) 70 | new_fnames = [] 71 | for fname in fnames: 72 | lines = [] 73 | f = fopen(fname) 74 | for line in f: 75 | lines.append(line.strip()) 76 | f.close() 77 | f = get_temp_file() 78 | for line in self._apply(lines): 79 | f.write(line + '\n') 80 | f.close() 81 | new_fnames.append(f.name) 82 | return new_fnames 83 | 84 | elif isinstance(inp, list): 85 | return self._apply(inp) 86 | 87 | def __repr__(self): 88 | return "FilterChain({})".format(" -> ".join(self.filters)) 89 | -------------------------------------------------------------------------------- /nmtpytorch/utils/io.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import deque 3 | 4 | 5 | class FileRotator: 6 | """A fixed queue with Path() elements where pushing a new element pops 7 | the oldest one and removes it from disk. 8 | 9 | Arguments: 10 | maxlen(int): The capacity of the queue. 11 | """ 12 | 13 | def __init__(self, maxlen): 14 | self.maxlen = maxlen 15 | self.elems = deque(maxlen=self.maxlen) 16 | 17 | def push(self, elem): 18 | if len(self.elems) == self.maxlen: 19 | # Remove oldest item 20 | popped = self.elems.pop() 21 | if popped.exists(): 22 | popped.unlink() 23 | 24 | # Add new item 25 | self.elems.appendleft(elem) 26 | 27 | def __repr__(self): 28 | return self.elems.__repr__() 29 | -------------------------------------------------------------------------------- /nmtpytorch/utils/nn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pickle as pkl 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | 9 | def get_rnn_hidden_state(h): 10 | """Returns h_t transparently regardless of RNN type.""" 11 | return h if not isinstance(h, tuple) else h[0] 12 | 13 | 14 | def get_activation_fn(name): 15 | """Returns a callable activation function from torch.""" 16 | if name in (None, 'linear'): 17 | return lambda x: x 18 | elif name in ('sigmoid', 'tanh'): 19 | return getattr(torch, name) 20 | else: 21 | return getattr(F, name) 22 | 23 | 24 | def mean_pool(data): 25 | """Simple mean pool function for transforming 3D features of shape 26 | [T]imesteps x [B]atch_size x [F]eature_size into 2D BxF features. 27 | (author: @klmulligan) 28 | 29 | Arguments: 30 | data (tuple): Encoder result of form (data: Tensor(TxBxF), mask: Tensor(TxB)) 31 | Returns: 32 | pooled_data (Tensor): Mean pooled data of shape BxF. 
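    Example (a shape-level sketch, illustrative only):
        x = torch.rand(7, 2, 4)     # T x B x F
        mask = torch.ones(7, 2)     # T x B
        out = mean_pool((x, mask))  # -> B x F, here of shape (2, 4)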
33 | """ 34 | # Unpack 35 | x, mask = data 36 | 37 | if mask is not None: 38 | return x.sum(0) / mask.sum(0).unsqueeze(1) 39 | else: 40 | return x.mean(0) 41 | 42 | 43 | def get_partial_embedding_layer(vocab, embedding_dim, pretrained_file, 44 | freeze='none', oov_zero=True): 45 | """A partially updateable embedding layer with pretrained embeddings. 46 | This is experimental and not quite tested.""" 47 | avail_idxs, miss_idxs = [], [] 48 | avail_embs = [] 49 | 50 | # Load the pickled dictionary 51 | with open(pretrained_file, 'rb') as f: 52 | pret_dict = pkl.load(f) 53 | 54 | for idx, word in vocab._imap.items(): 55 | if word in pret_dict: 56 | avail_embs.append(pret_dict[word]) 57 | avail_idxs.append(idx) 58 | else: 59 | miss_idxs.append(idx) 60 | 61 | # This matrix contains the pretrained embeddings 62 | avail_embs = torch.Tensor(avail_embs) 63 | 64 | # We don't need the whole dictionary anymore 65 | del pret_dict 66 | 67 | n_pretrained = len(avail_idxs) 68 | n_learned = vocab.n_tokens - n_pretrained 69 | 70 | # Sanity checks 71 | assert len(avail_idxs) + len(miss_idxs) == vocab.n_tokens 72 | 73 | # Create the layer 74 | emb = nn.Embedding(vocab.n_tokens, embedding_dim, padding_idx=0) 75 | if oov_zero: 76 | emb.weight.data.fill_(0) 77 | 78 | # Copy in the pretrained embeddings 79 | emb.weight.data[n_learned:] = avail_embs 80 | # Sanity check 81 | assert torch.equal(emb.weight.data[-1], avail_embs[-1]) 82 | 83 | grad_mask = None 84 | if freeze == 'all': 85 | emb.weight.requires_grad = False 86 | elif freeze == 'partial': 87 | # Create bitmap gradient mask 88 | grad_mask = torch.ones(vocab.n_tokens) 89 | grad_mask[n_learned:].fill_(0) 90 | grad_mask[0].fill_(0) 91 | grad_mask.unsqueeze_(1) 92 | 93 | def grad_mask_hook(grad): 94 | return grad_mask.to(grad.device) * grad 95 | 96 | emb.weight.register_hook(grad_mask_hook) 97 | 98 | # Return the layer 99 | return emb 100 | -------------------------------------------------------------------------------- /nmtpytorch/utils/tensorboard.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | 4 | from torch.utils.tensorboard import SummaryWriter 5 | 6 | 7 | class TensorBoard: 8 | def __init__(self, model, log_dir, exp_id, subfolder): 9 | self.model = model 10 | self.log_dir = log_dir 11 | self.exp_id = exp_id 12 | self.subfolder = subfolder 13 | self.writer = None 14 | self.available = bool(self.log_dir) 15 | 16 | # Call setup 17 | self.setup() 18 | 19 | def _nop(self, *args, **kwargs): 20 | return 21 | 22 | def setup(self): 23 | """Sets up the TensorBoard logger.""" 24 | if not self.available: 25 | self.replace_loggers() 26 | return 27 | 28 | # Construct full folder path 29 | self.log_dir = pathlib.Path(self.log_dir).expanduser() 30 | self.log_dir = self.log_dir / self.subfolder / self.exp_id 31 | self.log_dir.mkdir(parents=True, exist_ok=True) 32 | 33 | # Set up summary writer 34 | self.writer = SummaryWriter(self.log_dir) 35 | 36 | def replace_loggers(self): 37 | """Replace all log_* methods with dummy _nop.""" 38 | self.log_metrics = self._nop 39 | self.log_scalar = self._nop 40 | self.log_activations = self._nop 41 | self.log_gradients = self._nop 42 | 43 | def log_metrics(self, metrics, step, suffix=''): 44 | """Logs evaluation metrics as scalars.""" 45 | for metric in metrics: 46 | self.writer.add_scalar(suffix + metric.name, metric.score, 47 | global_step=step) 48 | 49 | def log_scalar(self, name, value, step): 50 | """Logs a single scalar value.""" 51 | 
self.writer.add_scalar(name, value, global_step=step) 52 | 53 | def log_activations(self, step): 54 | """Logs activations by layer.""" 55 | pass 56 | 57 | def log_gradients(self, step): 58 | """Logs gradients by layer.""" 59 | pass 60 | 61 | def close(self): 62 | """Closes TensorBoard handle.""" 63 | if self.available: 64 | self.writer.close() 65 | 66 | def __repr__(self): 67 | if not self.log_dir: 68 | return "No 'tensorboard_dir' given in config" 69 | return "TensorBoard is active" 70 | -------------------------------------------------------------------------------- /nmtpytorch/utils/topology.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import UserString, OrderedDict 3 | 4 | from .. import datasets 5 | 6 | 7 | class DataSource(UserString): 8 | def __init__(self, name, _type, src=False, trg=False): 9 | super().__init__(name) 10 | self._type = _type 11 | self.src = src 12 | self.trg = trg 13 | self.side = 'src' if self.src else 'trg' 14 | 15 | # Assign the method that knows how to create a tensor for a batch 16 | # of this type 17 | klass = getattr(datasets, '{}Dataset'.format(_type)) 18 | self.kwargs = {} 19 | self.torchify = lambda batch: klass.to_torch(batch, **self.kwargs) 20 | def __repr__(self): 21 | return "DataSource('{}', kwargs:{})".format(self.data, self.kwargs) 22 | 23 | 24 | class Topology: 25 | """A simple object that parses the direction string provided through the 26 | experiment configuration file. 27 | 28 | A direction is a string with the following syntax: 29 | feat:<type>, feat:<type>, ... -> feat:<type>, feat:<type>, ... 30 | 31 | where 32 | feat determines the name of the modality, i.e. 'en', 'image', etc. 33 | type is the prefix of the actual ``Dataset`` class to be used 34 | with this modality, i.e. Text, ImageFolder, OneHot, etc. 35 | if type is omitted, the default is Text. 
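    Multiple comma-separated sources/targets may be given on either side of '->', as the examples below show.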
36 | 37 | Example: 38 | de:Text (no target side) 39 | de:Text -> en:Text 40 | de:Text -> en:Text, en_pos:OneHot 41 | de:Text, image:ImageFolder -> en:Text 42 | """ 43 | def __init__(self, direction): 44 | self.direction = direction 45 | self.srcs = OrderedDict() 46 | self.trgs = OrderedDict() 47 | self.all = OrderedDict() 48 | 49 | parts = direction.strip().split('->') 50 | if len(parts) == 1: 51 | srcs, trgs = parts[0].strip().split(','), [] 52 | else: 53 | srcs = parts[0].strip().split(',') if parts[0].strip() else [] 54 | trgs = parts[1].strip().split(',') if parts[1].strip() else [] 55 | 56 | # Temporary dict to parse sources and targets in a single loop 57 | tmp = {'srcs': srcs, 'trgs': trgs} 58 | 59 | for key, values in tmp.items(): 60 | _dict = getattr(self, key) 61 | for val in values: 62 | name, *ftype = val.strip().split(':') 63 | ftype = ftype[0] if len(ftype) > 0 else "Text" 64 | ds = DataSource(name, ftype, 65 | src=(key == 'srcs'), trg=(key == 'trgs')) 66 | if name in self.all: 67 | raise RuntimeError( 68 | '"{}" already given as a data source.'.format(name)) 69 | _dict[name] = ds 70 | self.all[name] = ds 71 | 72 | # Assign shortcuts 73 | self.first_src = list(self.srcs.keys())[0] 74 | self.first_trg = list(self.trgs.keys())[0] 75 | 76 | def is_included_in(self, t): 77 | """Return True if this topology is included in t, otherwise False.""" 78 | if t is None: 79 | return False 80 | return (self.srcs.keys() <= t.srcs.keys()) and (self.trgs.keys() <= t.trgs.keys()) 81 | 82 | def get_srcs(self, _type): 83 | return [v for v in self.srcs.values() if v._type == _type] 84 | 85 | def get_trgs(self, _type): 86 | return [v for v in self.trgs.values() if v._type == _type] 87 | 88 | def get_src_langs(self): 89 | return self.get_srcs('Text') 90 | 91 | def get_trg_langs(self): 92 | return self.get_trgs('Text') 93 | 94 | def __getitem__(self, key): 95 | return self.all[key] 96 | 97 | def __repr__(self): 98 | s = "Sources:\n" 99 | for x in self.srcs.values(): 100 | s += " {}\n".format(x.__repr__()) 101 | s += "Targets:\n" 102 | for x in self.trgs.values(): 103 | s += " {}\n".format(x.__repr__()) 104 | return s 105 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | scripts 2 | -- 3 | -------------------------------------------------------------------------------- /scripts/create-pretrained-embs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import re 3 | import json 4 | import argparse 5 | from collections import OrderedDict 6 | 7 | import numpy as np 8 | import torch 9 | 10 | from nmtpytorch.vocabulary import Vocabulary 11 | 12 | 13 | def get_nmtpy_vocab_tokens(fname): 14 | vocab = Vocabulary(fname, name='en') 15 | base_tokens = list(vocab._map.keys()) 16 | # remove special tokens 17 | base_tokens = set(base_tokens).difference(vocab.TOKENS.keys()) 18 | return base_tokens 19 | 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser( 23 | prog='create-pretrained-embs', 24 | description="Creates a .ckpt file with pretrained embeddings ready-to-use.") 25 | 26 | parser.add_argument('-i', '--input', type=str, required=True, 27 | help="Input pretrained file.") 28 | 29 | parser.add_argument('-t', '--type', type=str, required=True, 30 | choices=['glove', 'fasttext'], 31 | help="Input file format i.e. 
glove or fasttext") 32 | 33 | parser.add_argument('-n', '--n-tokens', type=int, default=0, 34 | help="Size limit of final vocabulary.") 35 | 36 | parser.add_argument('-b', '--base-vocab', type=str, nargs='*', 37 | help="nmtpy .vocab file(s) for tokens which should always be included.") 38 | 39 | parser.add_argument('-o', '--out-prefix', type=str, required=True, 40 | help="Prefix for output files.") 41 | 42 | args = parser.parse_args() 43 | 44 | embs = {} 45 | base_tokens = [] 46 | 47 | ######################## 48 | # Read base vocabularies 49 | ######################## 50 | for fname in args.base_vocab: 51 | base_tokens.extend(get_nmtpy_vocab_tokens(fname)) 52 | 53 | ####################### 54 | # Read pretrained store 55 | ####################### 56 | with open(args.input) as f: 57 | for line in f: 58 | word, *vals = line.strip().split(' ') 59 | embs[word] = vals 60 | 61 | # Separate out special tokens 62 | spec_embs = {} 63 | for tok in ('<unk>', '<pad>', '<bos>', '<eos>', '<s>', '</s>'): 64 | if tok in embs: 65 | spec_embs[tok] = embs.pop(tok) 66 | elif tok.upper() in embs: 67 | spec_embs[tok.upper()] = embs.pop(tok.upper()) 68 | 69 | print(f'Number of pretrained vectors: {len(embs)}') 70 | 71 | ############################################# 72 | # Construct the list for the final vocabulary 73 | ############################################# 74 | deferred_init = [] 75 | vocab = OrderedDict() 76 | 77 | def emb2float(vals): 78 | return [float(v) for v in vals] 79 | 80 | # Put anything to <pad> as it will later be rewritten with zeros 81 | vocab['<pad>'] = emb2float(embs['.']) 82 | vocab['<bos>'] = emb2float(spec_embs['<s>']) 83 | vocab['<eos>'] = emb2float(spec_embs['</s>']) 84 | # We'll re-init this at a later stage 85 | vocab['<unk>'] = emb2float(embs['.']) 86 | # Moses hyphen symbol is OOV, use plain hyphen 87 | embs['@-@'] = embs['-'] 88 | 89 | # Put base tokens 90 | for tok in base_tokens: 91 | if tok in embs: 92 | vocab[tok] = emb2float(embs.pop(tok)) 93 | else: 94 | deferred_init.append(tok) 95 | 96 | # Only alphabetic ones 97 | re_pat = re.compile('^[a-z]+$') 98 | filtered_words = list(filter(lambda x: re_pat.match(x), embs.keys())) 99 | 100 | if args.n_tokens > 0: 101 | # Complete to args.n_tokens 102 | how_many = args.n_tokens - len(vocab) - len(deferred_init) 103 | else: 104 | # Add all 105 | how_many = len(filtered_words) 106 | 107 | for word in filtered_words[:how_many]: 108 | vocab[word] = emb2float(embs[word]) 109 | 110 | word_order = list(vocab.keys()) 111 | emb_W = np.array(list(vocab.values()), dtype='float32') 112 | 113 | # Init randomly the deferred ones with sample averages 114 | np.random.seed(39348) 115 | deferred_embs = np.empty( 116 | (len(deferred_init), emb_W.shape[1]), dtype='float32') 117 | for idx, tok in enumerate(deferred_init): 118 | word_order.append(tok) 119 | idxs = np.random.permutation(emb_W.shape[0])[:10000] 120 | deferred_embs[idx] = emb_W[idxs].mean(0) 121 | 122 | # merge altogether 123 | emb_W = np.concatenate([emb_W, deferred_embs]) 124 | 125 | # Finally replace <unk> with average embedding 126 | emb_W[word_order.index('<unk>')] = emb_W.mean(0) 127 | 128 | # cast down 129 | emb_W = torch.from_numpy(emb_W.astype('float16')) 130 | 131 | # Dump file 132 | torch.save(emb_W, f'{args.out_prefix}.pt') 133 | 134 | json_vocab = OrderedDict({k: i for i, k in enumerate(word_order)}) 135 | with open(f'{args.out_prefix}.vocab.en', 'w') as f: 136 | json.dump(json_vocab, f, ensure_ascii=False, indent=2) 137 | 138 | print(f'Final vocabulary size: {emb_W.shape[0]}') 139 | 
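# Usage sketch (hypothetical file names, for illustration only):
#   scripts/create-pretrained-embs -i glove.840B.300d.txt -t glove \
#       -n 50000 -b train.vocab.en -o glove-init
# This would write 'glove-init.pt' (a float16 embedding matrix) and
# 'glove-init.vocab.en' (a JSON mapping from token to row index).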
-------------------------------------------------------------------------------- /scripts/dump-attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import pickle as pkl 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import torch 8 | import tqdm 9 | 10 | from nmtpytorch.translator import Translator 11 | from nmtpytorch.utils.data import make_dataloader 12 | 13 | 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser( 17 | prog='nmtpy-dump-attention', 18 | formatter_class=argparse.RawDescriptionHelpFormatter, 19 | description="generate attention pkl", 20 | argument_default=argparse.SUPPRESS) 21 | 22 | parser.add_argument('-m', '--model', type=str, required=True, 23 | help=".ckpt model file") 24 | parser.add_argument('-s', '--split', type=str, 25 | help='test_set name given as in configuration file') 26 | parser.add_argument('-o', '--output', type=str, 27 | help='output file name.') 28 | 29 | args = parser.parse_args() 30 | translator = Translator(models=[args.model], splits=args.split, 31 | source=None, disable_filters=True, override=None, 32 | task_id=None) 33 | 34 | model = translator.instances[0] 35 | 36 | dataset = model.load_data(args.split, 64, mode='beam') 37 | loader = make_dataloader(dataset) 38 | data = [] 39 | 40 | torch.set_grad_enabled(False) 41 | 42 | # Greedy search 43 | for batch in tqdm.tqdm(loader, unit='batch'): 44 | # Visual attention (may not be available) 45 | img_att = [[] for i in range(batch.size)] 46 | 47 | # Textual attention 48 | main_att = [[] for i in range(batch.size)] 49 | 50 | # Hierarchical attention 51 | hie_att = [[] for i in range(batch.size)] 52 | 53 | hyps = [[] for i in range(batch.size)] 54 | 55 | fini = torch.zeros(batch.size, dtype=torch.long) 56 | ctx_dict = model.encode(batch) 57 | 58 | # Get initial hidden state 59 | h_t = model.dec.f_init(ctx_dict) 60 | 61 | y_t = model.get_bos(batch.size) 62 | 63 | # Iterate for 100 timesteps 64 | for t in range(100): 65 | logp, h_t = model.dec.f_next(ctx_dict, model.dec.get_emb(y_t, t).squeeze(1), h_t) 66 | 67 | # text attention 68 | tatt = model.dec.history['alpha_txt'][-1].data.clone().numpy() 69 | iatt, hatt = None, None 70 | 71 | # If decoder has .img_alpha_t 72 | if hasattr(model.dec, 'img_alpha_t'): 73 | iatt = model.dec.img_alpha_t.data.clone().numpy() 74 | 75 | if hasattr(model.dec, 'h_att'): 76 | hatt = model.dec.h_att.data.clone().numpy() 77 | 78 | top_scores, y_t = logp.data.topk(1, largest=True) 79 | hyp = y_t.numpy().tolist() 80 | for idx, w in enumerate(hyp): 81 | if 2 not in hyps[idx]: 82 | hyps[idx].append(w[0]) 83 | main_att[idx].append(tatt[:, idx]) 84 | if iatt is None: 85 | img_att[idx].append(None) 86 | else: 87 | img_att[idx].append(iatt[:, idx]) 88 | 89 | if hatt is None: 90 | hie_att[idx].append(None) 91 | else: 92 | hie_att[idx].append(hatt[:, idx]) 93 | 94 | # Did we finish? 
(2 == <eos>) 95 | fini = fini | y_t.eq(2).squeeze().long() 96 | if fini.sum() == batch.size: 97 | break 98 | 99 | for h, sa, ia, ha in zip(hyps, main_att, img_att, hie_att): 100 | d = { 101 | 'hyp': model.trg_vocab.idxs_to_sent(h), 102 | 'pri_att': np.array(sa), 103 | 'sec_att': np.array(ia) if ia is not None else None, 104 | 'hie_att': np.array(ha) if ha is not None else None, 105 | } 106 | data.append(d) 107 | 108 | # Put into correct order 109 | data = [data[i] for i, j in sorted( 110 | enumerate(loader.batch_sampler.orig_idxs), key=lambda k: k[1])] 111 | 112 | src_lines = [] 113 | with open(model.opts.data['{}_set'.format(args.split)][model.sl]) as sf: 114 | for line in sf: 115 | src_lines.append(line.strip()) 116 | 117 | for d, line in zip(data, src_lines): 118 | d['src'] = line 119 | 120 | with open(args.output, 'wb') as f: 121 | pkl.dump(data, f) 122 | -------------------------------------------------------------------------------- /scripts/package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VER=$1 4 | 5 | if [[ -z $VER ]]; then 6 | echo "You need to provide a version string." 7 | exit 1 8 | fi 9 | 10 | rm -rf build/ dist/ 11 | 12 | echo "Preparing $VER" 13 | echo "__version__ = '${VER}'" > nmtpytorch/__init__.py 14 | 15 | git commit nmtpytorch/__init__.py -m "bump version to ${VER}" 16 | git push origin master 17 | git tag -a "v${VER}" -m "Version ${VER}" 18 | git push origin --tags 19 | 20 | # prep packages 21 | python setup.py sdist bdist_wheel 22 | 23 | #twine upload --repository-url https://test.pypi.org/legacy/ dist/* # Upload to TestPyPI 24 | twine upload dist/* # Upload to PyPI 25 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | import setuptools 4 | 5 | 6 | def get_nmtpytorch_version(): 7 | with open('nmtpytorch/__init__.py') as f: 8 | s = f.read().split('\n')[0] 9 | if '__version__' not in s: 10 | raise RuntimeError('Can not detect version from nmtpytorch/__init__.py') 11 | return eval(s.split(' ')[-1]) 12 | 13 | 14 | with open('README.md') as f: 15 | long_description = f.read() 16 | 17 | with open('NEWS.md') as f: 18 | release_notes = f.read() 19 | 20 | long_description = long_description.replace( 21 | '## Release Notes\n\nSee [NEWS.md](NEWS.md).\n', release_notes) 22 | 23 | setuptools.setup( 24 | name='nmtpytorch', 25 | version=get_nmtpytorch_version(), 26 | description='Sequence-to-Sequence Framework in PyTorch', 27 | long_description=long_description, 28 | long_description_content_type='text/markdown', 29 | url='https://github.com/lium-lst/nmtpytorch', 30 | author='Ozan Caglayan', 31 | author_email='ozancag@gmail.com', 32 | license='MIT', 33 | project_urls={ 34 | 'Wiki': 'https://github.com/lium-lst/nmtpytorch/wiki', 35 | }, 36 | classifiers=[ 37 | 'Intended Audience :: Science/Research', 38 | 'Topic :: Scientific/Engineering', 39 | 'License :: OSI Approved :: MIT License', 40 | 'Programming Language :: Python :: 3 :: Only', 41 | 'Programming Language :: Python :: 3.7', 42 | 'Operating System :: POSIX', 43 | ], 44 | keywords='nmt neural-mt translation sequence-to-sequence deep-learning pytorch', 45 | python_requires='~=3.7', 46 | install_requires=[ 47 | 'numpy', 'scikit-learn', 'tqdm', 'pillow', 48 | 'torch==1.4.0', 'torchvision==0.5.0', 'pytorch-ignite==0.3.0', 49 | 'sacrebleu>=1.2.9', 50 | 'editdistance==0.4', 'subword_nmt==0.3.5', 51
| ], 52 | include_package_data=True, 53 | exclude_package_data={'': ['.git']}, 54 | packages=setuptools.find_packages(), 55 | scripts=[str(p) for p in pathlib.Path('bin').glob('*')], 56 | zip_safe=False) 57 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 90 3 | ignore = E116,E241,E265,W504,E501 4 | exclude = docs,examples,build 5 | --------------------------------------------------------------------------------