├── .gitignore ├── LICENSE.md ├── MANIFEST.in ├── NEWS.md ├── README.md ├── bin ├── nmtpy ├── nmtpy-build-vocab ├── nmtpy-ckpt-info ├── nmtpy-coco-metrics └── nmtpy-install-extra ├── doc ├── Makefile ├── _static │ └── img │ │ └── logo.png ├── conf.py ├── datasets.rst ├── index.rst ├── intro │ ├── 00_installation.rst │ ├── 01_implementing_a_model.rst │ └── 02_configuring_an_experiment.rst ├── make.bat ├── models.rst ├── requirements.txt └── vocabulary.rst ├── environment.yml ├── examples └── v4.0.0 │ ├── mmt │ ├── README.md │ ├── mmt-task-en-fr-encdecinit.conf │ ├── mmt-task-en-fr-multimodalatt.conf │ └── mmt-task-en-fr-nmt.conf │ └── speech │ ├── README.md │ ├── asr-bilstmp-char.conf │ ├── asr-bilstmp-s1k.conf │ └── scripts │ ├── prepare.sh │ └── word2char ├── ipynb └── att.ipynb ├── nmtpytorch ├── __init__.py ├── cleanup.py ├── cocoeval │ ├── README.md │ ├── __init__.py │ ├── bleu │ │ ├── LICENSE.bleu │ │ ├── __init__.py │ │ ├── bleu.py │ │ └── bleu_scorer.py │ ├── cider │ │ ├── __init__.py │ │ ├── cider.py │ │ └── cider_scorer.py │ ├── meteor │ │ ├── __init__.py │ │ └── meteor.py │ └── rouge │ │ ├── __init__.py │ │ └── rouge.py ├── config.py ├── datasets │ ├── __init__.py │ ├── collate.py │ ├── imagefolder.py │ ├── kaldi.py │ ├── label.py │ ├── multimodal.py │ ├── npy.py │ ├── numpy_sequence.py │ ├── shelve.py │ └── text.py ├── evaluator.py ├── layers │ ├── __init__.py │ ├── argselect.py │ ├── attention │ │ ├── __init__.py │ │ ├── co.py │ │ ├── dot.py │ │ ├── hierarchical.py │ │ ├── mhco.py │ │ ├── mlp.py │ │ ├── scaled_dot.py │ │ └── uniform.py │ ├── decoders │ │ ├── __init__.py │ │ ├── conditional.py │ │ ├── conditionalmm.py │ │ ├── multisourceconditional.py │ │ ├── simplegru.py │ │ ├── switchinggru.py │ │ ├── vector.py │ │ └── xu.py │ ├── embedding │ │ ├── __init__.py │ │ └── pembedding.py │ ├── encoders │ │ ├── __init__.py │ │ ├── bilstmp.py │ │ ├── image.py │ │ ├── multimodal_bilstmp.py │ │ ├── multimodal_text.py │ │ └── text.py │ ├── ff.py │ ├── flatten.py │ ├── fusion.py │ ├── max_margin.py │ ├── pool.py │ ├── rnninit.py │ ├── seq_conv.py │ └── transformers │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── embedding.py │ │ ├── encoder.py │ │ ├── positionwise_ff.py │ │ └── residual_lnorm.py ├── lib │ └── multi-bleu.perl ├── logger.py ├── mainloop.py ├── metrics │ ├── __init__.py │ ├── cer.py │ ├── meteor.py │ ├── metric.py │ ├── multibleu.py │ ├── rouge.py │ ├── sacrebleu.py │ └── wer.py ├── models │ ├── __init__.py │ ├── asr.py │ ├── attentive_mmt.py │ ├── multimodal_asr.py │ ├── nmt.py │ ├── simple_mmt.py │ ├── stale │ │ ├── README.md │ │ ├── acapt.py │ │ ├── nli.py │ │ └── sat.py │ └── tfnmt.py ├── monitor.py ├── optimizer.py ├── samplers │ ├── __init__.py │ ├── approx.py │ └── bucket.py ├── tester.py ├── translator.py ├── utils │ ├── __init__.py │ ├── data.py │ ├── device.py │ ├── filterchain.py │ ├── io.py │ ├── kaldi.py │ ├── misc.py │ ├── ml_metrics.py │ ├── nn.py │ ├── tensorboard.py │ └── topology.py └── vocabulary.py ├── scripts ├── README.md ├── create-pretrained-embs ├── dump-attention.py └── package.sh ├── setup.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | nmtpytorch/lib/data/*gz 3 | nmtpytorch.egg-info 4 | .cache 5 | build/ 6 | dist/ 7 | doc/_build/ 8 | ipynb/.ipynb_checkpoints 9 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## MIT License 2 | 3 | 
Copyright (c) 2017 - Le Mans University - Language and Speech Technology (LST) Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -- 24 | 25 | **nmtpytorch** includes code from the following projects, which have their own licenses: 26 | 27 | - `multi-bleu.perl` from [mosesdecoder](https://github.com/moses-smt/mosesdecoder) [[LGPL-2.1](https://github.com/moses-smt/mosesdecoder/blob/master/COPYING)] 28 | - `pycocoevalcap` from [coco-caption](https://github.com/tylin/coco-caption) [[BSD-2-Clause](https://github.com/tylin/coco-caption/blob/master/license.txt)] 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include nmtpytorch/lib/multi-bleu.perl 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![nmtpytorch](https://github.com/lium-lst/nmtpytorch/blob/master/doc/_static/img/logo.png?raw=true "nmtpytorch") 2 | 3 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 4 | [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/) 5 | 6 | # Note 7 | 8 | This project is not actively maintained, so issues opened here are unlikely to be addressed in a timely manner. If you are interested, there is a recent fork of this repository called [pysimt](https://github.com/ImperialNLP/pysimt) which includes Transformer-based architectures as well. 9 | 10 | # Overview 11 | `nmtpytorch` allows training of various end-to-end neural architectures including 12 | but not limited to neural machine translation, image captioning and automatic 13 | speech recognition systems. The initial codebase was in `Theano` and was 14 | inspired by the famous [dl4mt-tutorial](https://github.com/nyu-dl/dl4mt-tutorial) 15 | codebase.
16 | 17 | `nmtpytorch` received valuable contributions from the [Grounded Sequence-to-sequence Transduction Team](https://github.com/srvk/jsalt-2018-grounded-s2s) 18 | of the *Frederick Jelinek Memorial Summer Workshop 2018*: 19 | 20 | Loic Barrault, Ozan Caglayan, Amanda Duarte, Desmond Elliott, Spandana Gella, Nils Holzenberger, 21 | Chirag Lala, Jasmine (Sun Jae) Lee, Jindřich Libovický, Pranava Madhyastha, 22 | Florian Metze, Karl Mulligan, Alissa Ostapenko, Shruti Palaskar, Ramon Sanabria, Lucia Specia and Josiah Wang. 23 | 24 | If you use **nmtpytorch**, you may want to cite the following [paper](https://ufal.mff.cuni.cz/pbml/109/art-caglayan-et-al.pdf): 25 | ``` 26 | @article{nmtpy2017, 27 | author = {Ozan Caglayan and 28 | Mercedes Garc\'{i}a-Mart\'{i}nez and 29 | Adrien Bardet and 30 | Walid Aransa and 31 | Fethi Bougares and 32 | Lo\"{i}c Barrault}, 33 | title = {NMTPY: A Flexible Toolkit for Advanced Neural Machine Translation Systems}, 34 | journal = {Prague Bull. Math. Linguistics}, 35 | volume = {109}, 36 | pages = {15--28}, 37 | year = {2017}, 38 | url = {https://ufal.mff.cuni.cz/pbml/109/art-caglayan-et-al.pdf}, 39 | doi = {10.1515/pralin-2017-0035}, 40 | timestamp = {Tue, 12 Sep 2017 10:01:08 +0100} 41 | } 42 | ``` 43 | 44 | ## Installation 45 | 46 | You may want to install NVIDIA's [Apex](https://github.com/NVIDIA/apex) 47 | extensions. As of February 2020, we only monkey-patch `nn.LayerNorm` 48 | with Apex's implementation if the library is installed and found. 49 | 50 | ### pip 51 | 52 | You can install `nmtpytorch` from `PyPI` using `pip` (or `pip3` depending on your 53 | operating system and environment): 54 | 55 | ``` 56 | $ pip install nmtpytorch 57 | ``` 58 | 59 | ### conda 60 | 61 | We provide an `environment.yml` file in the repository that you can use to create 62 | a ready-to-use Anaconda environment for `nmtpytorch`: 63 | 64 | ``` 65 | $ conda update --all 66 | $ git clone https://github.com/lium-lst/nmtpytorch.git 67 | $ conda env create -f nmtpytorch/environment.yml 68 | ``` 69 | 70 | **IMPORTANT:** After installing `nmtpytorch`, you **need** to run `nmtpy-install-extra` 71 | to download METEOR-related files into your `${HOME}/.nmtpy` folder. 72 | This step is only required once. 73 | 74 | ### Development Mode 75 | 76 | For continuous development and testing, it is sufficient to run `python setup.py develop` 77 | in the root folder of your Git checkout. From then on, all modifications to the source 78 | tree are taken into account directly, without requiring reinstallation. 79 | 80 | ## Documentation 81 | 82 | We currently only provide some preliminary documentation in our [wiki](https://github.com/lium-lst/nmtpytorch/wiki). 83 | 84 | ## Release Notes 85 | 86 | See [NEWS.md](NEWS.md).
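The `bin/nmtpy-build-vocab` script that follows dumps vocabularies as JSON dictionaries whose values are `"index count"` strings: special tokens come first with a count of 0, followed by the real tokens in decreasing order of frequency (see `freqs_to_dict()` below). As an illustration, a tiny vocabulary file could look like the following sketch — the tokens, indices and counts here are made up:

```python
import json

# Hypothetical nmtpy-build-vocab output for a tiny corpus.
vocab = {
    "<pad>": "0 0", "<bos>": "1 0", "<eos>": "2 0", "<unk>": "3 0",
    "the": "4 1021",  # most frequent real token gets the next free index
    "a": "5 730",
}
print(json.dumps(vocab, ensure_ascii=False, indent=2))
```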
87 | -------------------------------------------------------------------------------- /bin/nmtpy-build-vocab: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import json 6 | import pathlib 7 | import argparse 8 | from collections import OrderedDict 9 | 10 | import numpy as np 11 | 12 | from nmtpytorch.vocabulary import Vocabulary 13 | from nmtpytorch.utils.misc import pbar 14 | 15 | 16 | def freqs_to_dict(token_freqs, min_freq=0, max_items=0, exclude_symbols=False): 17 | # Get list of tokens 18 | tokens = list(token_freqs.keys()) 19 | 20 | # Collect their frequencies in a numpy array 21 | freqs = np.array(list(token_freqs.values())) 22 | 23 | tokendict = OrderedDict() 24 | if not exclude_symbols: 25 | for key, value in Vocabulary.TOKENS.items(): 26 | # Second value is the count information 27 | tokendict[key] = "{} 0".format(value) 28 | 29 | # Sort in descending order of frequency 30 | sorted_idx = np.argsort(freqs) 31 | if min_freq > 0: 32 | sorted_tokens = [(tokens[ii], freqs[ii]) for ii in sorted_idx[::-1] 33 | if freqs[ii] >= min_freq] 34 | else: 35 | sorted_tokens = [(tokens[ii], freqs[ii]) for ii in sorted_idx[::-1]] 36 | 37 | if max_items > 0: 38 | sorted_tokens = sorted_tokens[:max_items] 39 | 40 | # Start inserting from index offset 41 | offset = len(tokendict) 42 | for iidx, (token, freq) in enumerate(sorted_tokens): 43 | tokendict[token] = '{} {}'.format(iidx + offset, int(freq)) 44 | 45 | return tokendict 46 | 47 | 48 | def get_freqs(filename, cumul_dict=None): 49 | # We'll first count frequencies 50 | if cumul_dict is not None: 51 | # Let's accumulate frequencies 52 | token_freqs = cumul_dict 53 | else: 54 | token_freqs = OrderedDict() 55 | 56 | print("Reading file %s" % filename) 57 | with open(filename) as fhandle: 58 | for line in pbar(fhandle, unit='lines'): 59 | line = line.strip() 60 | if line: 61 | # Collect frequencies 62 | for word in line.split(): 63 | if word not in token_freqs: 64 | token_freqs[word] = 0 65 | token_freqs[word] += 1 66 | 67 | # Remove already available special tokens 68 | for key in Vocabulary.TOKENS: 69 | if key in token_freqs: 70 | print('Removing ', key) 71 | del token_freqs[key] 72 | 73 | return token_freqs 74 | 75 | 76 | def write_dict(fname, vocab): 77 | print("Dumping vocabulary (%d tokens) to %s..." 
% (len(vocab), fname)) 78 | with open(fname, 'w') as fhandle: 79 | json.dump(vocab, fhandle, ensure_ascii=False, indent=2) 80 | 81 | 82 | def main(): 83 | parser = argparse.ArgumentParser(prog='build-vocab') 84 | parser.add_argument('-o', '--output-dir', type=str, default='.', 85 | help='Output directory') 86 | parser.add_argument('-s', '--single', type=str, default=None, 87 | help='Name of the combined vocabulary file') 88 | parser.add_argument('-m', '--min-freq', type=int, default=0, 89 | help='Filter out tokens occurring < m times') 90 | parser.add_argument('-M', '--max-items', type=int, default=0, 91 | help='Keep the final vocabulary size less than this') 92 | parser.add_argument('-x', '--exclude-symbols', action='store_true', 93 | help='Do not add special <bos>, <eos>, <pad>, <unk>') 94 | parser.add_argument('files', type=str, nargs='+', 95 | help='Sentence files') 96 | args = parser.parse_args() 97 | 98 | if args.exclude_symbols: 99 | print('Warning: -x does not create vocabularies compatible ' 100 | 'with many of nmtpytorch\'s models.') 101 | 102 | output_dir = pathlib.Path(args.output_dir).expanduser() 103 | 104 | # In case it is needed 105 | all_freqs = OrderedDict() 106 | 107 | for filename in args.files: 108 | filename = pathlib.Path(filename).expanduser() 109 | suffix = ".vocab{}".format(filename.suffix) 110 | vocab_fname = filename.stem 111 | 112 | if args.single: 113 | # Get cumulative frequencies 114 | all_freqs = get_freqs(filename, all_freqs) 115 | 116 | else: 117 | # Get frequencies 118 | freqs = get_freqs(filename) 119 | # Build dictionary from frequencies 120 | tokendict = freqs_to_dict( 121 | freqs, args.min_freq, args.max_items, args.exclude_symbols) 122 | 123 | if args.min_freq > 0: 124 | vocab_fname += "-min%d" % args.min_freq 125 | if args.max_items > 0: 126 | vocab_fname += "-max%dtokens" % args.max_items 127 | vocab_fname = str((output_dir / vocab_fname)) + suffix 128 | write_dict(vocab_fname, tokendict) 129 | 130 | if args.single: 131 | vocab_fname = pathlib.Path(args.single) 132 | tokendict = freqs_to_dict( 133 | all_freqs, args.min_freq, args.max_items, args.exclude_symbols) 134 | write_dict(vocab_fname, tokendict) 135 | 136 | 137 | if __name__ == '__main__': 138 | sys.exit(main()) 139 | -------------------------------------------------------------------------------- /bin/nmtpy-ckpt-info: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import torch 5 | 6 | from nmtpytorch.config import Options 7 | from nmtpytorch.metrics import Evaluator 8 | from nmtpytorch.utils.misc import load_pt_file 9 | 10 | if __name__ == '__main__': 11 | try: 12 | pt_file = sys.argv[1] 13 | except IndexError: 14 | print('Usage: {} <.ckpt file>'.format(sys.argv[0])) 15 | sys.exit(1) 16 | 17 | data = load_pt_file(pt_file) 18 | weights, history, opts = data['model'], data['history'], data['opts'] 19 | 20 | if not history: 21 | print('This is not a .ckpt file with history information.') 22 | sys.exit(1) 23 | 24 | opts = Options.from_dict(opts) 25 | 26 | early_metric = opts.train['eval_metrics'].split(',')[0] 27 | 28 | print('Checkpoint saved at epoch: {} update: {}'.format(history['ectr'], 29 | history['uctr'])) 30 | for i, loss in enumerate(history['epoch_losses']): 31 | print('- Epoch {:<3} loss: {:.3f}'.format(i + 1, loss)) 32 | 33 | print('- Did {} validations with early-stop metric "{}"'.format( 34 | history['ectr'], 35 | early_metric)) 36 | 37 | for metric, hist in history['evals'].items(): 38 | best_vctr, best_val = Evaluator.find_best(metric, hist)
39 | print('- Best {:<10} so far: {:.2f} (Validation {})'.format(metric, 40 | best_val, 41 | best_vctr)) 42 | -------------------------------------------------------------------------------- /bin/nmtpy-coco-metrics: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Computes BLEU, ROUGE, METEOR, and CIDEr scores using the COCO metrics scripts 6 | """ 7 | import pathlib 8 | import argparse 9 | from collections import OrderedDict 10 | 11 | # Script taken and adapted from Kelvin Xu's arctic-captions project 12 | # https://github.com/kelvinxu/arctic-captions 13 | 14 | from nmtpytorch.cocoeval import Bleu, Meteor, Cider, Rouge 15 | from nmtpytorch.utils.misc import get_meteor_jar 16 | 17 | 18 | def print_table(results, sort_by='METEOR'): 19 | cols = ['Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 20 | 'METEOR', 'CIDEr', 'ROUGE_L'] 21 | for col in cols: 22 | print('|{:^15}|'.format(col), end='') 23 | print() 24 | 25 | results = sorted(results.items(), key=lambda x: x[1][sort_by]) 26 | 27 | for sysname, result in results: 28 | if len(results) > 1: 29 | print(sysname) 30 | for col in cols: 31 | print('|{:^15,.3f}|'.format(result[col]), end='') 32 | print() 33 | 34 | 35 | if __name__ == '__main__': 36 | parser = argparse.ArgumentParser(prog='coco-metrics') 37 | 38 | parser.add_argument("-w", "--write", action='store_true', 39 | help='Create a .score file containing the results.') 40 | parser.add_argument("-l", "--language", default='en', 41 | help='Hypothesis language (default: en)') 42 | parser.add_argument("-r", "--refs", type=argparse.FileType('r'), 43 | help="Path to all the reference files", nargs='+') 44 | parser.add_argument("systems", type=str, 45 | help="Per-system hypothesis file(s)", nargs='+') 46 | 47 | args = parser.parse_args() 48 | 49 | # Check for METEOR 50 | get_meteor_jar() 51 | 52 | # List of scorers 53 | scorers = [ 54 | (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), 55 | (Meteor(args.language), ["METEOR"]), 56 | (Cider(), ["CIDEr"]), 57 | (Rouge(), ["ROUGE_L"]), 58 | ] 59 | 60 | results = OrderedDict() 61 | 62 | # Read multiple reference files 63 | raw_refs = [list(map(str.strip, r)) for r in zip(*args.refs)] 64 | refs = {idx: rr for idx, rr in enumerate(raw_refs)} 65 | 66 | # Ranking of multiple systems is possible 67 | for hypfile in args.systems: 68 | with open(hypfile) as f: 69 | # List of hypothesis sentences for this system 70 | hypo = {idx: [line.strip()] for (idx, line) in enumerate(f)} 71 | 72 | result = OrderedDict() 73 | 74 | for scorer, method in scorers: 75 | score, _ = scorer.compute_score(refs, hypo) 76 | if score: 77 | if not isinstance(score, list): 78 | score = [score] 79 | for m, s in zip(method, score): 80 | result[m] = float('%.3f' % s) 81 | 82 | if args.write: 83 | with open("%s.score" % hypfile, 'w') as f: 84 | f.write("%s\n" % result) 85 | results[str(pathlib.Path(hypfile))] = result 86 | 87 | print_table(results) 88 | -------------------------------------------------------------------------------- /bin/nmtpy-install-extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | which java &> /dev/null 3 | if [[ "x$?" == "x1" ]]; then 4 | echo "'java' not found in PATH. You need to have a working JRE installation for METEOR." 5 | else 6 | echo "OK: Found 'java'." 7 | fi 8 | 9 | CACHE=${HOME}/.nmtpy 10 | METEOR=${CACHE}/meteor-data 11 | 12 | if [[ !
-d ${CACHE} ]]; then 13 | echo "Creating ${CACHE} folder..." 14 | mkdir -p ${CACHE} 15 | fi 16 | 17 | if [[ ! -d $METEOR ]]; then 18 | git clone https://github.com/ozancaglayan/meteor-1.5-data.git $METEOR 19 | pushd $METEOR 20 | ./recompress.sh 21 | popd 22 | fi 23 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/_static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lium-lst/nmtpytorch/fa31279aeb68ef1fdae9b8e7b6b331d134ad4c63/doc/_static/img/logo.png -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('../nmtpytorch')) 16 | 17 | #import pytorch_sphinx_theme 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'nmtpytorch' 23 | copyright = '2020, Ozan Caglayan' 24 | author = 'Ozan Caglayan' 25 | 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.autosummary', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.intersphinx', 37 | 'sphinx.ext.todo', 38 | 'sphinx.ext.coverage', 39 | 'sphinx.ext.napoleon', 40 | 'sphinx.ext.viewcode', 41 | #'sphinxcontrib.katex', 42 | 'sphinx.ext.autosectionlabel', 43 | #'javasphinx', 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['_templates'] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 
52 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 53 | 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = 'sphinx_rtd_theme' 61 | #html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] 62 | html_logo = '_static/img/logo.png' 63 | 64 | # Theme options are theme-specific and customize the look and feel of a theme 65 | # further. For a list of options available for each theme, see the 66 | # documentation. 67 | 68 | html_theme_options = { 69 | 'collapse_navigation': True, 70 | # 'pytorch_project': 'doc', 71 | # 'canonical_url': 'https://pytorch.org/docs/stable/', 72 | # 'display_version': True, 73 | 'logo_only': True, 74 | } 75 | 76 | # Add any paths that contain custom static files (such as style sheets) here, 77 | # relative to this directory. They are copied after the builtin static files, 78 | # so a file named "default.css" will overwrite the builtin "default.css". 79 | html_static_path = ['_static'] 80 | 81 | 82 | # -- Extension configuration ------------------------------------------------- -------------------------------------------------------------------------------- /doc/datasets.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Datasets 5 | ======================== 6 | 7 | .. automodule:: nmtpytorch.datasets 8 | .. currentmodule:: nmtpytorch.datasets 9 | 10 | 11 | TextDataset 12 | ------------------------ 13 | 14 | .. autoclass:: TextDataset 15 | :members: 16 | 17 | LabelDataset 18 | ------------------------ 19 | 20 | .. autoclass:: LabelDataset 21 | :members: 22 | 23 | ImageFolderDataset 24 | ------------------------ 25 | 26 | .. autoclass:: ImageFolderDataset 27 | :members: 28 | 29 | KeyedNPZDataset 30 | ------------------------ 31 | 32 | .. autoclass:: KeyedNPZDataset 33 | :members: 34 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. nmtpytorch documentation master file, created by 2 | sphinx-quickstart on Wed Jan 15 12:34:41 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | :github_url: https://github.com/lium-lst/nmtpytorch 7 | 8 | nmtpytorch documentation 9 | ======================== 10 | 11 | `nmtpytorch` is a framework around `PyTorch` with the objective of training 12 | complex sequence-to-sequence models. 13 | 14 | .. toctree:: 15 | :glob: 16 | :maxdepth: 1 17 | 18 | .. toctree:: 19 | :glob: 20 | :maxdepth: 1 21 | :caption: Introduction 22 | 23 | intro/* 24 | 25 | .. toctree:: 26 | :maxdepth: 1 27 | :caption: Datasets 28 | 29 | datasets 30 | 31 | .. toctree:: 32 | :maxdepth: 1 33 | :caption: Models 34 | 35 | models 36 | 37 | .. toctree:: 38 | :maxdepth: 1 39 | :caption: API Documentation 40 | 41 | vocabulary 42 | -------------------------------------------------------------------------------- /doc/intro/00_installation.rst: -------------------------------------------------------------------------------- 1 | .. 
role:: hidden 2 | :class: hidden-section 3 | 4 | Installation 5 | ============= 6 | 7 | bla bla bla 8 | 9 | -------------------------------------------------------------------------------- /doc/intro/01_implementing_a_model.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Implementing a model 5 | ===================== 6 | 7 | bla bla bla 8 | 9 | -------------------------------------------------------------------------------- /doc/intro/02_configuring_an_experiment.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Configuring an experiment 5 | ========================= 6 | 7 | bla bla bla 8 | 9 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/models.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | Models 5 | ======================== 6 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | -------------------------------------------------------------------------------- /doc/vocabulary.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | nmtpytorch.vocabulary 5 | ======================== 6 | 7 | .. automodule:: nmtpytorch.vocabulary 8 | .. currentmodule:: nmtpytorch.vocabulary 9 | 10 | Vocabulary 11 | ---------- 12 | 13 | .. autoclass:: Vocabulary 14 | :members: 15 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: nmtpy 2 | 3 | dependencies: 4 | - python=3.7 5 | - cffi 6 | - cython 7 | - ipython 8 | - pyyaml 9 | - six 10 | - numpy 11 | - scikit-learn 12 | - tqdm 13 | - pillow 14 | - pip: 15 | - torch==1.4.0 16 | - torchvision==0.5.0 17 | - pytorch-ignite==0.3.0 18 | - sacrebleu>=1.2.9 19 | - editdistance==0.4 20 | - subword_nmt==0.3.5 21 | - ipdb 22 | - -e . 
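  # Note: '-e .' pip-installs the checkout itself in editable (development)
  # mode; this assumes the environment is created from the repository clone,
  # as described in the README.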
23 | -------------------------------------------------------------------------------- /examples/v4.0.0/mmt/README.md: -------------------------------------------------------------------------------- 1 | Examples 2 | -- 3 | 4 | Here you can find example configuration files that are tied to specific versions 5 | of nmtpytorch. You need to set the paths accordingly for the configurations 6 | to work correctly. 7 | 8 | ## Multimodal task (En->Fr) 9 | 10 | **NOTE:** These examples do not use BPE-segmented files; instead, they simply 11 | use word forms. 12 | 13 | The dataset files are suffixed with `lc.norm.tok` in these experiments, which 14 | means that Moses scripts were used to lowercase -> normalize-punctuation -> tokenize 15 | the corpora. Specifically for tokenization, we enable the `-a` option to aggressively 16 | split hyphens. The following pipeline should do the trick (Moses scripts should 17 | be in `$PATH` for the following to work as-is): 18 | 19 | ```bash 20 | for split in train val test_201*flickr; do 21 | for llang in en fr; do 22 | lowercase.perl < ${split}.${llang} | normalize-punctuation.perl -l $llang | \ 23 | tokenizer.perl -q -a -l $llang -threads 4 > ${split}.lc.norm.tok.${llang} 24 | done 25 | done 26 | ``` 27 | 28 | Next you need to run `nmtpy-build-vocab` on the `train.lc.norm.tok.*` files 29 | to construct the vocabularies. You should now be able to train the systems 30 | accordingly. 31 | 32 | **NOTE:** For multimodal systems, you may want to L2-normalize the feature files 33 | and save the normalized versions; see the [LIUM-CVC WMT18 paper](https://arxiv.org/abs/1809.00151): 34 | 35 | ```python 36 | x = np.load('foo.npy') 37 | np.save('foo-l2norm.npy', x / np.linalg.norm(x, axis=-1, keepdims=True)) 38 | ``` 39 | 40 | ### mmt-task-en-fr-nmt.conf 41 | 42 | A baseline NMT for the En->Fr language pair 43 | of Multi30K. You can download the Multi30K dataset from [here](https://github.com/multi30k/dataset). 44 | 45 | ### mmt-task-en-fr-encdecinit.conf 46 | 47 | - A baseline multimodal NMT for the En->Fr language pair of Multi30K. You need 48 | to have `.npy` feature files for image features in order to train this model. 49 | 50 | - A feature file should contain a tensor of shape `(n, feat_dim)` where `n` is the 51 | number of sentences in the split and `feat_dim` is the dimensionality of the features (a quick shape sanity check is sketched at the end of this file). 52 | 53 | - Depending on the features you use, you need to adjust the `feat_dim` option in the configuration file. 54 | 55 | - You can download the provided ResNet-50 feature files for the WMT18 shared task 56 | from [here](https://drive.google.com/drive/folders/1I2ufg3rTva3qeBkEc-xDpkESsGkYXgCf?usp=sharing). 57 | 58 | - The feature files for this model have `avgpool` in their filenames and the 59 | `feat_dim` is `2048`. 60 | 61 | ### mmt-task-en-fr-multimodalatt.conf 62 | 63 | A multimodal attentive NMT baseline replicating [this paper](https://arxiv.org/abs/1609.03976). 64 | You now need to use the convolutional feature files that can be downloaded from the same link above. 65 | 66 | - The feature files for this model have `res4frelu` in their filenames and the `feat_dim` is `1024`. 67 | 68 | #### More variants 69 | 70 | - You can switch to [hierarchical attention](https://arxiv.org/pdf/1704.06567.pdf) by 71 | changing `fusion_type: concat` to `fusion_type: hierarchical` in the `*multimodalatt.conf` 72 | file.
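Before training the multimodal configurations that follow, it may also help to sanity-check that each `.npy` feature file has exactly one row per sentence — a minimal sketch, assuming the file names used by the configs below:

```python
import numpy as np

feats = np.load('train-resnet50-avgpool.npy')   # expected shape: (n, 2048)
with open('train.lc.norm.tok.en') as f:
    n_sents = sum(1 for _ in f)
assert feats.shape[0] == n_sents, (feats.shape, n_sents)
```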
73 | -------------------------------------------------------------------------------- /examples/v4.0.0/mmt/mmt-task-en-fr-encdecinit.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 0 3 | model_type: MultimodalNMT 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: meteor,bleu,loss 8 | # Tokenization was done with -a parameter of moses tokenizer 9 | eval_filters: de-hyphen 10 | eval_beam: 12 11 | eval_batch_size: 32 12 | save_best_metrics: True 13 | eval_max_len: 100 14 | n_checkpoints: 0 15 | l2_reg: 1e-05 16 | lr_decay: plateau 17 | lr_decay_revert: False 18 | lr_decay_factor: 0.5 19 | lr_decay_patience: 2 20 | gclip: 1 21 | optimizer: adam 22 | lr: 0.0004 23 | batch_size: 64 24 | save_path: /path/to/experiment/folder 25 | tensorboard_dir: ${save_path}/tb_dir 26 | 27 | [model] 28 | att_type: mlp 29 | att_bottleneck: hid 30 | enc_dim: 320 31 | dec_dim: 320 32 | emb_dim: 200 33 | dropout_emb: 0.4 34 | dropout_ctx: 0.5 35 | dropout_out: 0.5 36 | n_encoders: 2 37 | tied_emb: 2way 38 | bucket_by: en 39 | max_len: None 40 | 41 | sampler_type: approximate 42 | sched_sampling: 0 43 | dec_init: zero 44 | bos_type: emb 45 | 46 | feat_fusion: encdecinit 47 | feat_dim: 2048 48 | feat_activ: tanh 49 | direction: en:Text, feats:Numpy -> fr:Text 50 | 51 | [data] 52 | tok_root: /path/to/tokenized/files/folder 53 | feats_root: /path/to/avgpooled/resnet/feature/files 54 | 55 | train_set: {'en': '${tok_root}/train.lc.norm.tok.en', 56 | 'feats': '${feats_root}/train-resnet50-avgpool.npy', 57 | 'fr': '${tok_root}/train.lc.norm.tok.fr'} 58 | 59 | val_set: {'en': '${tok_root}/val.lc.norm.tok.en', 60 | 'feats': '${feats_root}/val-resnet50-avgpool.npy', 61 | 'fr': '${tok_root}/val.lc.norm.tok.fr'} 62 | 63 | test_2016_flickr_set: {'en': '${tok_root}/test_2016_flickr.lc.norm.tok.en', 64 | 'feats': '${feats_root}/test_2016_flickr-resnet50-avgpool.npy', 65 | 'fr': '${tok_root}/test_2016_flickr.lc.norm.tok.fr'} 66 | 67 | test_2017_flickr_set: {'en': '${tok_root}/test_2017_flickr.lc.norm.tok.en', 68 | 'feats': '${feats_root}/test_2017_flickr-resnet50-avgpool.npy', 69 | 'fr': '${tok_root}/test_2017_flickr.lc.norm.tok.fr'} 70 | 71 | test_2018_flickr_set: {'en': '${tok_root}/test_2018_flickr.lc.norm.tok.en', 72 | 'feats': '${feats_root}/test_2018_flickr-resnet50-avgpool.npy'} 73 | 74 | [vocabulary] 75 | en: ${data:tok_root}/train.lc.norm.tok.vocab.en 76 | fr: ${data:tok_root}/train.lc.norm.tok.vocab.fr 77 | -------------------------------------------------------------------------------- /examples/v4.0.0/mmt/mmt-task-en-fr-multimodalatt.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 0 3 | model_type: AttentiveMNMTFeatures 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: meteor,bleu,loss 8 | # Tokenization was done with -a parameter of moses tokenizer 9 | eval_filters: de-hyphen 10 | eval_beam: 12 11 | eval_batch_size: 32 12 | save_best_metrics: True 13 | eval_max_len: 100 14 | n_checkpoints: 0 15 | l2_reg: 1e-05 16 | lr_decay: plateau 17 | lr_decay_revert: False 18 | lr_decay_factor: 0.5 19 | lr_decay_patience: 2 20 | gclip: 1 21 | optimizer: adam 22 | lr: 0.0004 23 | batch_size: 64 24 | save_path: /path/to/experiment/folder 25 | tensorboard_dir: ${save_path}/tb_dir 26 | 27 | [model] 28 | att_type: mlp 29 | att_bottleneck: hid 30 | enc_dim: 320 31 | dec_dim: 320 32 | emb_dim: 200 33 | dropout_emb: 0.4 34 | dropout_ctx: 0.5 35 | dropout_out: 0.5 36 | 
n_encoders: 2 37 | tied_emb: 2way 38 | bucket_by: en 39 | max_len: None 40 | 41 | sampler_type: approximate 42 | sched_sampling: 0 43 | dec_init: zero 44 | bos_type: emb 45 | 46 | fusion_type: concat 47 | n_channels: 1024 48 | direction: en:Text, image:Numpy -> fr:Text 49 | 50 | [data] 51 | tok_root: /path/to/tokenized/files/folder 52 | img_root: /path/to/res4f-relu/resnet/feature/files 53 | 54 | train_set: {'en': '${tok_root}/train.lc.norm.tok.en', 55 | 'image': '${img_root}/train-resnet50-res4f_relu.npy', 56 | 'fr': '${tok_root}/train.lc.norm.tok.fr'} 57 | 58 | val_set: {'en': '${tok_root}/val.lc.norm.tok.en', 59 | 'image': '${img_root}/val-resnet50-res4f_relu.npy', 60 | 'fr': '${tok_root}/val.lc.norm.tok.fr'} 61 | 62 | test_2016_flickr_set: {'en': '${tok_root}/test_2016_flickr.lc.norm.tok.en', 63 | 'image': '${img_root}/test_2016_flickr-resnet50-res4f_relu.npy', 64 | 'fr': '${tok_root}/test_2016_flickr.lc.norm.tok.fr'} 65 | 66 | test_2017_flickr_set: {'en': '${tok_root}/test_2017_flickr.lc.norm.tok.en', 67 | 'image': '${img_root}/test_2017_flickr-resnet50-res4f_relu.npy', 68 | 'fr': '${tok_root}/test_2017_flickr.lc.norm.tok.fr'} 69 | 70 | test_2018_flickr_set: {'en': '${tok_root}/test_2018_flickr.lc.norm.tok.en', 71 | 'image': '${img_root}/test_2018_flickr-resnet50-res4f_relu.npy'} 72 | 73 | [vocabulary] 74 | en: ${data:tok_root}/train.lc.norm.tok.vocab.en 75 | fr: ${data:tok_root}/train.lc.norm.tok.vocab.fr 76 | -------------------------------------------------------------------------------- /examples/v4.0.0/mmt/mmt-task-en-fr-nmt.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 0 3 | model_type: NMT 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: meteor,bleu,loss 8 | # Tokenization was done with -a parameter of moses tokenizer 9 | eval_filters: de-hyphen 10 | eval_beam: 12 11 | eval_batch_size: 32 12 | save_best_metrics: True 13 | eval_max_len: 100 14 | n_checkpoints: 0 15 | l2_reg: 1e-05 16 | lr_decay: plateau 17 | lr_decay_revert: False 18 | lr_decay_factor: 0.5 19 | lr_decay_patience: 2 20 | gclip: 1 21 | optimizer: adam 22 | lr: 0.0004 23 | batch_size: 64 24 | save_path: /path/to/experiment/folder 25 | tensorboard_dir: ${save_path}/tb_dir 26 | 27 | [model] 28 | att_type: mlp 29 | att_bottleneck: hid 30 | enc_dim: 320 31 | dec_dim: 320 32 | emb_dim: 200 33 | dropout_emb: 0.4 34 | dropout_ctx: 0.5 35 | dropout_out: 0.5 36 | n_encoders: 2 37 | tied_emb: 2way 38 | bucket_by: en 39 | max_len: None 40 | 41 | sampler_type: approximate 42 | sched_sampling: 0 43 | dec_init: zero 44 | bos_type: emb 45 | direction: en:Text -> fr:Text 46 | 47 | [data] 48 | tok_root: /path/to/tokenized/files/folder 49 | 50 | train_set: {'en': '${tok_root}/train.lc.norm.tok.en', 51 | 'fr': '${tok_root}/train.lc.norm.tok.fr'} 52 | 53 | val_set: {'en': '${tok_root}/val.lc.norm.tok.en', 54 | 'fr': '${tok_root}/val.lc.norm.tok.fr'} 55 | 56 | test_2016_flickr_set: {'en': '${tok_root}/test_2016_flickr.lc.norm.tok.en', 57 | 'fr': '${tok_root}/test_2016_flickr.lc.norm.tok.fr'} 58 | 59 | test_2017_flickr_set: {'en': '${tok_root}/test_2017_flickr.lc.norm.tok.en', 60 | 'fr': '${tok_root}/test_2017_flickr.lc.norm.tok.fr'} 61 | 62 | test_2018_flickr_set: {'en': '${tok_root}/test_2018_flickr.lc.norm.tok.en'} 63 | 64 | [vocabulary] 65 | en: ${data:tok_root}/train.lc.norm.tok.vocab.en 66 | fr: ${data:tok_root}/train.lc.norm.tok.vocab.fr 67 | -------------------------------------------------------------------------------- 
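A note on the `${...}` placeholders used throughout these `.conf` files: they follow configparser-style extended interpolation, where `${option}` resolves within the current section and `${section:option}` reaches across sections (e.g. `${data:tok_root}` under `[vocabulary]`). A self-contained sketch of that resolution rule — nmtpytorch's own `config.py` loader may differ in details:

```python
from configparser import ConfigParser, ExtendedInterpolation

cfg = ConfigParser(interpolation=ExtendedInterpolation())
cfg.read_string("""
[data]
tok_root = /path/to/tokenized/files/folder

[vocabulary]
en = ${data:tok_root}/train.lc.norm.tok.vocab.en
""")
print(cfg['vocabulary']['en'])
# -> /path/to/tokenized/files/folder/train.lc.norm.tok.vocab.en
```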
/examples/v4.0.0/speech/README.md: -------------------------------------------------------------------------------- 1 | Automatic Speech Recognition (ASR) 2 | -- 3 | 4 | Two example configuration files for character-level and subword-level 5 | ASR systems. These experiments make use of the [ASR](https://github.com/lium-lst/nmtpytorch/blob/master/nmtpytorch/models/asr.py) model from `nmtpytorch`. 6 | 7 | ## Preparing Kaldi features 8 | 9 | Right now nmtpytorch only supports Kaldi feature files, namely `.ark` and `.scp` 10 | files, along with a special folder structure. Let's assume that all speech-related 11 | files are under `~/data/swbd`: 12 | - Each train/test set split should have a corresponding subfolder with the following files: 13 | - feats.scp 14 | - cmvn.scp 15 | - text 16 | - utt2spk 17 | - The paths to `.ark` files in the `.scp` files should be valid paths. 18 | 19 | Once you have this folder hierarchy ready, you can modify the input and output folder paths in `scripts/prepare.sh` accordingly and launch the script. The script will create the uncompressed feature files in the format required by `nmtpytorch`. Specifically, the output folder hierarchy should look like this: 20 | 21 | ``` 22 | /tmp/data/swbd/ 23 | ├── eval2000_test 24 | │   ├── feats_local.ark 25 | │   ├── feats_local.scp 26 | │   └── segments.len 27 | ├── train_dev 28 | │   ├── feats_local.ark 29 | │   ├── feats_local.scp 30 | │   └── segments.len 31 | └── train_nodup 32 | ├── feats_local.ark 33 | ├── feats_local.scp 34 | └── segments.len 35 | ``` 36 | 37 | **NOTE:** Unlike Kaldi, we remove the utterance ID columns from the label files for `nmtpytorch`, so you need to make sure that the `text` files are in the **same order** as the `feats.scp` file. 38 | 39 | Now if you look at the provided configuration files, you will see that the speech modalities tagged with `en_speech` keys point to the folders listed above: 40 | 41 | ``` 42 | [data] 43 | root: /tmp/data/swbd 44 | 45 | train_set: {'en_speech': '${root}/train_nodup', 46 | 'en_text': '${root}/train_nodup/text.char.nmtpy'} 47 | 48 | val_set: {'en_speech': '${root}/train_dev', 49 | 'en_text': '${root}/train_dev/text.char.nmtpy'} 50 | 51 | eval2000_set: {'en_speech': '${root}/eval2000_test'} 52 | 53 | [vocabulary] 54 | en_text: ${data:root}/train_nodup/text.char.vocab.nmtpy 55 | ``` 56 | 57 | ### Adding label files and vocabularies 58 | 59 | The last files to prepare are the target-side transcript files tagged with `en_text` keys above. These are plain text files **without the utterance ID columns**. Each line corresponds to an utterance/segment and explicit spaces are defined with the `<s>` token. An example line should look like this: 60 | ``` 61 | y e a h <s> y e a h <s> w e l l <s> i - <s> i - <s> t h a t ' s <s> r i g h t <s> a n d <s> i t 62 | ``` 63 | 64 | **HINT:** You can use `scripts/word2char` to convert a word-level text file to the above format easily. 65 | 66 | On the other hand, a subword-level file prepared with the `subword-nmt` tool looks like this: 67 | ``` 68 | all right th@@ an@@ ks bye bye 69 | ``` 70 | 71 | Once you have the transcript files preprocessed this way, you can run `nmtpy-build-vocab` to create the vocabulary file using the training sentence file: 72 | 73 | ``` 74 | $ nmtpy-build-vocab <training file> 75 | ``` 76 | 77 | ### Configuration Files 78 | 79 | - `asr-bilstmp-char.conf:` Character-level ASR baseline that uses character error rate (CER) as the early-stopping metric. 80 | - `asr-bilstmp-s1k.conf:` BPE-level ASR baseline example.
Here the early-stopping metric is WER. To correctly compute the WER over non-BPE files, a post-processing filter is activated in the configuration file: `eval_filters: de-bpe` 81 | 82 | ### Launching Training 83 | See [this](https://github.com/lium-lst/nmtpytorch/wiki/Running-Experiments) 84 | 85 | ### Decoding Afterwards 86 | Once training is over, you can use the `nmtpy translate` command to decode arbitrary dev/test sets using beam search. For example, to decode the `eval2000` set defined in the above config, you can run: 87 | 88 | ``` 89 | # batch_size: 32 beam_size: 10 output file prefix: eval2000 90 | # last argument is the model checkpoint file 91 | CUDA_VISIBLE_DEVICES=0 nmtpy translate -s eval2000 -b 32 -k 10 -o eval2000 <model.ckpt> 92 | ``` 93 | -------------------------------------------------------------------------------- /examples/v4.0.0/speech/asr-bilstmp-char.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 72000 3 | model_type: ASR 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: cer,loss 8 | eval_beam: 5 9 | eval_batch_size: 16 10 | save_best_metrics: True 11 | eval_max_len: 400 12 | n_checkpoints: 0 13 | l2_reg: 0 14 | gclip: 1 15 | optimizer: adam 16 | lr: 0.0004 17 | lr_decay: plateau 18 | lr_decay_revert: False 19 | lr_decay_factor: 0.5 20 | lr_decay_patience: 2 21 | batch_size: 36 22 | save_path: /path/to/save/the/experiments 23 | tensorboard_dir: ${save_path}/tb 24 | 25 | [model] 26 | att_type: mlp 27 | att_bottleneck: hid 28 | feat_dim: 43 29 | enc_dim: 256 30 | proj_dim: 256 31 | emb_dim: 49 32 | dec_dim: 256 33 | dropout: 0.4 34 | # 6 encoder layers 35 | enc_layers: '1_1_2_2_1_1' 36 | tied_dec_embs: True 37 | dec_init: mean_ctx 38 | bucket_by: en_speech 39 | # Enough coverage @ 1500 40 | max_len: 1500 41 | 42 | direction: en_speech:Kaldi -> en_text:Text 43 | 44 | [data] 45 | root: /tmp/data/swbd 46 | 47 | train_set: {'en_speech': '${root}/train_nodup', 48 | 'en_text': '${root}/train_nodup/text.char.nmtpy'} 49 | 50 | val_set: {'en_speech': '${root}/train_dev', 51 | 'en_text': '${root}/train_dev/text.char.nmtpy'} 52 | 53 | eval2000_set: {'en_speech': '${root}/eval2000_test'} 54 | 55 | [vocabulary] 56 | en_text: ${data:root}/train_nodup/text.char.vocab.nmtpy 57 | -------------------------------------------------------------------------------- /examples/v4.0.0/speech/asr-bilstmp-s1k.conf: -------------------------------------------------------------------------------- 1 | [train] 2 | seed: 72000 3 | model_type: ASR 4 | patience: 10 5 | max_epochs: 100 6 | eval_freq: 0 7 | eval_metrics: wer,loss 8 | # this is a BPE model, so de-bpe is necessary for correct WER computation 9 | eval_filters: de-bpe 10 | eval_beam: 5 11 | eval_batch_size: 16 12 | save_best_metrics: True 13 | eval_max_len: 400 14 | n_checkpoints: 0 15 | l2_reg: 0 16 | gclip: 1 17 | optimizer: adam 18 | lr: 0.0004 19 | lr_decay: plateau 20 | lr_decay_revert: False 21 | lr_decay_factor: 0.5 22 | lr_decay_patience: 2 23 | batch_size: 36 24 | save_path: /path/to/save/the/experiments 25 | tensorboard_dir: ${save_path}/tb 26 | 27 | [model] 28 | att_type: mlp 29 | att_bottleneck: hid 30 | feat_dim: 43 31 | enc_dim: 256 32 | proj_dim: 256 33 | emb_dim: 256 34 | dec_dim: 256 35 | dropout: 0.4 36 | # 6 encoder layers 37 | enc_layers: '1_1_2_2_1_1' 38 | tied_dec_embs: True 39 | dec_init: mean_ctx 40 | bucket_by: en_speech 41 | # Enough coverage @ 1500 42 | max_len: 1500 43 | 44 | direction: en_speech:Kaldi -> en_text:Text 45 | 46 | [data] 47 | root:
/tmp/data/swbd 48 | 49 | train_set: {'en_speech': '${root}/train_nodup', 50 | 'en_text': '${root}/train_nodup/text.s1k.nmtpy'} 51 | 52 | val_set: {'en_speech': '${root}/train_dev', 53 | 'en_text': '${root}/train_dev/text.s1k.nmtpy'} 54 | 55 | eval2000_set: {'en_speech': '${root}/eval2000_test'} 56 | 57 | [vocabulary] 58 | en_text: ${data:root}/train_nodup/text.s1k.vocab.nmtpy 59 | -------------------------------------------------------------------------------- /examples/v4.0.0/speech/scripts/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ############################################ 4 | # Example preparation script for Switchboard 5 | ############################################ 6 | # Kaldi utilities `feat-to-len` and `copy-feats` should be in your $PATH 7 | 8 | # List split names here. These correspond to Kaldi-prepared subfolder 9 | # names as well. 10 | splits=( train_nodup train_dev eval2000_test ) 11 | 12 | # The root folder containing the split subfolders 13 | input_folder=~/data/swbd 14 | 15 | # The required hierarchy is: 16 | # ${input_folder}/split_name/ 17 | # - utt2spk 18 | # - text 19 | # - feats.scp 20 | # - cmvn.scp 21 | 22 | # Where to put the prepared nmtpy-ready files 23 | output_folder=/tmp/data/swbd 24 | 25 | # Create the folder 26 | mkdir -p $output_folder 27 | 28 | #################################################################### 29 | # REQUIREMENT CHECK 30 | # Make sure that the files are ordered (in sync) w.r.t. utterance IDs 31 | #################################################################### 32 | for split in "${splits[@]}"; do 33 | # Original .scp with valid paths to .ark files such as the following 34 | # sw02054-A_000204-000790 /path/to/ark/file:offset 35 | scp=${input_folder}/${split}/feats.scp 36 | 37 | # Transcription per line prefixed with utterance IDs as well 38 | # sw02054-A_000204-000790 so let me tell you a little bit ... 39 | txt=${input_folder}/${split}/text 40 | 41 | # NOTE: Make sure that the files are ordered (in sync) w.r.t. utterance IDs 42 | # Compare utterance IDs to make sure that they're ordered/aligned 43 | cmp -s <(cut -d' ' -f1 < $scp) <(cut -d' ' -f1 < $txt) || \ 44 | { echo "Error: [$split] feats.scp and text are not aligned"; exit 1; } 45 | done 46 | 47 | ############################### 48 | # Generate `segments.len` files 49 | ############################### 50 | for split in "${splits[@]}"; do 51 | mkdir -p $output_folder/${split} 52 | utt2spk="${input_folder}/${split}/utt2spk" 53 | cmvn="${input_folder}/${split}/cmvn.scp" 54 | scp="${input_folder}/${split}/feats.scp" 55 | seg=${output_folder}/${split}/segments.len 56 | 57 | if [[ ! -f $seg ]]; then 58 | # Extract frame counts 59 | echo "Extracting frame counts for $split" 60 | feat-to-len scp:$scp ark,t:- | cut -d' ' -f2 > $seg 61 | fi 62 | 63 | if [[ !
-f "${output_folder}/${split}/feats_local.ark" ]]; then 64 | feats_cmvn="ark,s,cs:apply-cmvn --norm-vars=true --utt2spk=ark:$utt2spk scp:$cmvn scp:$scp ark:- |" 65 | copy-feats "$feats_cmvn" ark,scp:`realpath $output_folder/${split}/feats_local.ark`,$output_folder/${split}/feats_local.scp & 66 | fi 67 | done 68 | 69 | # Wait for completion 70 | wait 71 | -------------------------------------------------------------------------------- /examples/v4.0.0/speech/scripts/word2char: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import argparse 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(prog='word2char') 7 | 8 | parser.add_argument('-i', '--has-ids', action='store_true', 9 | help='Enable if input file has segment IDs in first column.') 10 | parser.add_argument('-s', '--space', default='<s>', 11 | help='Placeholder token for explicit space characters.') 12 | 13 | # Parse arguments 14 | args = parser.parse_args() 15 | 16 | for line in sys.stdin: 17 | text = line.strip() 18 | if args.has_ids: 19 | sid, text = text.split(' ', 1) 20 | 21 | # After the character join, each original space becomes three 22 | # consecutive spaces; map those to the explicit space token. 23 | text = ' '.join(list(text)).replace('   ', ' {} '.format(args.space)) 24 | print(text.replace('[ n o i s e ]', '[noise]').replace( 25 | '[ v o c a l i z e d - n o i s e ]', '[vocalized-noise]').replace( 26 | '[ l a u g h t e r ]', '[laughter]')) 27 | -------------------------------------------------------------------------------- /nmtpytorch/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '4.0.0' 2 | -------------------------------------------------------------------------------- /nmtpytorch/cleanup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import signal 5 | import atexit 6 | import pathlib 7 | import traceback 8 | 9 | 10 | class Cleanup: 11 | def __init__(self): 12 | self.temp_files = set() 13 | self.processes = set() 14 | 15 | def register_tmp_file(self, tmp_file): 16 | """Add new temp file to global set.""" 17 | self.temp_files.add(pathlib.Path(tmp_file)) 18 | 19 | def register_proc(self, pid): 20 | """Add new process to global set.""" 21 | self.processes.add(pid) 22 | 23 | def unregister_proc(self, pid): 24 | """Remove given PID from global set.""" 25 | self.processes.remove(pid) 26 | 27 | def __call__(self): 28 | """Cleanup registered temp files and kill PIDs.""" 29 | for tmp_file in filter(lambda x: x.exists(), self.temp_files): 30 | tmp_file.unlink() 31 | 32 | for proc in self.processes: 33 | try: 34 | os.kill(proc, signal.SIGTERM) 35 | except ProcessLookupError: 36 | pass 37 | 38 | def __repr__(self): 39 | repr_ = "Cleanup Manager\n" 40 | if len(self.processes) > 0: 41 | repr_ += "Tracking Processes\n" 42 | for proc in self.processes: 43 | repr_ += " {}\n".format(proc) 44 | 45 | if len(self.temp_files) > 0: 46 | repr_ += "Tracking Temporary Files\n" 47 | for tmp_file in self.temp_files: 48 | repr_ += " {}\n".format(tmp_file) 49 | 50 | return repr_ 51 | 52 | @staticmethod 53 | def register_exception_handler(logger, quit_on_exception=False): 54 | """Set up the exception handler.""" 55 | 56 | def exception_handler(exctype, val, trace): 57 | """Let Python call this when an exception is uncaught.""" 58 | logger.info( 59 | ''.join(traceback.format_exception(exctype, val, trace))) 60 | 61 | def exception_handler_quits(exctype, val, trace): 62 | """Let Python call this when an exception is uncaught.""" 63 | logger.info(
64 | ''.join(traceback.format_exception(exctype, val, trace))) 65 | sys.exit(1) 66 | 67 | if quit_on_exception: 68 | sys.excepthook = exception_handler_quits 69 | else: 70 | sys.excepthook = exception_handler 71 | 72 | @staticmethod 73 | def register_handler(logger, _atexit=True, _signals=True, 74 | exception_quits=False): 75 | """Register atexit and signal handlers.""" 76 | if _atexit: 77 | # Register exit handler 78 | atexit.register(cleanup) 79 | 80 | if _signals: 81 | # Register SIGINT and SIGTERM 82 | signal.signal(signal.SIGINT, signal_handler) 83 | signal.signal(signal.SIGTERM, signal_handler) 84 | 85 | Cleanup.register_exception_handler(logger, exception_quits) 86 | 87 | 88 | # Create a global cleaner 89 | cleanup = Cleanup() 90 | 91 | 92 | def signal_handler(signum, frame): 93 | """Let Python call this when SIGINT or SIGTERM is caught.""" 94 | cleanup() 95 | sys.exit(0) 96 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/README.md: -------------------------------------------------------------------------------- 1 | pycocoevalcap 2 | --- 3 | 4 | This is a copy from 5 | https://github.com/tylin/coco-caption/tree/master/pycocoevalcap 6 | 7 | with Python 2 support dropped. 8 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | from .bleu.bleu import Bleu 3 | from .cider.cider import Cider 4 | from .rouge.rouge import Rouge 5 | from .meteor.meteor import Meteor 6 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/bleu/LICENSE.bleu: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE.
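The scorers in this `cocoeval` package share one interface: `compute_score(gts, res)` takes two dicts keyed by example id, where each `res` value is a single-hypothesis list and each `gts` value is a list of one or more references. A minimal usage sketch with made-up sentences (METEOR follows the same interface but additionally needs the external jar installed via `nmtpy-install-extra`):

```python
from nmtpytorch.cocoeval import Bleu, Rouge

gts = {0: ['a man rides a horse .', 'a person riding a horse .'],
       1: ['two dogs play in the snow .']}
res = {0: ['a man is riding a horse .'],
       1: ['two dogs are playing in the snow .']}

bleu, _ = Bleu(4).compute_score(gts, res)   # list: [Bleu_1 .. Bleu_4]
rouge, _ = Rouge().compute_score(gts, res)  # corpus-level ROUGE-L
```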
20 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/bleu/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/bleu/bleu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File Name : bleu.py 3 | # 4 | # Description : Wrapper for BLEU scorer. 5 | # 6 | # Creation Date : 06-01-2015 7 | # Last Modified : Thu 19 Mar 2015 09:13:28 PM PDT 8 | # Authors : Hao Fang and Tsung-Yi Lin 9 | 10 | from .bleu_scorer import BleuScorer 11 | 12 | 13 | class Bleu: 14 | def __init__(self, n=4): 15 | # by default, compute BLEU score up to 4-grams 16 | self._n = n 17 | self._hypo_for_image = {} 18 | self.ref_for_image = {} 19 | 20 | def compute_score(self, gts, res): 21 | 22 | bleu_scorer = BleuScorer(n=self._n) 23 | for id in sorted(gts.keys()): 24 | hypo = res[id] 25 | ref = gts[id] 26 | 27 | # Sanity check. 28 | assert isinstance(hypo, list) 29 | assert isinstance(ref, list) 30 | assert len(hypo) == 1 31 | assert len(ref) >= 1 32 | 33 | bleu_scorer += (hypo[0], ref) 34 | 35 | # score, scores = bleu_scorer.compute_score(option='shortest') 36 | # score, scores = bleu_scorer.compute_score(option='average',verbose=1) 37 | score, scores = bleu_scorer.compute_score(option='closest', verbose=0) 38 | 39 | # return (bleu, bleu_info) 40 | return score, scores 41 | 42 | def method(self): 43 | return "Bleu" 44 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/cider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /nmtpytorch/cocoeval/cider/cider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Filename: cider.py 3 | # 4 | # Description: Describes the class to compute the CIDEr 5 | # (Consensus-Based Image Description Evaluation) Metric 6 | # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) 7 | # 8 | # Creation Date: Sun Feb 8 14:16:54 2015 9 | # 10 | # Authors: Ramakrishna Vedantam and 11 | # Tsung-Yi Lin 12 | 13 | from .cider_scorer import CiderScorer 14 | 15 | 16 | class Cider: 17 | """Main class to compute the CIDEr metric.""" 18 | 19 | def __init__(self, test=None, refs=None, n=4, sigma=6.0): 20 | # set cider to sum over 1 to 4-grams 21 | self._n = n 22 | # set the standard deviation parameter for Gaussian penalty 23 | self._sigma = sigma 24 | 25 | def compute_score(self, gts, res): 26 | """Main function to compute CIDEr score 27 | 28 | Arguments: 29 | hypo_for_image (dict): dictionary with key <image> and 30 | value <tokenized hypothesis / candidate sentence> 31 | ref_for_image (dict): dictionary with key <image> and value <tokenized reference sentence> 32 | 33 | 34 | Returns: 35 | cider (float): computed CIDEr score for the corpus 36 | """ 37 | 38 | cider_scorer = CiderScorer(n=self._n, sigma=self._sigma) 39 | 40 | for id in sorted(gts.keys()): 41 | hypo = res[id] 42 | ref = gts[id] 43 | 44 | # Sanity check.
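# 'res' must map each id to a single-item hypothesis list, while 'gts' may
# hold several reference sentences per id; both share the same key set.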
45 | assert isinstance(hypo, list)
46 | assert isinstance(ref, list)
47 | assert len(hypo) == 1
48 | assert len(ref) > 0
49 |
50 | cider_scorer += (hypo[0], ref)
51 |
52 | (score, scores) = cider_scorer.compute_score()
53 |
54 | return score, scores
55 |
56 | def method(self):
57 | return "CIDEr"
58 |
--------------------------------------------------------------------------------
/nmtpytorch/cocoeval/meteor/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/nmtpytorch/cocoeval/meteor/meteor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Python wrapper for METEOR implementation, by Xinlei Chen
3 | # Acknowledge Michael Denkowski for the generous discussion and help
4 |
5 | import os
6 | import shutil
7 | import threading
8 | import subprocess
9 |
10 | from ...utils.misc import get_meteor_jar
11 |
12 |
13 | class Meteor:
14 | def __init__(self, language, norm=False):
15 | self.jar = str(get_meteor_jar())
16 | self.meteor_cmd = ['java', '-jar', '-Xmx2G', self.jar,
17 | '-', '-', '-stdio', '-l', language]
18 | self.env = os.environ
19 | self.env['LC_ALL'] = 'en_US.UTF-8'
20 |
21 | # Sanity check
22 | if shutil.which('java') is None:
23 | raise RuntimeError('METEOR requires java which is not installed.')
24 |
25 | if norm:
26 | self.meteor_cmd.append('-norm')
27 |
28 | self.meteor_p = subprocess.Popen(self.meteor_cmd,
29 | stdin=subprocess.PIPE,
30 | stdout=subprocess.PIPE,
31 | stderr=subprocess.PIPE,
32 | env=self.env,
33 | universal_newlines=True, bufsize=1)
34 | # Used to guarantee thread safety
35 | self.lock = threading.Lock()
36 |
37 | def method(self):
38 | return "METEOR"
39 |
40 | def compute_score(self, gts, res):
41 | imgIds = sorted(list(gts.keys()))
42 | scores = []
43 |
44 | eval_line = 'EVAL'
45 | self.lock.acquire()
46 | for i in imgIds:
47 | assert len(res[i]) == 1
48 |
49 | hypothesis_str = res[i][0].replace('|||', '').replace('  ', ' ')
50 | score_line = ' ||| '.join(
51 | ('SCORE', ' ||| '.join(gts[i]), hypothesis_str))
52 |
53 | # We obtained --> SCORE ||| reference 1 words |||
54 | # reference n words ||| hypothesis words
55 | self.meteor_p.stdin.write(score_line + '\n')
56 | stat = self.meteor_p.stdout.readline().strip()
57 | eval_line += ' ||| {}'.format(stat)
58 |
59 | # Send to METEOR
60 | self.meteor_p.stdin.write(eval_line + '\n')
61 |
62 | # Collect segment scores
63 | for i in range(len(imgIds)):
64 | score = float(self.meteor_p.stdout.readline().strip())
65 | scores.append(score)
66 |
67 | # Final score
68 | final_score = 100 * float(self.meteor_p.stdout.readline().strip())
69 | self.lock.release()
70 |
71 | return final_score, scores
72 |
73 | def __del__(self):
74 | self.lock.acquire()
75 | self.meteor_p.stdin.close()
76 | self.meteor_p.wait()
77 | self.lock.release()
78 |
--------------------------------------------------------------------------------
/nmtpytorch/cocoeval/rouge/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'vrama91'
2 |
--------------------------------------------------------------------------------
/nmtpytorch/cocoeval/rouge/rouge.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File Name : rouge.py
3 | #
4 | # Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)
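# (Editorial note) For a candidate of length m, a reference of length n
# and a longest common subsequence of length l, ROUGE-L uses P = l/m,
# R = l/n and F = (1 + beta^2) * P * R / (R + beta^2 * P); calc_score()
# below implements exactly this with beta = 1.2.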
5 | #
6 | # Creation Date : 2015-01-07 06:03
7 | # Author : Ramakrishna Vedantam
8 |
9 | import numpy as np
10 |
11 |
12 | def my_lcs(string, sub):
13 | """
14 | Calculates longest common subsequence for a pair of tokenized strings
15 | :param string : list of str : tokens from a string split using whitespace
16 | :param sub : list of str : shorter string, also split using whitespace
17 | :returns: length (int): length of the longest common subsequence
18 | between the two strings
19 |
20 | my_lcs only gives length of the longest common subsequence,
21 | not the actual LCS
22 | """
23 | if len(string) < len(sub):
24 | sub, string = string, sub
25 |
26 | lengths = [[0 for i in range(0, len(sub) + 1)] for j
27 | in range(0, len(string) + 1)]
28 |
29 | for j in range(1, len(sub) + 1):
30 | for i in range(1, len(string) + 1):
31 | if string[i - 1] == sub[j - 1]:
32 | lengths[i][j] = lengths[i - 1][j - 1] + 1
33 | else:
34 | lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])
35 |
36 | return lengths[len(string)][len(sub)]
37 |
38 |
39 | class Rouge:
40 | """Class for computing ROUGE-L score for a set of candidate sentences
41 | for the MS COCO test set."""
42 | def __init__(self):
43 | # vrama91: updated the value below based on discussion with Hovy
44 | self.beta = 1.2
45 |
46 | def calc_score(self, candidate, refs):
47 | """
48 | Compute ROUGE-L score given one candidate and references for an image
49 | :param candidate: list of str : single-element list holding the candidate sentence to be evaluated
50 | :param refs: list of str : COCO reference sentences for the particular
51 | image to be evaluated
52 | :returns score: float (ROUGE-L score for the candidate evaluated
53 | against references)
54 | """
55 | assert len(candidate) == 1
56 | assert len(refs) > 0
57 | prec = []
58 | rec = []
59 |
60 | # split into tokens
61 | token_c = candidate[0].split(" ")
62 |
63 | for reference in refs:
64 | # split into tokens
65 | token_r = reference.split(" ")
66 | # compute the longest common subsequence
67 | lcs = my_lcs(token_r, token_c)
68 | prec.append(lcs / float(len(token_c)))
69 | rec.append(lcs / float(len(token_r)))
70 |
71 | prec_max = max(prec)
72 | rec_max = max(rec)
73 |
74 | if prec_max != 0 and rec_max != 0:
75 | score = ((1 + self.beta**2) * prec_max * rec_max)
76 | score /= float(rec_max + self.beta ** 2 * prec_max)
77 | else:
78 | score = 0.0
79 | return score
80 |
81 | def compute_score(self, gts, res):
82 | """
83 | Computes Rouge-L score given a set of reference and candidate
84 | sentences for the dataset
85 |
86 | :param hypo_for_image: dict : candidate / test sentences with
87 | "image name" key and "tokenized sentences" as values
88 | :param ref_for_image: dict : reference MS-COCO sentences with
89 | "image name" key and "tokenized sentences" as values
90 | :returns: average_score: float (mean ROUGE-L score computed by
91 | averaging scores for all the images)
92 | """
93 | score = []
94 | for id in sorted(gts.keys()):
95 | hypo = res[id]
96 | ref = gts[id]
97 |
98 | score.append(self.calc_score(hypo, ref))
99 |
100 | # Sanity check.
101 | assert isinstance(hypo, list) 102 | assert isinstance(ref, list) 103 | assert len(hypo) == 1 104 | assert len(ref) > 0 105 | 106 | average_score = np.mean(np.array(score)) 107 | return average_score, np.array(score) 108 | 109 | def method(self): 110 | return "Rouge" 111 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # First the basic types 2 | from .npy import NumpyDataset 3 | from .kaldi import KaldiDataset 4 | from .imagefolder import ImageFolderDataset 5 | from .text import TextDataset 6 | from .numpy_sequence import NumpySequenceDataset 7 | from .label import LabelDataset 8 | from .shelve import ShelveDataset 9 | 10 | # Second the selector function 11 | def get_dataset(type_): 12 | return { 13 | 'numpy': NumpyDataset, 14 | 'numpysequence': NumpySequenceDataset, 15 | 'kaldi': KaldiDataset, 16 | 'imagefolder': ImageFolderDataset, 17 | 'text': TextDataset, 18 | 'label': LabelDataset, 19 | 'shelve': ShelveDataset, 20 | }[type_.lower()] 21 | 22 | 23 | # Should always be at the end 24 | from .multimodal import MultimodalDataset 25 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/collate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # This will eventually disappear as this only provides .size 4 | # which can be inferred if we guarantee that batch_dim is always at 5 | # a given position regardless of input/output feature/tensor types. 6 | 7 | 8 | class Batch(dict): 9 | """A custom dictionary representing a batch.""" 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | dim1s = set([x.size(1) for x in self.values()]) 13 | assert len(dim1s) == 1, \ 14 | "Incompatible batch dimension (1) between modalities." 15 | self.size = dim1s.pop() 16 | 17 | def device(self, device): 18 | self.update({k: v.to(device) for k, v in self.items()}) 19 | 20 | def __repr__(self): 21 | s = "Batch(size={})\n".format(self.size) 22 | for data_source, tensor in self.items(): 23 | s += " {:10s} -> {} - {}\n".format( 24 | str(data_source), tensor.shape, tensor.device) 25 | return s 26 | 27 | 28 | def get_collate(data_sources): 29 | """Returns a special collate_fn which will view the underlying data 30 | in terms of the given DataSource keys.""" 31 | 32 | def collate_fn(batch): 33 | return Batch( 34 | {ds: ds.torchify([elem[ds] for elem in batch]) for ds in data_sources}, 35 | ) 36 | 37 | return collate_fn 38 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/imagefolder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from functools import lru_cache 3 | from pathlib import Path 4 | 5 | from PIL import Image 6 | 7 | import torch 8 | from torch.utils import data 9 | from torchvision import transforms 10 | 11 | 12 | class ImageFolderDataset(data.Dataset): 13 | """A variant of torchvision.datasets.ImageFolder which drops support for 14 | target loading, i.e. this only loads images not attached to any other 15 | label. 16 | 17 | This class also makes use of ``lru_cache`` to cache an image file once 18 | opened to avoid repetitive disk access. 
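Images are read in the order given by ``index.txt``, which lists one
image path per line, relative to ``root``.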
19 | 20 | Arguments: 21 | root (str): The root folder that contains the images and index.txt 22 | resize (int, optional): An optional integer to be given to 23 | ``torchvision.transforms.Resize``. Default: ``None``. 24 | crop (int, optional): An optional integer to be given to 25 | ``torchvision.transforms.CenterCrop``. Default: ``None``. 26 | replicate(int, optional): Replicate the image names ``replicate`` 27 | times in order to process the same image ``replicate`` times 28 | if ``replicate`` sentences are available during training time. 29 | warmup(bool, optional): If ``True``, the images will be read once 30 | at the beginning to fill the cache. 31 | """ 32 | def __init__(self, root, resize=None, crop=None, 33 | replicate=1, warmup=False, **kwargs): 34 | self.root = Path(root).expanduser().resolve() 35 | self.replicate = replicate 36 | 37 | # Image list in dataset order 38 | self.index = self.root / 'index.txt' 39 | 40 | _transforms = [] 41 | if resize is not None: 42 | _transforms.append(transforms.Resize(resize)) 43 | if crop is not None: 44 | _transforms.append(transforms.CenterCrop(crop)) 45 | _transforms.append(transforms.ToTensor()) 46 | _transforms.append( 47 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 48 | std=[0.229, 0.224, 0.225])) 49 | self.transform = transforms.Compose(_transforms) 50 | 51 | if not self.index.exists(): 52 | raise(RuntimeError( 53 | "index.txt does not exist in {}".format(self.root))) 54 | 55 | self.image_files = [] 56 | with self.index.open() as f: 57 | for fname in f: 58 | fname = self.root / fname.strip() 59 | assert fname.exists(), "{} does not exist.".format(fname) 60 | self.image_files.append(str(fname)) 61 | 62 | # Setup reader 63 | self.read_image = lru_cache(maxsize=self.__len__())(self._read_image) 64 | 65 | if warmup: 66 | for idx in range(self.__len__()): 67 | self[idx] 68 | 69 | # Replicate the list if requested 70 | self.image_files = self.image_files * self.replicate 71 | 72 | def _read_image(self, fname): 73 | with open(fname, 'rb') as f: 74 | img = Image.open(f).convert('RGB') 75 | return self.transform(img) 76 | 77 | @staticmethod 78 | def to_torch(batch, **kwargs): 79 | return torch.stack(batch) 80 | 81 | def __getitem__(self, idx): 82 | return self.read_image(self.image_files[idx]) 83 | 84 | def __len__(self): 85 | return len(self.image_files) 86 | 87 | def __repr__(self): 88 | s = "{}(replicate={}) ({} samples)\n".format( 89 | self.__class__.__name__, self.replicate, self.__len__()) 90 | s += " {}\n".format(self.root) 91 | if self.transform: 92 | s += ' Transforms: {}\n'.format( 93 | self.transform.__repr__().replace('\n', '\n' + ' ')) 94 | return s 95 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/kaldi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pathlib import Path 3 | from tqdm import tqdm 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | from torch.nn.utils.rnn import pad_sequence 8 | 9 | from ..utils.kaldi import readMatrixShape, readMatrixByOffset 10 | 11 | # TODO 12 | # ---- 13 | # an lru_cache() decorated version of readMatrixByOffset() will make sure that 14 | # all the training data is cached into memory after 1 epoch. 15 | 16 | 17 | class KaldiDataset(Dataset): 18 | """A PyTorch dataset for Kaldi .scp/ark. 
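Each line of ``feats_local.scp`` is expected to follow Kaldi's pointer
format, i.e. ``uttid path/to/file.ark:offset``, which is how
``__init__`` below parses it.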
19 | 20 | Arguments: 21 | fname (str or Path): A string or ``pathlib.Path`` object for 22 | a folder that contains ``feats_local.scp`` and optionally a ``segments.len`` 23 | file containing segment lengths. 24 | """ 25 | 26 | def __init__(self, fname, **kwargs): 27 | self.data = [] 28 | self.lengths = [] 29 | self.root = Path(fname) 30 | self.scp_path = self.root / 'feats_local.scp' 31 | self.len_path = self.root / 'segments.len' 32 | 33 | if not self.scp_path.exists(): 34 | raise RuntimeError('{} does not exist.'.format(self.scp_path)) 35 | 36 | if self.len_path.exists(): 37 | read_lengths = False 38 | # Read lengths file 39 | with open(self.len_path) as f: 40 | for line in f: 41 | self.lengths.append(int(line.strip())) 42 | else: 43 | # Read them below (this is slow) 44 | read_lengths = True 45 | 46 | with open(self.scp_path) as scp_input_file: 47 | for line in tqdm(scp_input_file, unit='segments'): 48 | uttid, pointer = line.strip().split() 49 | arkfile, offset = pointer.rsplit(':', 1) 50 | offset = int(offset) 51 | self.data.append((arkfile, offset)) 52 | if read_lengths: 53 | with open(arkfile, "rb") as g: 54 | g.seek(offset) 55 | feat_len = readMatrixShape(g)[0] 56 | 57 | self.lengths.append(feat_len) 58 | 59 | # Set dataset size 60 | self.size = len(self.data) 61 | 62 | if self.size != len(self.lengths): 63 | raise RuntimeError("Dataset size and lengths size does not match.") 64 | 65 | @staticmethod 66 | def to_torch(batch, **kwargs): 67 | return pad_sequence( 68 | [torch.FloatTensor(x) for x in batch], batch_first=False) 69 | 70 | def __getitem__(self, idx): 71 | """Read segment features from the actual .ark file.""" 72 | return readMatrixByOffset(*self.data[idx]) 73 | 74 | def __len__(self): 75 | return self.size 76 | 77 | def __repr__(self): 78 | s = "{} '{}' ({} samples)\n".format( 79 | self.__class__.__name__, self.scp_path.name, self.__len__()) 80 | return s 81 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/label.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pathlib import Path 3 | 4 | import torch 5 | from torch.utils.data import Dataset 6 | 7 | from ..utils.data import read_sentences 8 | 9 | 10 | class LabelDataset(Dataset): 11 | r"""A PyTorch dataset that returns a single integer representing a category. 12 | 13 | Arguments: 14 | fname (str or Path): A string or ``pathlib.Path`` object giving 15 | space delimited attributes per sentence. 16 | vocab (Vocabulary): A ``Vocabulary`` instance for the labels. 
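Labels are mapped to their vocabulary ids as-is; no ``<bos>``/``<eos>``
markers are added (note the ``eos=False, bos=False`` arguments to
``read_sentences`` below).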
17 | """ 18 | 19 | def __init__(self, fname, vocab, **kwargs): 20 | self.path = Path(fname) 21 | self.vocab = vocab 22 | 23 | # Detect glob patterns 24 | self.fnames = sorted(self.path.parent.glob(self.path.name)) 25 | 26 | if len(self.fnames) == 0: 27 | raise RuntimeError('{} does not exist.'.format(self.path)) 28 | elif len(self.fnames) > 1: 29 | raise RuntimeError("Multiple source files not supported.") 30 | 31 | # Read the label strings and map them to vocabulary 32 | self.data, _ = read_sentences( 33 | self.fnames[0], self.vocab, eos=False, bos=False) 34 | 35 | # number of possible classes is the vocab size 36 | self.n_classes = len(self.vocab) 37 | 38 | # Dataset size 39 | self.size = len(self.data) 40 | 41 | @staticmethod 42 | def to_torch(batch, **kwargs): 43 | return torch.LongTensor(batch).t() 44 | 45 | def __getitem__(self, idx): 46 | return self.data[idx] 47 | 48 | def __len__(self): 49 | return self.size 50 | 51 | def __repr__(self): 52 | s = "{} '{}' ({} samples)\n".format( 53 | self.__class__.__name__, self.fnames[0].name, self.__len__()) 54 | return s 55 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/npy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | class NumpyDataset(Dataset): 10 | r"""A PyTorch dataset for Numpy .npy/npz serialized tensor files. The 11 | serialized tensor's first dimension should be the batch dimension. 12 | 13 | Arguments: 14 | fname (str or Path): A string or ``pathlib.Path`` object for 15 | the relevant numpy file. 16 | key (str, optional): If `fname` is `.npz` file, its relevant `key` 17 | will be fetched from the serialized object. 18 | order_file (str, None): If given, will be used to map sample indices 19 | to tensors using this list. Useful for tiled or repeated 20 | experiments. 21 | revert (bool, optional): If `True`, the data order will be reverted 22 | for adversarial/incongruent experiments during test-time. 23 | """ 24 | 25 | def __init__(self, fname, key=None, order_file=None, revert=False, **kwargs): 26 | self.path = Path(fname) 27 | if not self.path.exists(): 28 | raise RuntimeError('{} does not exist.'.format(self.path)) 29 | 30 | if self.path.suffix == '.npy': 31 | self.data = np.load(self.path) 32 | elif self.path.suffix == '.npz': 33 | assert key, "A key should be provided for .npz files." 
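# An .npz archive can hold several named arrays; fetch the requested one.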
34 | self.data = np.load(self.path)[key]
35 |
36 | if order_file:
37 | with open(order_file) as orf:
38 | self.order = [int(x) for x in orf.read().strip().split('\n')]
39 | else:
40 | self.order = list(range(self.data.shape[0]))
41 |
42 | if revert:
43 | self.order = self.order[::-1]
44 |
45 | # Dataset size
46 | self.size = len(self.order)
47 |
48 | @staticmethod
49 | def to_torch(batch, **kwargs):
50 | # NOTE: Assumes x.shape == (n, *)
51 | x = torch.from_numpy(np.array(batch, dtype='float32'))
52 | # Convert it to (t(=1 if fixed features), n, c)
53 | # By default we flatten h*w to first dim for interoperability
54 | # Models should further reshape the tensor for their needs
55 | return x.view(*x.size()[:2], -1).permute(2, 0, 1)
56 |
57 | def __getitem__(self, idx):
58 | return self.data[self.order[idx]]
59 |
60 | def __len__(self):
61 | return self.size
62 |
63 | def __repr__(self):
64 | s = "{} '{}' ({} samples)\n".format(
65 | self.__class__.__name__, self.path.name, self.__len__())
66 | return s
67 |
--------------------------------------------------------------------------------
/nmtpytorch/datasets/numpy_sequence.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from functools import lru_cache
3 | import numpy as np
4 | import torch
5 | from torch.utils.data import Dataset
6 | from ..utils.misc import pbar
7 |
8 |
9 | class NumpySequenceDataset(Dataset):
10 | """Read a sequence of numpy arrays.
11 |
12 | Arguments:
13 | fname (str or Path): Path to a list of paths to Numpy `.npy` files
14 | where each file contains an array with shape `(n_features, feat_dim)`.
15 | If the lines are in `<path>:<length>` format, additional length
16 | information will be used for bucketing. If the file itself is
17 | a `.npy` file, it will be treated as an array of numpy objects.
18 | For cases where all features are the same length, you should use
19 | `NumpyDataset`.
20 | cache (bool, optional): Whether the accessed files will be cached
21 | in memory or not.
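Example list file (paths hypothetical): one ``feats/utt1.npy`` per
line, or ``feats/utt1.npy:137`` when precomputed lengths are
available for bucketing.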
22 | """ 23 | 24 | def __init__(self, fname, cache=False, **kwargs): 25 | self.fname = fname 26 | self.data = [] 27 | self.lengths = [] 28 | self.has_lengths = False 29 | self.cache = cache 30 | 31 | if not self.fname: 32 | raise RuntimeError('{} does not exist.'.format(self.fname)) 33 | 34 | if str(self.fname).endswith('.npy'): 35 | # Loads the whole dataset at once 36 | self.data = np.load(self.fname) 37 | self.lengths = [x.shape[0] for x in self.data] 38 | self.has_lengths = True 39 | self._read = lambda x: x 40 | else: 41 | with open(self.fname) as f_list: 42 | # Detect file format and seek back 43 | self.has_lengths = ':' in f_list.readline() 44 | f_list.seek(0) 45 | for line in pbar(f_list, unit='sents'): 46 | if self.has_lengths: 47 | path, length = line.strip().split(':') 48 | self.lengths.append(int(length)) 49 | else: 50 | path = line.strip() 51 | self.data.append(path) 52 | 53 | if self.cache: 54 | self._read = lru_cache(maxsize=len(self.data))(self._read_tensor) 55 | else: 56 | self._read = self._read_tensor 57 | 58 | # Set dataset size 59 | self.size = len(self.data) 60 | 61 | def _read_tensor(self, fname): 62 | """Reads the .npy file.""" 63 | return np.load(fname) 64 | 65 | def __getitem__(self, idx): 66 | # Each item is (t, feat_dim) 67 | return self._read(self.data[idx]) 68 | 69 | @staticmethod 70 | def to_torch(batch, **kwargs): 71 | # List of (t, feat_dim) 72 | max_len = max(x.shape[0] for x in batch) 73 | width = batch[0].shape[1] 74 | padded = [np.zeros((max_len, width)) for _ in batch] 75 | for pad, x in zip(padded, batch): 76 | pad[:x.shape[0]] = x 77 | # padded is (n_samples, t, feat_dim) 78 | # return (n, f, t) for compatibility with the other input sources 79 | return torch.from_numpy( 80 | np.array(padded, dtype='float32')).transpose(1, 2) 81 | 82 | def __len__(self): 83 | return self.size 84 | 85 | def __repr__(self): 86 | s = "{} (has_lengths={}) ({} samples)\n".format( 87 | self.__class__.__name__, self.has_lengths, self.__len__()) 88 | s += " {}\n".format(self.fname) 89 | return s 90 | -------------------------------------------------------------------------------- /nmtpytorch/datasets/shelve.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import shelve 3 | from pathlib import Path 4 | 5 | from sklearn import preprocessing 6 | import numpy as np 7 | from torch.utils.data import Dataset 8 | 9 | from ..utils.data import pad_video_sequence 10 | 11 | 12 | class ShelveDataset(Dataset): 13 | r"""A PyTorch dataset for Shelve serialized tensor files. The 14 | serialized tensor's first dimension should be the batch dimension. 15 | 16 | Arguments: 17 | fname (str or Path): A string or ``pathlib.Path`` object for 18 | the relevant .shelve file. 19 | norm_and_scale: True or False: Should we normalise and scale 20 | the image features? 21 | """ 22 | 23 | def __init__(self, fname, key=None, norm_and_scale=False, **kwargs): 24 | self.path = Path('{}.dat'.format(fname)) 25 | if not self.path.exists(): 26 | raise RuntimeError('{} does not exist.'.format(self.path)) 27 | 28 | self.data = shelve.open(str(fname.resolve())) 29 | self.norm_and_scale = norm_and_scale 30 | 31 | # Dataset size 32 | self.size = len(self.data) 33 | 34 | # Stores the lengths of the input video sequences to enable bucketing 35 | self.lengths = self.read_sequence_lengths() 36 | 37 | def read_sequence_lengths(self): 38 | '''Returns an array with the number of video feature vectors 39 | stored for each image. 
TODO: This is expensive and a slow
40 | way to start the process.'''
41 | lengths = []
42 | for x in self.data:
43 | lengths.append(len(self.data[str(x)]))
44 | return lengths
45 |
46 | @staticmethod
47 | def to_torch(batch, **kwargs):
48 | ''' Pad the video sequence, if necessary.
49 | Transposes the video sequence to conform to the RNN expected inputs:
50 | n_samples x timesteps x feats -> timesteps x n_samples x feats
51 | '''
52 | batch = pad_video_sequence(batch)
53 | batch = batch.transpose(0, 1)
54 | return batch
55 |
56 | def __getitem__(self, idx):
57 | if self.norm_and_scale:
58 | feats = self.data[str(idx)]
59 | feats = preprocessing.normalize(feats)
60 | return feats
61 | else:
62 | return np.array(self.data[str(idx)])
63 |
64 | def __len__(self):
65 | return self.size
66 |
67 | def __repr__(self):
68 | s = "{} '{}' ({} samples)\n".format(
69 | self.__class__.__name__, self.path.name, self.__len__())
70 | return s
71 |
--------------------------------------------------------------------------------
/nmtpytorch/datasets/text.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | from pathlib import Path
4 |
5 | import torch
6 |
7 | from torch.utils.data import Dataset
8 | from torch.nn.utils.rnn import pad_sequence
9 |
10 | from ..utils.data import read_sentences
11 |
12 | logger = logging.getLogger('nmtpytorch')
13 |
14 |
15 | class TextDataset(Dataset):
16 | r"""A PyTorch dataset for sentences.
17 |
18 | Arguments:
19 | fname (str or Path): A string or ``pathlib.Path`` object giving
20 | the corpus.
21 | vocab (Vocabulary): A ``Vocabulary`` instance for the given corpus.
22 | bos (bool, optional): If ``True``, a special beginning-of-sentence
23 | ``<bos>`` marker will be prepended to sentences.
24 | """
25 |
26 | def __init__(self, fname, vocab, bos=False, eos=True, **kwargs):
27 | self.path = Path(fname)
28 | self.vocab = vocab
29 | self.bos = bos
30 | self.eos = eos
31 |
32 | # Detect glob patterns
33 | self.fnames = sorted(self.path.parent.glob(self.path.name))
34 |
35 | if len(self.fnames) == 0:
36 | raise RuntimeError('{} does not exist.'.format(self.path))
37 | elif len(self.fnames) > 1:
38 | logger.info('Multiple files found, using first: {}'.format(self.fnames[0]))
39 |
40 | # Read the sentences and map them to vocabulary
41 | self.data, self.lengths = read_sentences(
42 | self.fnames[0], self.vocab, bos=self.bos, eos=self.eos)
43 |
44 | # Dataset size
45 | self.size = len(self.data)
46 |
47 | @staticmethod
48 | def to_torch(batch, **kwargs):
49 | return pad_sequence(
50 | [torch.tensor(b, dtype=torch.long) for b in batch], batch_first=False)
51 |
52 | def __getitem__(self, idx):
53 | return self.data[idx]
54 |
55 | def __len__(self):
56 | return self.size
57 |
58 | def __repr__(self):
59 | s = "{} '{}' ({} sentences)".format(
60 | self.__class__.__name__, self.fnames[0].name, self.__len__())
61 | return s
62 |
--------------------------------------------------------------------------------
/nmtpytorch/evaluator.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from collections import OrderedDict
3 |
4 | from .
import metrics 5 | from .utils.filterchain import FilterChain 6 | from .utils.misc import get_language 7 | 8 | 9 | class Evaluator: 10 | def __init__(self, refs, beam_metrics, filters=''): 11 | # metrics: list of upper-case beam-search metrics 12 | self.kwargs = {} 13 | self.scorers = OrderedDict() 14 | self.refs = list(refs.parent.glob(refs.name)) 15 | self.language = get_language(self.refs[0]) 16 | if self.language is None: 17 | # Fallback to en (this is only relevant for METEOR) 18 | self.language = 'en' 19 | 20 | self.filter = lambda s: s 21 | if filters: 22 | self.filter = FilterChain(filters) 23 | self.refs = self.filter(refs) 24 | 25 | assert len(self.refs) > 0, "Number of reference files == 0" 26 | 27 | for metric in sorted(beam_metrics): 28 | self.kwargs[metric] = {'language': self.language} 29 | self.scorers[metric] = getattr(metrics, metric + 'Scorer')() 30 | 31 | def score(self, hyps): 32 | """hyps is a list of hypotheses as they come out from decoder.""" 33 | assert isinstance(hyps, list), "hyps should be a list." 34 | 35 | # Post-process if requested 36 | hyps = self.filter(hyps) 37 | 38 | results = [] 39 | for key, scorer in self.scorers.items(): 40 | results.append( 41 | scorer.compute(self.refs, hyps, **self.kwargs[key])) 42 | return results 43 | -------------------------------------------------------------------------------- /nmtpytorch/layers/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import apex 3 | LayerNorm = apex.normalization.FusedLayerNorm 4 | except ImportError as ie: 5 | import torch 6 | LayerNorm = torch.nn.LayerNorm 7 | 8 | # Basic layers 9 | from .ff import FF 10 | from .fusion import Fusion 11 | from .flatten import Flatten 12 | from .argselect import ArgSelect 13 | from .pool import Pool 14 | from .seq_conv import SequenceConvolution 15 | from .rnninit import RNNInitializer 16 | from .max_margin import MaxMargin 17 | 18 | # Embedding variants 19 | from .embedding import * 20 | 21 | # Attention layers 22 | from .attention import * 23 | 24 | # Encoder layers 25 | from .encoders import * 26 | 27 | # Decoder layers 28 | from .decoders import * 29 | -------------------------------------------------------------------------------- /nmtpytorch/layers/argselect.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ArgSelect(torch.nn.Module): 5 | """Dummy layer that picks one of the returned values from mostly RNN-type 6 | `nn.Module` layers.""" 7 | def __init__(self, index): 8 | super().__init__() 9 | self.index = index 10 | 11 | def forward(self, x): 12 | return x[self.index] 13 | 14 | def __repr__(self): 15 | return "ArgSelect(index={})".format(self.index) 16 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp import MLPAttention 2 | from .dot import DotAttention 3 | from .hierarchical import HierarchicalAttention 4 | from .co import CoAttention 5 | from .mhco import MultiHeadCoAttention 6 | from .uniform import UniformAttention 7 | from .scaled_dot import ScaledDotAttention 8 | 9 | 10 | def get_attention(type_): 11 | return { 12 | 'mlp': MLPAttention, 13 | 'dot': DotAttention, 14 | 'hier': HierarchicalAttention, 15 | 'co': CoAttention, 16 | 'mhco': MultiHeadCoAttention, 17 | 'uniform': UniformAttention, 18 | 'scaled_dot': ScaledDotAttention, 19 | }[type_] 20 | 
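A minimal usage sketch for the factory above (all dimensions are hypothetical; `MLPAttention` takes `ctx_dim` and `hid_dim` as its first two arguments, as defined in `mlp.py`/`dot.py` below):

```python
import torch
from nmtpytorch.layers.attention import get_attention

# Build an MLP attention over 512-d annotations with a 256-d decoder state.
att = get_attention('mlp')(ctx_dim=512, hid_dim=256)

# S=7 source positions, B=32 samples: ctx is S*B*C, hid is T*B*H with T=1.
ctx = torch.rand(7, 32, 512)
hid = torch.rand(1, 32, 256)
alpha, z_t = att(hid, ctx)  # alpha: 7x32 scores, z_t: 32x256 context
```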
--------------------------------------------------------------------------------
/nmtpytorch/layers/attention/co.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn.functional as F
4 | from torch import nn
5 |
6 | from ...utils.nn import get_activation_fn
7 |
8 | # Code contributed by @jlibovicky
9 |
10 |
11 | class CoAttention(nn.Module):
12 | """Co-attention between two sequences.
13 |
14 | Uses one hidden layer to compute an affinity matrix between two sequences.
15 | This can then be normalized in two directions, which gives us 1->2 and 2->1
16 | attentions.
17 |
18 | The co-attention is computed using a single feed-forward layer as in
19 | Bahdanau's attention.
20 | """
21 | def __init__(self, ctx_1_dim, ctx_2_dim, bottleneck,
22 | att_activ='tanh', mlp_bias=False):
23 | super().__init__()
24 |
25 | self.mlp_hid = nn.Conv2d(ctx_1_dim + ctx_2_dim, bottleneck, 1)
26 | self.mlp_out = nn.Conv2d(bottleneck, 1, 1, bias=mlp_bias)
27 | self.activ = get_activation_fn(att_activ)
28 |
29 | self.project_1_to_2 = nn.Linear(ctx_1_dim + ctx_2_dim, bottleneck)
30 | self.project_2_to_1 = nn.Linear(ctx_1_dim + ctx_2_dim, bottleneck)
31 |
32 | def forward(self, ctx_1, ctx_2, ctx_1_mask=None, ctx_2_mask=None):
33 | if ctx_2_mask is not None:
34 | ctx_2_neg_mask = (1. - ctx_2_mask.transpose(0, 1).unsqueeze(1)) * -1e12
35 |
36 | ctx_1_len = ctx_1.size(0)
37 | ctx_2_len = ctx_2.size(0)
38 | b_ctx_1 = ctx_1.permute(1, 2, 0).unsqueeze(3).repeat(1, 1, 1, ctx_2_len)
39 | b_ctx_2 = ctx_2.permute(1, 2, 0).unsqueeze(2).repeat(1, 1, ctx_1_len, 1)
40 |
41 | catted = torch.cat([b_ctx_1, b_ctx_2], dim=1)
42 | hidden = self.activ(self.mlp_hid(catted))
43 | affinity_matrix = self.mlp_out(hidden).squeeze(1)
44 | if ctx_1_mask is not None:
45 | ctx_1_neg_mask = (1. - ctx_1_mask.transpose(0, 1).unsqueeze(2)) * -1e12
46 | affinity_matrix += ctx_1_neg_mask
47 |
48 | if ctx_2_mask is not None:
49 | ctx_2_neg_mask = (1.
- ctx_2_mask.transpose(0, 1).unsqueeze(1)) * -1e12 50 | affinity_matrix += ctx_2_neg_mask 51 | 52 | dist_1_to_2 = F.softmax(affinity_matrix, dim=2) 53 | context_1_to_2 = ctx_1.permute(1, 2, 0).matmul(dist_1_to_2).permute(2, 0, 1) 54 | seq_1_to_2 = self.activ( 55 | self.project_1_to_2(torch.cat([ctx_2, context_1_to_2], dim=-1))) 56 | 57 | dist_2_to_1 = F.softmax(affinity_matrix, dim=1).transpose(1, 2) 58 | context_2_to_1 = ctx_2.permute(1, 2, 0).matmul(dist_2_to_1).permute(2, 0, 1) 59 | seq_2_to_1 = self.activ( 60 | self.project_2_to_1(torch.cat([ctx_1, context_2_to_1], dim=-1))) 61 | 62 | return seq_2_to_1, seq_1_to_2 63 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/dot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from ...utils.nn import get_activation_fn 7 | 8 | 9 | class DotAttention(nn.Module): 10 | """Attention layer with dot product.""" 11 | def __init__(self, ctx_dim, hid_dim, att_bottleneck='ctx', 12 | transform_ctx=True, att_activ='tanh', temp=1., ctx2hid=True, 13 | mlp_bias=None): 14 | # NOTE: 15 | # mlp_bias here to not break models that pass mlp_bias to all types 16 | # of attentions 17 | super().__init__() 18 | 19 | self.ctx_dim = ctx_dim 20 | self.hid_dim = hid_dim 21 | self._ctx2hid = ctx2hid 22 | self.temperature = temp 23 | self.activ = get_activation_fn(att_activ) 24 | 25 | # The common dimensionality for inner formulation 26 | if isinstance(att_bottleneck, int): 27 | self.mid_dim = att_bottleneck 28 | else: 29 | self.mid_dim = getattr(self, '{}_dim'.format(att_bottleneck)) 30 | 31 | # Adaptor from RNN's hidden dim to mid_dim 32 | self.hid2ctx = nn.Linear(self.hid_dim, self.mid_dim, bias=False) 33 | 34 | if transform_ctx or self.mid_dim != self.ctx_dim: 35 | # Additional context projection within same dimensionality 36 | self.ctx2ctx = nn.Linear(self.ctx_dim, self.mid_dim, bias=False) 37 | else: 38 | self.ctx2ctx = lambda x: x 39 | 40 | if self._ctx2hid: 41 | # ctx2hid: final transformation from ctx to hid 42 | self.ctx2hid = nn.Linear(self.ctx_dim, self.hid_dim, bias=False) 43 | else: 44 | self.ctx2hid = lambda x: x 45 | 46 | def forward(self, hid, ctx, ctx_mask=None): 47 | r"""Computes attention probabilities and final context using 48 | decoder's hidden state and source annotations. 49 | 50 | Arguments: 51 | hid(Tensor): A set of decoder hidden states of shape `T*B*H` 52 | where `T` == 1, `B` is batch dim and `H` is hidden state dim. 53 | ctx(Tensor): A set of annotations of shape `S*B*C` where `S` 54 | is the source timestep dim, `B` is batch dim and `C` 55 | is annotation dim. 56 | ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes 57 | in the padded positions. 58 | 59 | Returns: 60 | scores(Tensor): A tensor of shape `S*B` containing normalized 61 | attention scores for each position and sample. 62 | z_t(Tensor): A tensor of shape `B*H` containing the final 63 | attended context vector for this target decoding timestep. 64 | 65 | Notes: 66 | This will only work when `T==1` for now. 
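For example, with `S=7`, `B=32`, `C=512` and `H=256`: `hid` is
`1x32x256`, `ctx` is `7x32x512`, and the returned `alpha` and `z_t`
are `7x32` and `32x256` respectively.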
67 | """ 68 | # SxBxC 69 | ctx_ = self.ctx2ctx(ctx) 70 | # TxBxC 71 | hid_ = self.hid2ctx(hid) 72 | 73 | # shuffle dims to prepare for batch mat-mult -> SxB 74 | scores = torch.bmm(hid_.permute(1, 0, 2), ctx_.permute(1, 2, 0)).div( 75 | self.temperature).squeeze(1).t() 76 | 77 | # Normalize attention scores correctly -> S*B 78 | if ctx_mask is not None: 79 | # Mask out padded positions with -inf so that they get 0 attention 80 | scores.masked_fill_((1 - ctx_mask).bool(), -1e8) 81 | 82 | alpha = F.softmax(scores, dim=0) 83 | 84 | # Transform final context vector to H for further decoders 85 | return alpha, self.ctx2hid((alpha.unsqueeze(-1) * ctx).sum(0)) 86 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/hierarchical.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | from torch import nn 4 | 5 | from ...utils.nn import get_activation_fn 6 | 7 | 8 | # Libovický, J., & Helcl, J. (2017). Attention Strategies for Multi-Source 9 | # Sequence-to-Sequence Learning. In Proceedings of the 55th Annual Meeting of 10 | # the Association for Computational Linguistics (Volume 2: Short Papers) 11 | # (Vol. 2, pp. 196-202). [Code contributed by @jlibovicky] 12 | 13 | 14 | class HierarchicalAttention(nn.Module): 15 | """Hierarchical attention over multiple modalities.""" 16 | def __init__(self, ctx_dims, hid_dim, mid_dim, att_activ='tanh'): 17 | super().__init__() 18 | 19 | self.activ = get_activation_fn(att_activ) 20 | self.ctx_dims = ctx_dims 21 | self.hid_dim = hid_dim 22 | self.mid_dim = mid_dim 23 | 24 | self.ctx_projs = nn.ModuleList([ 25 | nn.Linear(dim, mid_dim, bias=False) for dim in self.ctx_dims]) 26 | self.dec_proj = nn.Linear(hid_dim, mid_dim, bias=True) 27 | self.mlp = nn.Linear(self.mid_dim, 1, bias=False) 28 | 29 | def forward(self, contexts, hid): 30 | dec_state_proj = self.dec_proj(hid) 31 | ctx_projected = torch.cat([ 32 | p(ctx).unsqueeze(0) for p, ctx 33 | in zip(self.ctx_projs, contexts)], dim=0) 34 | energies = self.mlp(self.activ(dec_state_proj + ctx_projected)) 35 | att_dist = nn.functional.softmax(energies, dim=0) 36 | 37 | ctxs_cat = torch.cat([c.unsqueeze(0) for c in contexts]) 38 | joint_context = (att_dist * ctxs_cat).sum(0) 39 | 40 | return att_dist, joint_context 41 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/mhco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | # Code contributed by @jlibovicky 7 | 8 | 9 | class MultiHeadCoAttention(nn.Module): 10 | """Generalization of multi-head attention for co-attention.""" 11 | 12 | def __init__(self, ctx_1_dim, ctx_2_dim, bottleneck, head_count, dropout=0.1): 13 | assert bottleneck % head_count == 0 14 | self.dim_per_head = bottleneck // head_count 15 | self.model_dim = bottleneck 16 | 17 | super().__init__() 18 | self.head_count = head_count 19 | 20 | self.linear_keys_1 = nn.Linear(ctx_1_dim, 21 | head_count * self.dim_per_head) 22 | self.linear_values_1 = nn.Linear(ctx_1_dim, 23 | head_count * self.dim_per_head) 24 | self.linear_keys_2 = nn.Linear(ctx_2_dim, 25 | head_count * self.dim_per_head) 26 | self.linear_values_2 = nn.Linear(ctx_2_dim, 27 | head_count * self.dim_per_head) 28 | 29 | self.final_1_to_2_linear = nn.Linear(bottleneck, bottleneck) 30 | 
self.final_2_to_1_linear = nn.Linear(bottleneck, bottleneck)
31 | self.project_1_to_2 = nn.Linear(ctx_1_dim + ctx_2_dim, bottleneck)
32 | self.project_2_to_1 = nn.Linear(ctx_1_dim + ctx_2_dim, bottleneck)
# (Editorial fix) forward() below calls self.activ, which was never
# defined in this __init__; tanh is assumed here, mirroring the default
# att_activ='tanh' of the companion CoAttention layer in co.py.
self.activ = torch.tanh
33 |
34 | def forward(self, ctx_1, ctx_2, ctx_1_mask=None, ctx_2_mask=None):
35 | """Computes the context vector and the attention vectors."""
36 |
37 | def shape(x, length):
38 | """ projection """
39 | return x.view(
40 | length, batch_size, head_count, dim_per_head).permute(1, 2, 0, 3)
41 |
42 | def unshape(x, length):
43 | """ compute context """
44 | return x.transpose(1, 2).contiguous().view(
45 | batch_size, length, head_count * dim_per_head).transpose(0, 1)
46 |
47 | batch_size = ctx_1.size(1)
48 | assert batch_size == ctx_2.size(1)
49 | dim_per_head = self.dim_per_head
50 | head_count = self.head_count
51 | ctx_1_len = ctx_1.size(0)
52 | ctx_2_len = ctx_2.size(0)
53 |
54 | # 1) Project keys and values for both contexts.
55 | key_1_up = shape(self.linear_keys_1(ctx_1), ctx_1_len)
56 | value_1_up = shape(self.linear_values_1(ctx_1), ctx_1_len)
57 | key_2_up = shape(self.linear_keys_2(ctx_2), ctx_2_len)
58 | value_2_up = shape(self.linear_values_2(ctx_2), ctx_2_len)
59 |
60 | scores = torch.matmul(key_2_up, key_1_up.transpose(2, 3))
61 |
62 | if ctx_1_mask is not None:
63 | mask = ctx_1_mask.t().unsqueeze(2).unsqueeze(3).expand_as(scores)
64 | scores = scores.masked_fill(mask.bool(), -1e18)
65 | if ctx_2_mask is not None:
66 | mask = ctx_2_mask.t().unsqueeze(1).unsqueeze(3).expand_as(scores)
67 | scores = scores.masked_fill(mask.bool(), -1e18)
68 |
69 | # 2) Normalize scores and compute context vectors (note: the `dropout`
70 | # argument is currently unused, so no attention dropout is applied).
70 | dist_1_to_2 = F.softmax(scores, dim=2)
71 | context_1_to_2 = unshape(torch.matmul(dist_1_to_2, value_1_up), ctx_2_len)
72 | context_1_to_2 = self.final_1_to_2_linear(context_1_to_2)
73 | seq_1_to_2 = self.activ(
74 | self.project_1_to_2(torch.cat([ctx_2, context_1_to_2], dim=-1)))
75 |
76 | dist_2_to_1 = F.softmax(scores, dim=1)
77 | context_2_to_1 = unshape(
78 | torch.matmul(dist_2_to_1.transpose(2, 3), value_2_up), ctx_1_len)
79 | context_2_to_1 = self.final_2_to_1_linear(context_2_to_1)
80 | seq_2_to_1 = self.activ(
81 | self.project_2_to_1(torch.cat([ctx_1, context_2_to_1], dim=-1)))
82 |
83 | return seq_2_to_1, seq_1_to_2
84 |
--------------------------------------------------------------------------------
/nmtpytorch/layers/attention/mlp.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn.functional as F
4 | from torch import nn
5 |
6 | from .dot import DotAttention
7 |
8 |
9 | class MLPAttention(DotAttention):
10 | """Attention layer with feed-forward layer."""
11 | def __init__(self, ctx_dim, hid_dim, att_bottleneck='ctx',
12 | transform_ctx=True, att_activ='tanh',
13 | mlp_bias=False, temp=1., ctx2hid=True):
14 | super().__init__(ctx_dim, hid_dim, att_bottleneck, transform_ctx,
15 | att_activ, temp, ctx2hid)
16 |
17 | if mlp_bias:
18 | self.bias = nn.Parameter(torch.Tensor(self.mid_dim))
19 | self.bias.data.zero_()
20 | else:
21 | self.register_parameter('bias', None)
22 |
23 | self.mlp = nn.Linear(self.mid_dim, 1, bias=False)
24 |
25 | def forward(self, hid, ctx, ctx_mask=None):
26 | r"""Computes attention probabilities and final context using
27 | decoder's hidden state and source annotations.
28 |
29 | Arguments:
30 | hid(Tensor): A set of decoder hidden states of shape `T*B*H`
31 | where `T` == 1, `B` is batch dim and `H` is hidden state dim.
32 | ctx(Tensor): A set of annotations of shape `S*B*C` where `S` 33 | is the source timestep dim, `B` is batch dim and `C` 34 | is annotation dim. 35 | ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes 36 | in the padded positions. 37 | 38 | Returns: 39 | scores(Tensor): A tensor of shape `S*B` containing normalized 40 | attention scores for each position and sample. 41 | z_t(Tensor): A tensor of shape `B*H` containing the final 42 | attended context vector for this target decoding timestep. 43 | 44 | Notes: 45 | This will only work when `T==1` for now. 46 | """ 47 | # inner_sum -> SxBxC + TxBxC 48 | inner_sum = self.ctx2ctx(ctx) + self.hid2ctx(hid) 49 | 50 | if self.bias is not None: 51 | inner_sum.add_(self.bias) 52 | 53 | # Compute scores- > SxB 54 | scores = self.mlp( 55 | self.activ(inner_sum)).div(self.temperature).squeeze(-1) 56 | 57 | # Normalize attention scores correctly -> S*B 58 | if ctx_mask is not None: 59 | # Mask out padded positions with -inf so that they get 0 attention 60 | scores.masked_fill_((1 - ctx_mask).bool(), -1e8) 61 | 62 | alpha = F.softmax(scores, dim=0) 63 | 64 | # Transform final context vector to H for further decoders 65 | return alpha, self.ctx2hid((alpha.unsqueeze(-1) * ctx).sum(0)) 66 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/scaled_dot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class ScaledDotAttention(torch.nn.Module): 8 | """Scaled Dot-product attention from `Attention is all you need`. 9 | 10 | Arguments: 11 | 12 | Input: 13 | 14 | Output: 15 | """ 16 | 17 | def __init__(self, model_dim, n_heads, causal=False): 18 | super().__init__() 19 | self.model_dim = model_dim 20 | self.n_heads = n_heads 21 | self.causal = causal 22 | 23 | #self.k_dim = self.model_dim / self.n_heads 24 | #self.v_dim = self.model_dim / self.n_heads 25 | 26 | # Efficient linear projections for all heads 27 | self.lin_k = torch.nn.Linear( 28 | self.model_dim, self.model_dim, bias=False) 29 | self.lin_q = torch.nn.Linear( 30 | self.model_dim, self.model_dim, bias=False) 31 | self.lin_v = torch.nn.Linear( 32 | self.model_dim, self.model_dim, bias=False) 33 | 34 | # Final output layer is independent of number of heads 35 | self.lin_o = torch.nn.Linear( 36 | self.model_dim, self.model_dim, bias=False) 37 | 38 | self.scale = math.sqrt(self.model_dim / self.n_heads) 39 | 40 | def forward(self, inputs): 41 | """Scaled dot-product attention forward-pass 42 | 43 | :param inputs: dictionary with query, key, value and mask tensors 44 | the shape of the tensors are (tstep, bsize, dim) except for the 45 | mask which is (tstep, bsize) 46 | 47 | :return: foo 48 | """ 49 | q, k, v, mask = inputs 50 | # q is the query, v is the actual inputs and k is v's representation 51 | # for self attention q=v=k 52 | # for cross attention q != (v=k) 53 | # Project keys, queries and values --> (bsize, tstep, dim) 54 | tstep, bsize = mask.shape 55 | head_view = (tstep, bsize, self.n_heads, -1) 56 | # qp: (bsize, head, tstep, dim) 57 | # vp: (bsize, head, tstep, dim) 58 | # kp: (bsize, head, dim, tstep) 59 | qp = self.lin_q(q).view(*head_view).permute(1, 2, 0, 3) 60 | vp = self.lin_v(v).view(*head_view).permute(1, 2, 0, 3) 61 | kp = self.lin_k(k).view(*head_view).permute(1, 2, 3, 0) 62 | 63 | # z: (bsize, head, tstep, tstep) 64 | z = torch.matmul(qp, kp).div(self.scale).softmax(dim=-1) 65 | out = torch.matmul(z, 
vp).permute(2, 0, 1, 3).reshape_as(v) 66 | return (v, out, mask) 67 | -------------------------------------------------------------------------------- /nmtpytorch/layers/attention/uniform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | 4 | 5 | class UniformAttention(torch.nn.Module): 6 | """A dummy non-parametric attention layer that applies uniform weights.""" 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def forward(self, hid, ctx, ctx_mask=None): 11 | alpha = torch.ones(*ctx.shape[:2], device=ctx.device).div(ctx.shape[0]) 12 | wctx = (alpha.unsqueeze(-1) * ctx).sum(0) 13 | return alpha, wctx 14 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .conditional import ConditionalDecoder 2 | from .simplegru import SimpleGRUDecoder 3 | from .conditionalmm import ConditionalMMDecoder 4 | from .multisourceconditional import MultiSourceConditionalDecoder 5 | from .xu import XuDecoder 6 | from .switchinggru import SwitchingGRUDecoder 7 | from .vector import VectorDecoder 8 | 9 | 10 | def get_decoder(type_): 11 | """Only expose ones with compatible __init__() arguments for now.""" 12 | return { 13 | 'cond': ConditionalDecoder, 14 | 'simplegru': SimpleGRUDecoder, 15 | 'condmm': ConditionalMMDecoder, 16 | 'vector': VectorDecoder, 17 | }[type_] 18 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/conditionalmm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch.nn.functional as F 3 | 4 | from ...utils.nn import get_rnn_hidden_state 5 | from ..attention import HierarchicalAttention, UniformAttention, get_attention 6 | from .. import Fusion 7 | from . 
import ConditionalDecoder
8 |
9 |
10 | class ConditionalMMDecoder(ConditionalDecoder):
11 | """A conditional multimodal decoder with multimodal attention."""
12 | def __init__(self, fusion_type='concat', fusion_activ=None,
13 | aux_ctx_name='image', mm_att_type='md-dd',
14 | persistent_dump=False, **kwargs):
15 | super().__init__(**kwargs)
16 | self.aux_ctx_name = aux_ctx_name
17 | self.mm_att_type = mm_att_type
18 | self.persistent_dump = persistent_dump
19 |
20 | if self.mm_att_type == 'uniform':
21 | # Dummy uniform attention
22 | self.shared_dec_state = False
23 | self.shared_att_mlp = False
24 | else:
25 | # Parse attention type
26 | att_str = sorted(self.mm_att_type.lower().split('-'))
27 | assert len(att_str) == 2 and att_str[0][0] == 'd' and att_str[1][0] == 'm', \
28 | "att_type should be m[d|i]-d[d|i]"
29 | # Independent decoder state means shared dec state
30 | self.shared_dec_state = att_str[0][1] == 'i'
31 |
32 | # Independent modality means sharing the mlp in the MLP attention
33 | self.shared_att_mlp = att_str[1][1] == 'i'
34 |
35 | # Sanity check
36 | if self.shared_att_mlp and self.att_type != 'mlp':
37 | raise Exception("Shared attention requires MLP attention.")
38 |
39 | # Define (context) fusion operator
40 | self.fusion_type = fusion_type
41 | if fusion_type == "hierarchical":
42 | self.fusion = HierarchicalAttention(
43 | [self.hidden_size, self.hidden_size],
44 | self.hidden_size, self.hidden_size)
45 | else:
46 | if self.att_ctx2hid:
47 | # Old behaviour
48 | fusion_inp_size = 2 * self.hidden_size
49 | else:
50 | fusion_inp_sizes = list(self.ctx_size_dict.values())
51 | if fusion_type == 'concat':
52 | fusion_inp_size = sum(fusion_inp_sizes)
53 | else:
54 | fusion_inp_size = fusion_inp_sizes[0]
55 | self.fusion = Fusion(
56 | fusion_type, fusion_inp_size, self.hidden_size,
57 | fusion_activ=fusion_activ)
58 |
59 | # Rename textual attention layer
60 | self.txt_att = self.att
61 | del self.att
62 |
63 | if self.mm_att_type == 'uniform':
64 | self.img_att = UniformAttention()
65 | else:
66 | # Visual attention over convolutional feature maps
67 | Attention = get_attention(self.att_type)
68 | self.img_att = Attention(
69 | self.ctx_size_dict[self.aux_ctx_name], self.hidden_size,
70 | transform_ctx=self.transform_ctx, mlp_bias=self.mlp_bias,
71 | ctx2hid=self.att_ctx2hid,
72 | att_activ=self.att_activ,
73 | att_bottleneck=self.att_bottleneck)
74 |
75 | # Tune multimodal attention type
76 | if self.shared_att_mlp:
77 | # Modality independent
78 | self.txt_att.mlp.weight = self.img_att.mlp.weight
79 | self.txt_att.ctx2ctx.weight = self.img_att.ctx2ctx.weight
80 |
81 | if self.shared_dec_state:
82 | # Decoder independent
83 | self.txt_att.hid2ctx.weight = self.img_att.hid2ctx.weight
84 |
85 | def f_next(self, ctx_dict, y, h):
86 | # Get hidden states from the first decoder (purely cond.
on LM) 87 | h1_c1 = self.dec0(y, self._rnn_unpack_states(h)) 88 | h1 = get_rnn_hidden_state(h1_c1) 89 | 90 | # Apply attention 91 | self.txt_alpha_t, txt_z_t = self.txt_att( 92 | h1.unsqueeze(0), *ctx_dict[self.ctx_name]) 93 | self.img_alpha_t, img_z_t = self.img_att( 94 | h1.unsqueeze(0), *ctx_dict[self.aux_ctx_name]) 95 | # Save for reg loss terms 96 | self.history['alpha_img'].append(self.img_alpha_t.unsqueeze(0)) 97 | 98 | # Context will double dimensionality if fusion_type is concat 99 | # z_t should be compatible with hidden_size 100 | if self.fusion_type == "hierarchical": 101 | self.h_att, z_t = self.fusion([txt_z_t, img_z_t], h1.unsqueeze(0)) 102 | else: 103 | z_t = self.fusion(txt_z_t, img_z_t) 104 | 105 | if not self.training and self.persistent_dump: 106 | # For test-time activation debugging 107 | self.persistence['z_t'].append(z_t.t().cpu().numpy()) 108 | self.persistence['txt_z_t'].append(txt_z_t.t().cpu().numpy()) 109 | self.persistence['img_z_t'].append(img_z_t.t().cpu().numpy()) 110 | 111 | # Run second decoder (h1 is compatible now as it was returned by GRU) 112 | h2_c2 = self.dec1(z_t, h1_c1) 113 | h2 = get_rnn_hidden_state(h2_c2) 114 | 115 | # This is a bottleneck to avoid going from H to V directly 116 | logit = self.hid2out(self.out_merge_fn(h2, y, z_t)) 117 | 118 | # Apply dropout if any 119 | if self.dropout_out > 0: 120 | logit = self.do_out(logit) 121 | 122 | # Transform logit to T*B*V (V: vocab_size) 123 | # Compute log_softmax over token dim 124 | log_p = F.log_softmax(self.out2prob(logit), dim=-1) 125 | 126 | # Return log probs and new hidden states 127 | return log_p, self._rnn_pack_states(h2_c2) 128 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/multisourceconditional.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | from ...utils.nn import get_rnn_hidden_state 6 | from ..attention import get_attention, HierarchicalAttention 7 | from .. import Fusion 8 | from . import ConditionalDecoder 9 | 10 | 11 | class MultiSourceConditionalDecoder(ConditionalDecoder): 12 | """A conditional multimodal decoder with multimodal attention.""" 13 | def __init__(self, ctx_names, fusion_type='concat', **kwargs): 14 | super().__init__(**kwargs) 15 | 16 | # Define (context) fusion operator 17 | self.ctx_names = ctx_names 18 | self.fusion_type = fusion_type 19 | if fusion_type == "hierarchical": 20 | self.fusion = HierarchicalAttention( 21 | [self.hidden_size for _ in ctx_names], 22 | self.hidden_size, self.hidden_size) 23 | else: 24 | raise NotImplementedError("Concatenation and sum work only with two inputs now.") 25 | self.fusion = Fusion( 26 | fusion_type, len(ctx_names) * self.hidden_size, self.hidden_size) 27 | 28 | attns = [] 29 | for ctx_name in ctx_names: 30 | Attention = get_attention(self.att_type) 31 | attns.append(Attention( 32 | self.ctx_size_dict[ctx_name], self.hidden_size, 33 | transform_ctx=self.transform_ctx, mlp_bias=self.mlp_bias, 34 | att_activ=self.att_activ, 35 | att_bottleneck=self.att_bottleneck)) 36 | self.attns = nn.ModuleList(attns) 37 | 38 | def f_next(self, ctx_dict, y, h): 39 | # Get hidden states from the first decoder (purely cond. 
on LM) 40 | h1_c1 = self.dec0(y, self._rnn_unpack_states(h)) 41 | h1 = get_rnn_hidden_state(h1_c1) 42 | 43 | # Apply attention 44 | ctx_list = [att(h1.unsqueeze(0), *ctx_dict[name])[1] 45 | for att, name in zip(self.attns, self.ctx_names)] 46 | 47 | # Context will double dimensionality if fusion_type is concat 48 | # z_t should be compatible with hidden_size 49 | if self.fusion_type == "hierarchical": 50 | _, z_t = self.fusion(ctx_list, h1.unsqueeze(0)) 51 | else: 52 | z_t = self.fusion(ctx_list) 53 | 54 | # Run second decoder (h1 is compatible now as it was returned by GRU) 55 | h2_c2 = self.dec1(z_t, h1_c1) 56 | h2 = get_rnn_hidden_state(h2_c2) 57 | 58 | # This is a bottleneck to avoid going from H to V directly 59 | logit = self.hid2out(h2) 60 | 61 | # Apply dropout if any 62 | if self.dropout_out > 0: 63 | logit = self.do_out(logit) 64 | 65 | # Transform logit to T*B*V (V: vocab_size) 66 | # Compute log_softmax over token dim 67 | log_p = F.log_softmax(self.out2prob(logit), dim=-1) 68 | 69 | # Return log probs and new hidden states 70 | return log_p, self._rnn_pack_states(h2_c2) 71 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/simplegru.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | from .. import FF 6 | from . import ConditionalDecoder 7 | 8 | 9 | class SimpleGRUDecoder(ConditionalDecoder): 10 | """A simple GRU decoder with a single decoder layer. It has the same 11 | set of parameters as the parent class except `rnn_type`.""" 12 | def __init__(self, **kwargs): 13 | # Set rnn_type to GRU 14 | kwargs['rnn_type'] = 'gru' 15 | super().__init__(**kwargs) 16 | 17 | # Remove second GRU 18 | # Remove and replace hid2out since we now concatenate the 19 | # attention output and the hidden state 20 | del self.dec1, self.hid2out 21 | self.hid2out = FF(2 * self.hidden_size, 22 | self.input_size, bias_zero=True, activ='tanh') 23 | 24 | def f_next(self, ctx_dict, y, h): 25 | """Applies one timestep of recurrence.""" 26 | # Get hidden states from the first decoder (purely cond. on LM) 27 | h1 = self.dec0(y, h) 28 | 29 | # Apply attention 30 | alpha_t, z_t = self.att(h1.unsqueeze(0), *ctx_dict[self.ctx_name]) 31 | 32 | if not self.training: 33 | self.history['alpha_txt'].append(alpha_t) 34 | 35 | # Concatenate attented source and hidden state & project 36 | o = self.hid2out(torch.cat((h1, z_t), dim=-1)) 37 | 38 | # Apply dropout if any 39 | logit = self.do_out(o) if self.dropout_out > 0 else o 40 | 41 | # Transform logit to T*B*V (V: vocab_size) 42 | # Compute log_softmax over token dim 43 | log_p = F.log_softmax(self.out2prob(logit), dim=-1) 44 | 45 | # Return log probs and new hidden states 46 | return log_p, h1 47 | -------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/switchinggru.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import defaultdict 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | 7 | from .. import FF 8 | from ...utils.device import DEVICE 9 | from ..attention import get_attention 10 | 11 | 12 | class SwitchingGRUDecoder(nn.Module): 13 | """A multi-source aware attention based decoder. 
-------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/switchinggru.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from collections import defaultdict
3 | import torch
4 | from torch import nn
5 | import torch.nn.functional as F
6 | 
7 | from .. import FF
8 | from ...utils.device import DEVICE
9 | from ..attention import get_attention
10 | 
11 | 
12 | class SwitchingGRUDecoder(nn.Module):
13 |     """A multi-source aware, attention-based decoder. During training,
14 |     this decoder is fed a single modality at a time, while during
15 |     inference one of the src->trg tasks is performed.
16 |     """
17 |     def __init__(self, input_size, hidden_size, modality_dict, n_vocab,
18 |                  tied_emb=False, dropout_out=0):
19 |         super().__init__()
20 | 
21 |         self.hidden_size = hidden_size
22 |         self.input_size = input_size
23 |         self.n_vocab = n_vocab
24 |         self.tied_emb = tied_emb
25 |         self.dropout_out = dropout_out
26 | 
27 |         # Will have N attentions for N possible input modalities
28 |         # dict: {en_speech: (encoding_size, att_type)}
29 |         atts = {}
30 |         for name, (enc_size, att_type) in modality_dict.items():
31 |             atts[name] = get_attention(att_type)(enc_size, self.hidden_size)
32 | 
33 |         self.atts = nn.ModuleDict(atts)
34 | 
35 |         # Create target embeddings
36 |         self.emb = nn.Embedding(self.n_vocab, self.input_size, padding_idx=0)
37 | 
38 |         # Create the two decoder layers; dec0 runs before the attention
39 |         self.dec0 = nn.GRUCell(self.input_size, self.hidden_size)
40 |         self.dec1 = nn.GRUCell(self.hidden_size, self.hidden_size)
41 | 
42 |         # Output dropout
43 |         if self.dropout_out > 0:
44 |             self.do_out = nn.Dropout(p=self.dropout_out)
45 | 
46 |         # Output bottleneck: maps hidden states to target emb dim
47 |         self.hid2out = FF(self.hidden_size, self.input_size,
48 |                           bias_zero=True, activ='tanh')
49 | 
50 |         # Final softmax
51 |         self.out2prob = FF(self.input_size, self.n_vocab)
52 | 
53 |         # Tie input embedding matrix and output embedding matrix
54 |         if self.tied_emb:
55 |             self.out2prob.weight = self.emb.weight
56 | 
57 |         # Final loss
58 |         self.nll_loss = nn.NLLLoss(reduction="sum", ignore_index=0)
59 | 
60 |     def f_init(self, sources):
61 |         """Returns the initial h_0 for the decoder. `sources` is not used
62 |         but passed for compatibility with beam search."""
63 |         self.history = defaultdict(list)
64 |         batch_size = next(iter(sources.values()))[0].shape[1]
65 |         # NOTE: Non-scatter aware, fix this
66 |         return torch.zeros(batch_size, self.hidden_size, device=DEVICE)
67 | 
68 |     def f_next(self, sources, y, h):
69 |         # Get hidden states from the first decoder (purely cond. on LM)
70 |         h_1 = self.dec0(y, h)
71 | 
72 |         # sources will always contain a single modality
73 |         assert len(sources) == 1
74 |         modality = list(sources.keys())[0]
75 | 
76 |         # Apply modality-specific attention
77 |         alpha_t, z_t = self.atts[modality](h_1.unsqueeze(0), *sources[modality])
78 |         self.history['alpha_{}'.format(modality)].append(alpha_t)
79 | 
80 |         # Run second decoder (h_1 is compatible now as it was returned by GRU)
81 |         h_2 = self.dec1(z_t, h_1)
82 | 
83 |         # This is a bottleneck to avoid going from H to V directly
84 |         logit = self.hid2out(h_2)
85 | 
86 |         # Apply dropout if any
87 |         if self.dropout_out > 0:
88 |             logit = self.do_out(logit)
89 | 
90 |         # Transform logit to T*B*V (V: vocab_size)
91 |         # Compute log_softmax over token dim
92 |         log_p = F.log_softmax(self.out2prob(logit), dim=-1)
93 | 
94 |         # Return log probs and new hidden states
95 |         return log_p, h_2
96 | 
97 |     def forward(self, sources, y):
98 |         """Computes the softmax outputs given source annotations `sources` and
99 |         ground-truth target token indices `y`. Only called during training.
100 | 
101 |         Arguments:
102 |             sources(dict): A dict mapping a single modality name to its
103 |                 (annotations, mask) encodings, compatible with the targets.
104 |             y(Tensor): A tensor of `T*B` containing ground-truth target
105 |                 token indices for the given batch.
106 |         """
107 | 
108 |         loss = 0.0
109 |         logps = None if self.training else torch.zeros(
110 |             y.shape[0] - 1, y.shape[1], self.n_vocab, device=y.device)
111 | 
112 |         # Convert token indices to embeddings -> T*B*E
113 |         y_emb = self.emb(y)
114 | 
115 |         # Get initial hidden state
116 |         h = self.f_init(sources)
117 | 
118 |         # -1: So that we skip the timestep where input is <eos>
119 |         for t in range(y_emb.shape[0] - 1):
120 |             log_p, h = self.f_next(sources, y_emb[t], h)
121 |             if not self.training:
122 |                 logps[t] = log_p.data
123 |             loss += self.nll_loss(log_p, y[t + 1])
124 | 
125 |         return {'loss': loss, 'logps': logps}
126 | 
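The `- 1` in the loop above implements standard teacher forcing: the decoder consumes `y[t]` and is scored against `y[t + 1]`, while `ignore_index=0` silences padded positions. A self-contained toy illustration of that shift (the vocabulary and all values are made up):

```python
import torch
from torch import nn

# Toy vocabulary: 0=<pad>, 1=<bos>, 2=<eos>, 3+=regular tokens
y = torch.tensor([[1, 1],     # t=0: <bos> for both samples
                  [4, 5],     # t=1
                  [2, 6],     # t=2: first sample ends early
                  [0, 2]])    # t=3: <pad> vs <eos>

nll = nn.NLLLoss(reduction='sum', ignore_index=0)
vocab_size = 8
log_p = torch.log_softmax(
    torch.randn(y.shape[0] - 1, y.shape[1], vocab_size), dim=-1)

loss = 0.0
for t in range(y.shape[0] - 1):
    # The input would be y[t]; the prediction is scored against y[t + 1]
    loss = loss + nll(log_p[t], y[t + 1])
print(loss)   # the <pad> position contributes nothing to the sum
```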
-------------------------------------------------------------------------------- /nmtpytorch/layers/decoders/vector.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch.nn.functional as F
3 | 
4 | from ...utils.nn import get_rnn_hidden_state
5 | from . import ConditionalDecoder
6 | 
7 | # Decoder without attention that uses a single input vector.
8 | # Layer contributed by @loicbarrault
9 | 
10 | 
11 | class VectorDecoder(ConditionalDecoder):
12 |     """Single-layer RNN decoder using a fixed-size vector representation."""
13 |     def __init__(self, **kwargs):
14 |         # Disable attention
15 |         kwargs['att_type'] = None
16 |         super().__init__(**kwargs)
17 | 
18 |     def f_next(self, ctx_dict, y, h):
19 |         """Applies one timestep of recurrence."""
20 |         # Get hidden states from the decoder
21 |         h1_c1 = self.dec0(y, self._rnn_unpack_states(h))
22 |         h1 = get_rnn_hidden_state(h1_c1)
23 | 
24 |         # Project hidden state to embedding size
25 |         o = self.hid2out(h1)
26 | 
27 |         # Apply dropout if any
28 |         logit = self.do_out(o) if self.dropout_out > 0 else o
29 | 
30 |         # Transform logit to T*B*V (V: vocab_size)
31 |         # Compute log_softmax over token dim
32 |         log_p = F.log_softmax(self.out2prob(logit), dim=-1)
33 | 
34 |         # Return log probs and new hidden states
35 |         return log_p, self._rnn_pack_states(h1_c1)
36 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/embedding/__init__.py: --------------------------------------------------------------------------------
1 | from .pembedding import PEmbedding
2 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/embedding/pembedding.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from torch import nn
3 | 
4 | from .. import FF
5 | 
6 | 
7 | class PEmbedding(nn.Embedding):
8 |     """An extension to regular `nn.Embedding` with an output projection and dropout."""
9 |     def __init__(self, num_embeddings, embedding_dim, out_dim,
10 |                  activ='linear', dropout=0.0):
11 |         super().__init__(num_embeddings, embedding_dim, padding_idx=0)
12 |         self.proj = FF(embedding_dim, out_dim, activ=activ, bias=False)
13 |         self.do = nn.Dropout(dropout) if dropout > 0.0 else lambda x: x
14 | 
15 |     def forward(self, input):
16 |         # Embed through parent's forward, project, then (optionally) drop out
17 |         return self.do(self.proj(super().forward(input)))
18 | 
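A quick sketch of `PEmbedding` in isolation (all dimensions are made up): token indices are embedded, projected to `out_dim`, and optionally passed through dropout.

```python
import torch

# Minimal sketch: a 100-word vocabulary, 32-dim embeddings projected up
# to 64 with a tanh, and 10% dropout. Sizes are illustrative only.
emb = PEmbedding(num_embeddings=100, embedding_dim=32, out_dim=64,
                 activ='tanh', dropout=0.1)

tokens = torch.randint(1, 100, (7, 2))   # (tstep=7, bsize=2); index 0 is <pad>
out = emb(tokens)
print(out.shape)                         # torch.Size([7, 2, 64])
```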
-------------------------------------------------------------------------------- /nmtpytorch/layers/encoders/__init__.py: --------------------------------------------------------------------------------
1 | from .image import ImageEncoder
2 | from .text import TextEncoder
3 | from .bilstmp import BiLSTMp
4 | from .multimodal_text import MultimodalTextEncoder
5 | from .multimodal_bilstmp import MultimodalBiLSTMp
6 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/encoders/bilstmp.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | from torch import nn
4 | from torch.nn import functional as F
5 | 
6 | from ..ff import FF
7 | 
8 | logger = logging.getLogger('nmtpytorch')
9 | 
10 | 
11 | class BiLSTMp(nn.Module):
12 |     """A bidirectional LSTM encoder for speech features. A batch should
13 |     only contain samples that have the same sequence length.
14 | 
15 |     Arguments:
16 |         input_size (int): Input feature dimensionality.
17 |         hidden_size (int): LSTM hidden state dimensionality.
18 |         proj_size (int): Projection layer size.
19 |         proj_activ (str, optional): Non-linearity to apply to intermediate projection
20 |             layers. (Default: 'tanh')
21 |         layers (str): A '_' separated list of integers that defines the subsampling
22 |             factor for each LSTM.
23 |         dropout (float, optional): Use dropout (Default: 0.)
24 |     Input:
25 |         x (Tensor): A tensor of shape (n_timesteps, n_samples, n_feats)
26 |             that includes acoustic features of dimension ``n_feats`` per
27 |             each timestep (in the first dimension).
28 | 
29 |     Output:
30 |         hs (Tensor): A tensor of shape (n_timesteps, n_samples, hidden * 2)
31 |             that contains encoder hidden states for all timesteps.
32 |         mask (Tensor): `None` since this layer expects batches of equal-length sequences.
33 |     """
34 |     def __init__(self, input_size, hidden_size, proj_size, layers,
35 |                  proj_activ='tanh', dropout=0):
36 |         super().__init__()
37 | 
38 |         self.input_size = input_size
39 |         self.hidden_size = hidden_size
40 |         self.proj_size = proj_size
41 |         self.proj_activ = proj_activ
42 |         self.layers = [int(i) for i in layers.split('_')]
43 |         self.dropout = dropout
44 |         self.n_layers = len(self.layers)
45 | 
46 |         # Context size doubles due to concatenation of forward/backward states
47 |         self.ctx_size = self.hidden_size * 2
48 | 
49 |         # Padding tuple that appends one zero-frame at the end of the sequence
50 |         self.pad_tuple = (0, 0, 0, 0, 0, 1)
51 | 
52 |         # Projections and LSTMs
53 |         self.ffs = nn.ModuleList()
54 |         self.lstms = nn.ModuleList()
55 | 
56 |         if self.dropout > 0:
57 |             self.do = nn.Dropout(self.dropout)
58 | 
59 |         for i, ss_factor in enumerate(self.layers):
60 |             # Add LSTMs
61 |             self.lstms.append(nn.LSTM(
62 |                 self.input_size if i == 0 else self.hidden_size,
63 |                 self.hidden_size, bidirectional=True))
64 |             # Add non-linear bottlenecks
65 |             self.ffs.append(FF(
66 |                 self.ctx_size, self.proj_size, activ=self.proj_activ))
67 | 
68 |     def forward(self, x, **kwargs):
69 |         # Generate a mask to detect padded sequences
70 |         mask = x.ne(0).float().sum(2).ne(0).float()
71 | 
72 |         if mask.eq(0).nonzero().numel() > 0:
73 |             logger.info("WARNING: Non-homogeneous batch in BiLSTMp.")
74 | 
75 |         # Pad with zero
76 |         hs = F.pad(x, self.pad_tuple)
77 | 
78 |         for (ss_factor, f_lstm, f_ff) in zip(self.layers, self.lstms, self.ffs):
79 |             if ss_factor > 1:
80 |                 # Subsample by skipping timesteps
81 |                 hs = f_ff(f_lstm(hs[::ss_factor])[0])
82 |             else:
83 |                 hs = f_ff(f_lstm(hs)[0])
84 | 
85 |         if self.dropout > 0:
86 |             hs = self.do(hs)
87 | 
88 |         # No mask is returned as batch should contain same-length sequences
89 |         return hs, None
90 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/encoders/multimodal_bilstmp.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | 
4 | import torch
5 | from torch.nn import functional as F
6 | 
7 | from ..ff import FF
8 | 
9 | from . import BiLSTMp
10 | 
11 | logger = logging.getLogger('nmtpytorch')
12 | 
13 | 
14 | class MultimodalBiLSTMp(BiLSTMp):
15 |     """A bidirectional multimodal LSTM encoder for speech features.
16 | 
17 |     Arguments:
18 |         feat_size (int): Auxiliary feature dimensionality.
19 |         feat_fusion(str): Type of feature fusion: 'early_concat', 'early_sum',
20 |             'late_concat', 'late_sum', 'init'.
21 |         feat_activ(str): Type of non-linearity if any for feature projection
22 |             layer.
23 |         input_size (int): Input speech feature dimensionality.
24 |         hidden_size (int): LSTM hidden state dimensionality.
25 |         proj_size (int): Projection layer size.
26 |         proj_activ (str, optional): Non-linearity to apply to intermediate projection
27 |             layers. (Default: 'tanh')
28 |         layers (str): A '_' separated list of integers that defines the subsampling
29 |             factor for each LSTM.
30 |         dropout (float, optional): Use dropout (Default: 0.)
31 |     Input:
32 |         x (Tensor): A tensor of shape (n_timesteps, n_samples, n_feats)
33 |             that includes acoustic features of dimension ``n_feats`` per
34 |             each timestep (in the first dimension).
35 | 
36 |     Output:
37 |         hs (Tensor): A tensor of shape (n_timesteps, n_samples, hidden * 2)
38 |             that contains encoder hidden states for all timesteps.
39 |         mask (Tensor): `None` since this layer expects batches of equal-length sequences.
40 | """ 41 | 42 | def __init__(self, feat_size, feat_fusion, feat_activ=None, **kwargs): 43 | # Call BiLSTMp.__init__ first 44 | super().__init__(**kwargs) 45 | 46 | self.feat_size = feat_size 47 | self.feat_fusion = feat_fusion 48 | self.feat_activ = feat_activ 49 | 50 | # early_concat: x = layer([x; aux]) 51 | # layer: feat_size + input_size -> input_size 52 | if self.feat_fusion == 'early_concat': 53 | self.feat_layer = FF( 54 | self.feat_size + self.input_size, self.input_size, activ=self.feat_activ) 55 | # early_sum: x = x + layer(aux) 56 | # layer: feat_size -> input_size 57 | elif self.feat_fusion == 'early_sum': 58 | self.feat_layer = FF(self.feat_size, self.input_size, activ=self.feat_activ) 59 | # late_concat: hs = layer([hs; aux]) 60 | # layer: proj_size + feat_size -> proj_size 61 | elif self.feat_fusion == 'late_concat': 62 | self.feat_layer = FF( 63 | self.feat_size + self.proj_size, self.proj_size, activ=self.feat_activ) 64 | # late_sum: hs = hs + layer(aux) 65 | # layer: feat_size -> proj_size 66 | elif self.feat_fusion == 'late_sum': 67 | self.feat_layer = FF(self.feat_size, self.proj_size, activ=self.feat_activ) 68 | # init: Initialize all LSTMs 69 | elif self.feat_fusion == 'init': 70 | # Use single h_0/c_0 for all stacked layers and directions for a 71 | # consistent information source. 72 | self.ff_init_c0 = FF(self.feat_size, self.hidden_size, activ=self.feat_activ) 73 | self.ff_init_h0 = FF(self.feat_size, self.hidden_size, activ=self.feat_activ) 74 | 75 | def forward(self, x, **kwargs): 76 | # Generate a mask to detect padded sequences 77 | mask = x.ne(0).float().sum(2).ne(0).float() 78 | 79 | if mask.eq(0).nonzero().numel() > 0: 80 | logger.info("WARNING: Non-homogeneous batch in BiLSTMp.") 81 | 82 | # Get auxiliary input 83 | aux_x = kwargs['aux'] 84 | 85 | ############## 86 | # Encoder init 87 | ############## 88 | if self.feat_fusion == 'init': 89 | # Tile to 2xBxH for bidirectionality 90 | c_0_ = self.ff_init_c0(aux_x).repeat(2, 1, 1) 91 | h_0_ = self.ff_init_h0(aux_x).repeat(2, 1, 1) 92 | 93 | # Should be a tuple of (h, c) for each layer 94 | h_0s = [(h_0_, c_0_) for _ in range(self.n_layers)] 95 | else: 96 | # Dummy setup so that the below method calls are good 97 | h_0s = [None for _ in range(self.n_layers)] 98 | if self.feat_fusion == 'early_concat': 99 | x = self.feat_layer( 100 | torch.cat([x, aux_x.repeat(x.shape[0], 1, 1)], dim=-1)) 101 | elif self.feat_fusion == 'early_sum': 102 | x.add_(self.feat_layer(aux_x).unsqueeze(0)) 103 | 104 | # Pad with zero 105 | hs = F.pad(x, self.pad_tuple) 106 | 107 | ################### 108 | # LSTM + Proj block 109 | ################### 110 | for (ss_factor, f_lstm, f_ff, h_0) in zip(self.layers, self.lstms, self.ffs, h_0s): 111 | if ss_factor > 1: 112 | # Skip states 113 | hs = f_ff(f_lstm(hs[::ss_factor], hx=h_0)[0]) 114 | else: 115 | hs = f_ff(f_lstm(hs, hx=h_0)[0]) 116 | 117 | ############# 118 | # Late Fusion 119 | ############# 120 | if self.feat_fusion == 'late_concat': 121 | hs = self.feat_layer( 122 | torch.cat([hs, aux_x.repeat(hs.shape[0], 1, 1)], dim=-1)) 123 | elif self.feat_fusion == 'late_sum': 124 | hs = hs + self.feat_layer(aux_x).unsqueeze(0) 125 | 126 | if self.dropout > 0: 127 | hs = self.do(hs) 128 | 129 | # No mask is returned as batch should contain same-length sequences 130 | return hs, None 131 | -------------------------------------------------------------------------------- /nmtpytorch/layers/ff.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*-
2 | import math
3 | 
4 | import torch
5 | import torch.nn.functional as F
6 | from torch import nn
7 | 
8 | from ..utils.nn import get_activation_fn
9 | 
10 | 
11 | class FF(nn.Module):
12 |     """A feedforward layer with optional bias and activation support.
13 | 
14 |     Arguments:
15 |         in_features(int): Input dimensionality.
16 |         out_features(int): Output dimensionality.
17 |         bias(bool, optional): Enable/disable bias for the layer. (Default: True)
18 |         bias_zero(bool, optional): Start with a 0-vector bias. (Default: True)
19 |         activ(str, optional): A string like 'tanh' or 'relu' to define the
20 |             non-linearity type. `None` or `'linear'` is a linear layer (default).
21 |     """
22 | 
23 |     def __init__(self, in_features, out_features, bias=True,
24 |                  bias_zero=True, activ=None):
25 |         super().__init__()
26 |         self.in_features = in_features
27 |         self.out_features = out_features
28 |         self.use_bias = bias
29 |         self.bias_zero = bias_zero
30 |         self.activ_type = activ
31 |         if self.activ_type in (None, 'linear'):
32 |             self.activ_type = 'linear'
33 |         self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
34 |         self.activ = get_activation_fn(activ)
35 | 
36 |         if self.use_bias:
37 |             self.bias = nn.Parameter(torch.Tensor(out_features))
38 |         else:
39 |             self.register_parameter('bias', None)
40 | 
41 |         self.reset_parameters()
42 | 
43 |     def reset_parameters(self):
44 |         stdv = 1. / math.sqrt(self.weight.size(1))
45 |         self.weight.data.uniform_(-stdv, stdv)
46 |         if self.use_bias:
47 |             if self.bias_zero:
48 |                 self.bias.data.zero_()
49 |             else:
50 |                 self.bias.data.uniform_(-stdv, stdv)
51 | 
52 |     def forward(self, input):
53 |         return self.activ(F.linear(input, self.weight, self.bias))
54 | 
55 |     def __repr__(self):
56 |         repr_ = self.__class__.__name__ + '(' \
57 |             + 'in_features=' + str(self.in_features) \
58 |             + ', out_features=' + str(self.out_features) \
59 |             + ', activ=' + str(self.activ_type) \
60 |             + ', bias=' + str(self.use_bias)
61 |         if self.use_bias:
62 |             repr_ += ', bias_zero=' + str(self.bias_zero)
63 |         return repr_ + ')'
64 | 
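A small usage sketch for the `FF` layer above (sizes are made up); note how `bias_zero=True` leaves the bias at exactly zero after `reset_parameters()`.

```python
import torch

# Sketch: a 16 -> 8 projection with tanh and a zero-initialized bias.
ff = FF(16, 8, bias=True, bias_zero=True, activ='tanh')
print(ff)                     # FF(in_features=16, out_features=8, activ=tanh, ...)

x = torch.randn(4, 16)
print(ff(x).shape)            # torch.Size([4, 8])
print(ff.bias.abs().sum())    # tensor(0., ...) right after initialization
```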
-------------------------------------------------------------------------------- /nmtpytorch/layers/flatten.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class Flatten(torch.nn.Module):
5 |     """Flattens all dimensions of the input except the first (batch) axis."""
6 |     def __init__(self):
7 |         super().__init__()
8 | 
9 |     def forward(self, x):
10 |         return x.view(x.size(0), -1)
11 | 
12 |     def __repr__(self):
13 |         return "Flatten()"
14 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/fusion.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import operator
3 | from functools import reduce
4 | 
5 | import torch
6 | 
7 | from . import FF
8 | from ..utils.nn import get_activation_fn
9 | 
10 | 
11 | class Fusion(torch.nn.Module):
12 |     """A convenience fusion layer that merges an arbitrary number of inputs.
13 | 
14 |     Arguments:
15 |         fusion_type(str, optional): One of ``concat,sum,mul`` defining the
16 |             fusion operation. In the default setup of ``concat``, the
17 |             following two arguments should be provided to create a
18 |             ``Linear`` adaptor which will project the concatenated vector to
19 |             ``output_size``.
20 |         input_size(int, optional): The dimensionality of the concatenated
21 |             input. Only necessary if ``fusion_type==concat``.
22 |         output_size(int, optional): The output dimensionality of the
23 |             concatenation. Only necessary if ``fusion_type==concat``.
24 |         fusion_activ(str, optional): Non-linearity applied to the fused
25 |             output. (Default: None)
26 |     """
27 | 
28 |     def __init__(self, fusion_type='concat', input_size=None, output_size=None,
29 |                  fusion_activ=None):
30 |         super().__init__()
31 | 
32 |         self.fusion_type = fusion_type
33 |         self.fusion_activ = fusion_activ
34 |         self.forward = getattr(self, '_{}'.format(self.fusion_type))
35 |         self.activ = get_activation_fn(fusion_activ)
36 |         self.adaptor = lambda x: x
37 | 
38 |         if self.fusion_type == 'concat' or input_size != output_size:
39 |             self.adaptor = FF(input_size, output_size, bias=False, activ=None)
40 | 
41 |     def _sum(self, *inputs):
42 |         return self.activ(self.adaptor(reduce(operator.add, inputs)))
43 | 
44 |     def _mul(self, *inputs):
45 |         return self.activ(self.adaptor(reduce(operator.mul, inputs)))
46 | 
47 |     def _concat(self, *inputs):
48 |         return self.activ(self.adaptor(torch.cat(inputs, dim=-1)))
49 | 
50 |     def __repr__(self):
51 |         return "Fusion(type={}, adaptor={}, activ={})".format(
52 |             self.fusion_type,
53 |             getattr(self, 'adaptor') if hasattr(self, 'adaptor') else 'None',
54 |             self.fusion_activ)
55 | 
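How the fusion modes above differ in practice (a sketch with made-up sizes): `concat` always creates the `FF` adaptor, whereas `sum` and `mul` keep the identity adaptor when input and output sizes already match.

```python
import torch

# 'concat' of two 8-dim vectors needs the 16 -> 8 adaptor;
# 'sum' with matching sizes keeps the identity adaptor.
fuse_cat = Fusion('concat', input_size=16, output_size=8)
fuse_sum = Fusion('sum', input_size=8, output_size=8)

a, b = torch.randn(5, 8), torch.randn(5, 8)
print(fuse_cat(a, b).shape)   # torch.Size([5, 8])
print(fuse_sum(a, b).shape)   # torch.Size([5, 8])
```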
-------------------------------------------------------------------------------- /nmtpytorch/layers/max_margin.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | from torch import nn
4 | 
5 | # Layer contributed by @elliottd
6 | 
7 | 
8 | class MaxMargin(nn.Module):
9 |     """A max-margin layer for ranking-based loss functions."""
10 | 
11 |     def __init__(self, margin, max_violation=False):
12 |         super().__init__()
13 | 
14 |         assert margin > 0., "margin must be > 0."
15 | 
16 |         # Other arguments
17 |         self.margin = margin
18 |         self.max_violation = max_violation
19 | 
20 |     def forward(self, enc1, enc2):
21 |         """Computes the max-margin loss given a pair of rank-2
22 |         annotation matrices. The matrices must have the same number of
23 |         batches and the same number of feats.
24 | 
25 |         Arguments:
26 |             enc1(Tensor): A tensor of `B*feats` representing the
27 |                 annotation vectors of the first encoder.
28 |             enc2(Tensor): A tensor of `B*feats` representing the
29 |                 annotation vectors of the second encoder.
30 |         """
31 | 
32 |         assert enc1.shape == enc2.shape, \
33 |             "shapes must match: enc1 {} enc2 {}".format(enc1.shape, enc2.shape)
34 | 
35 |         enc1 = enc1 / enc1.norm(p=2, dim=1).unsqueeze(1)
36 |         enc2 = enc2 / enc2.norm(p=2, dim=1).unsqueeze(1)
37 |         loss = self.contrastive_loss(enc1, enc2)
38 | 
39 |         return {'loss': loss}
40 | 
41 |     def contrastive_loss(self, enc1, enc2):
42 |         if enc1.shape[0] == 1:
43 |             # The contrastive loss is undefined for a single-instance batch;
44 |             # return a small dummy loss of 1e-3 as a regularizer instead
45 |             return torch.tensor([1e-3], device=enc1.device)
46 | 
47 |         # compute enc1-enc2 score matrix
48 |         scores = self.cosine_sim(enc1, enc2)
49 |         diagonal = scores.diag().view(enc1.size(0), 1)
50 |         d1 = diagonal.expand_as(scores)
51 |         d2 = diagonal.t().expand_as(scores)
52 | 
53 |         cost_enc1 = (self.margin + scores - d2).clamp(min=0)
54 |         cost_enc2 = (self.margin + scores - d1).clamp(min=0)
55 | 
56 |         # clear diagonals
57 |         mask = torch.eye(scores.size(0), device=enc1.device) > .5
58 |         cost_enc2 = cost_enc2.masked_fill_(mask, 0)
59 |         cost_enc1 = cost_enc1.masked_fill_(mask, 0)
60 | 
61 |         # keep the maximum violating negative for each query
62 |         if self.max_violation:
63 |             cost_enc2 = cost_enc2.max(1)[0]
64 |             cost_enc1 = cost_enc1.max(0)[0]
65 |         denom = cost_enc1.shape[0]**2 - cost_enc1.shape[0]
66 |         return (cost_enc2 + cost_enc1).sum() / denom
67 | 
68 |     def cosine_sim(self, one, two):
69 |         '''Cosine similarity between all the first and second encoder pairs.'''
70 |         return one.mm(two.t())
71 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/pool.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class Pool(torch.nn.Module):
5 |     """A pool layer with mean/max/sum/last options."""
6 |     def __init__(self, op_type, pool_dim, keepdim=True):
7 |         super().__init__()
8 | 
9 |         self.op_type = op_type
10 |         self.pool_dim = pool_dim
11 |         self.keepdim = keepdim
12 |         assert self.op_type in ["last", "mean", "max", "sum"], \
13 |             "Pool() operation should be mean, max, sum or last."
14 | 
15 |         if self.op_type == 'last':
16 |             self.__pool_fn = lambda x: x.select(
17 |                 self.pool_dim, -1).unsqueeze(0)
18 |         else:
19 |             if self.op_type == 'max':
20 |                 self.__pool_fn = lambda x: torch.max(
21 |                     x, dim=self.pool_dim, keepdim=self.keepdim)[0]
22 |             elif self.op_type == 'mean':
23 |                 self.__pool_fn = lambda x: torch.mean(
24 |                     x, dim=self.pool_dim, keepdim=self.keepdim)
25 |             elif self.op_type == 'sum':
26 |                 self.__pool_fn = lambda x: torch.sum(
27 |                     x, dim=self.pool_dim, keepdim=self.keepdim)
28 | 
29 |     def forward(self, x):
30 |         return self.__pool_fn(x)
31 | 
32 |     def __repr__(self):
33 |         return "Pool(op_type={}, pool_dim={}, keepdim={})".format(
34 |             self.op_type, self.pool_dim, self.keepdim)
35 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/rnninit.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | from torch import nn
4 | 
5 | from . import FF
6 | 
7 | 
8 | class RNNInitializer(nn.Module):
9 |     """RNN initializer block for encoders and decoders.
10 | 
11 |     Arguments:
12 |         rnn_type(str): GRU or LSTM.
13 |         input_size(int): Input dimensionality of the feature vectors that'll
14 |             be used for initialization if ``method != zero``.
15 |         hidden_size(int): Output dimensionality, i.e. hidden size of the RNN
16 |             that will be initialized.
17 |         n_layers(int): Number of recurrent layers to be initialized.
18 |         data_source(str): The modality name to look for in the batch dictionary.
19 |         method(str): One of ``last_ctx|mean_ctx|feats|zero``.
20 |         activ(str, optional): The non-linearity to be used for all initializers
21 |             except 'zero'. Default is ``None`` i.e. no non-linearity.
22 | """ 23 | def __init__(self, rnn_type, input_size, hidden_size, n_layers, data_source, 24 | method, activ=None): 25 | super().__init__() 26 | self.rnn_type = rnn_type 27 | self.input_size = input_size 28 | self.hidden_size = hidden_size 29 | self.n_layers = n_layers 30 | self.data_source = data_source 31 | self.method = method 32 | self.activ = activ 33 | 34 | # Check for RNN 35 | assert self.rnn_type in ('GRU', 'LSTM'), \ 36 | "rnn_type '{}' is unknown.".format(self.rnn_type) 37 | 38 | assert self.method in ('mean_ctx', 'last_ctx', 'zero', 'feats'), \ 39 | "RNN init method '{}' is unknown.".format(self.method) 40 | 41 | # LSTMs have also the cell state so double the output size 42 | assert self.rnn_type == 'GRU', 'LSTM support not ready yet.' 43 | self.n_states = 1 if self.rnn_type == 'GRU' else 2 44 | 45 | if self.method in ('mean_ctx', 'last_ctx', 'feats'): 46 | self.ff = FF( 47 | self.input_size, self.hidden_size * self.n_layers, 48 | activ=self.activ) 49 | 50 | # Set the actual initializer depending on the method 51 | self._initializer = getattr(self, '_init_{}'.format(self.method)) 52 | 53 | def forward(self, ctx_dict): 54 | ctx, ctx_mask = ctx_dict[self.data_source] 55 | x = self._initializer(ctx, ctx_mask) 56 | return torch.stack(torch.split(x, self.hidden_size, dim=-1)) 57 | 58 | def _init_zero(self, ctx, mask): 59 | # h_0: (n_layers, batch_size, hidden_size) 60 | return torch.zeros( 61 | ctx.shape[1], self.hidden_size * self.n_layers, device=ctx.device) 62 | 63 | def _init_feats(self, ctx, mask): 64 | return self.ff(ctx) 65 | 66 | def _init_mean_ctx(self, ctx, mask): 67 | if mask is None: 68 | return self.ff(ctx.mean(0)) 69 | else: 70 | return self.ff(ctx.sum(0) / mask.sum(0).unsqueeze(1)) 71 | 72 | def _init_last_ctx(self, ctx, mask): 73 | if mask is None: 74 | return self.ff(ctx[-1]) 75 | else: 76 | # Fetch last timesteps 77 | last_tsteps = mask.sum(0).sub(1).long() 78 | return self.ff(ctx[last_tsteps, range(ctx.shape[1])]) 79 | 80 | def __repr__(self): 81 | return self.__class__.__name__ + '(' \ 82 | + 'in_features={}, '.format(self.input_size) \ 83 | + 'out_features={}, '.format(self.hidden_size) \ 84 | + 'activ={}, '.format(self.activ) \ 85 | + 'method={}'.format(self.method) + ')' 86 | -------------------------------------------------------------------------------- /nmtpytorch/layers/seq_conv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | 7 | # Code contributed by @jlibovicky 8 | 9 | 10 | class SequenceConvolution(nn.Module): 11 | """1D convolution with optional max-pooling. 12 | 13 | The layer applies 1D convolution of odd kernel size with output channel 14 | counts specified by a list of integers. Then, it optionally applies 1D 15 | max-pooling to reduce the sequence length. 
16 |     """
17 | 
18 |     def __init__(self, input_dim, filters, max_pool_stride=None, activation='relu'):
19 |         super().__init__()
20 |         self.max_pool_stride = max_pool_stride
21 | 
22 |         self.conv_proj = nn.ModuleList([
23 |             nn.Conv1d(in_channels=input_dim,
24 |                       out_channels=size,
25 |                       kernel_size=2 * k + 1,
26 |                       padding=k)
27 |             for k, size in enumerate(filters) if size > 0])
28 | 
29 |         if self.max_pool_stride is not None:
30 |             self.max_pool = nn.MaxPool1d(
31 |                 kernel_size=self.max_pool_stride,
32 |                 stride=self.max_pool_stride)
33 |         else:
34 |             self.max_pool = None
35 | 
36 |     def forward(self, x, mask):
37 |         conv_outputs = [conv(x.permute(1, 2, 0)) for conv in self.conv_proj]
38 |         conv_out = torch.cat(conv_outputs, dim=1)
39 | 
40 |         if self.max_pool is not None:
41 |             conv_len = conv_out.size(-1)
42 |             if conv_len < self.max_pool_stride:
43 |                 pad_size = self.max_pool_stride - conv_len
44 |                 conv_out = F.pad(conv_out, pad=[pad_size, pad_size])
45 |             max_pooled_data = self.max_pool(conv_out).permute(2, 0, 1)
46 |             max_pooled_mask = (self.max_pool(mask.t().unsqueeze(1)).squeeze(1).t()
47 |                                if mask is not None else None)
48 |             return max_pooled_data, max_pooled_mask
49 |         else:
50 |             return conv_out.permute(2, 0, 1), mask
51 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/__init__.py: --------------------------------------------------------------------------------
1 | from .residual_lnorm import ResidualLayerNorm
2 | from .positionwise_ff import PositionwiseFF
3 | from .embedding import TFEmbedding
4 | from .encoder import TFEncoder
5 | from .decoder import TFDecoder
6 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/decoder.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..attention import ScaledDotAttention
4 | from . import ResidualLayerNorm, PositionwiseFF
5 | 
6 | 
7 | class TFDecoder(torch.nn.Module):
8 |     """Decoder block for Transformer.
9 | 
10 |     Arguments:
11 |         model_dim(int): Model dimensionality.
12 |         ff_dim(int): Inner dimensionality of the position-wise feed-forward layers.
13 |         n_heads(int): Number of attention heads.
14 |         n_layers(int): Number of stacked decoder layers.
15 |     """
16 | 
17 |     def __init__(self, model_dim, ff_dim, n_heads, n_layers):
18 |         super().__init__()
19 |         self.model_dim = model_dim
20 |         self.ff_dim = ff_dim
21 |         self.n_heads = n_heads
22 |         self.n_layers = n_layers
23 |         blocks = []
24 | 
25 |         for _ in range(self.n_layers):
26 |             layers = torch.nn.Sequential(
27 |                 ScaledDotAttention(self.model_dim, self.n_heads, causal=True),
28 |                 ResidualLayerNorm(self.model_dim),
29 |                 PositionwiseFF(self.model_dim, self.ff_dim),
30 |                 ResidualLayerNorm(self.model_dim),
31 |             )
32 |             blocks.append(layers)
33 | 
34 |         self.blocks = torch.nn.ModuleList(blocks)
35 | 
36 |     def forward(self, x, mask=None, **kwargs):
37 |         """Forward-pass of the decoder block.
38 | 
39 |         :param x: input tensor, shape (tstep, bsize, model_dim)
40 |         :param mask: mask tensor for unavailable batch positions (tstep, bsize)
41 | 
42 |         :return: tuple of the transformed tensor and the mask
43 |         """
44 |         for block in self.blocks:
45 |             x, mask = block((x, x, x, mask))
46 |         return (x, mask)
47 | 
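The `causal=True` flag handed to `ScaledDotAttention` is what keeps the decoder autoregressive: position `t` may only attend to positions `<= t`. The attention layer's internals are not shown here, but the standard recipe it implies looks like the following standalone sketch (not the library's own code):

```python
import torch

tsteps = 5
# Upper-triangular mask: True marks "future" positions to be blocked
causal = torch.triu(torch.ones(tsteps, tsteps), diagonal=1).bool()

scores = torch.randn(tsteps, tsteps)               # raw attention logits
scores = scores.masked_fill(causal, float('-inf')) # block the future
probs = torch.softmax(scores, dim=-1)              # rows sum to 1; upper triangle is 0
print(probs)
```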
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/embedding.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class TFEmbedding(torch.nn.Embedding):
5 |     """Position-aware embeddings for Transformer models.
6 | 
7 |     Adapted from OpenNMT-py & the original `Attention is all you need` paper.
8 |     """
9 |     def __init__(self, num_embeddings, embedding_dim, max_len=1024, dropout=0.1):
10 |         self.num_embeddings = num_embeddings
11 |         self.embedding_dim = embedding_dim
12 |         self.max_len = max_len
13 |         self.dropout = dropout
14 | 
15 |         # pos_embs: (max_len, emb_dim)
16 |         pos_embs = torch.zeros(self.max_len, self.embedding_dim)
17 |         # pos: (max_len, 1)
18 |         pos = torch.arange(self.max_len).unsqueeze(1)
19 |         # divs: (emb_dim, ) wavelength denominators 10000^(i / dim)
20 |         divs = torch.pow(10000,
21 |                          torch.arange(self.embedding_dim).float().div(self.embedding_dim))
22 | 
23 |         pos_embs[:, 0::2] = torch.sin(pos / divs[0::2])
24 |         pos_embs[:, 1::2] = torch.cos(pos / divs[1::2])
25 |         # pos_embs: (max_len, 1, emb_dim)
26 |         pos_embs.unsqueeze_(1)
27 |         sqrt_dim = torch.scalar_tensor(self.embedding_dim).sqrt()
28 | 
29 |         # Call parent's init() first
30 |         super().__init__(num_embeddings, embedding_dim, padding_idx=0)
31 | 
32 |         # Register non-learnable params as buffers
33 |         self.register_buffer('pos_embs', pos_embs)
34 |         self.register_buffer('sqrt_dim', sqrt_dim)
35 |         # Create dropout layer
36 |         self.dropout_layer = torch.nn.Dropout(p=self.dropout)
37 | 
38 |     def forward(self, x):
39 |         # Get the embeddings from parent's forward first
40 |         embs = super().forward(x)
41 |         return self.dropout_layer(
42 |             embs.mul(self.sqrt_dim) + self.pos_embs[:embs.size(0)])
43 | 
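The buffer arithmetic in `TFEmbedding` is easier to see in isolation. The sketch below rebuilds the same sin/cos table with made-up dimensions; in `forward`, token embeddings are scaled by `sqrt(dim)` before this table is added.

```python
import torch

max_len, dim = 6, 8
pos = torch.arange(max_len).unsqueeze(1)                   # (max_len, 1)
divs = torch.pow(10000, torch.arange(dim).float() / dim)   # (dim,)

pe = torch.zeros(max_len, dim)
pe[:, 0::2] = torch.sin(pos / divs[0::2])   # even dims get sine
pe[:, 1::2] = torch.cos(pos / divs[1::2])   # odd dims get cosine

# Each row is the additive position signal for one timestep
print(pe.shape)   # torch.Size([6, 8])
```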
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/encoder.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..attention import ScaledDotAttention
4 | from . import ResidualLayerNorm, PositionwiseFF
5 | 
6 | 
7 | class TFEncoder(torch.nn.Module):
8 |     """Encoder block for Transformer.
9 | 
10 |     Arguments:
11 |         model_dim(int): Model dimensionality.
12 |         ff_dim(int): Inner dimensionality of the position-wise feed-forward layers.
13 |         n_heads(int): Number of attention heads.
14 |         n_layers(int): Number of stacked encoder layers.
15 |     """
16 | 
17 |     def __init__(self, model_dim, ff_dim, n_heads, n_layers):
18 |         super().__init__()
19 |         self.model_dim = model_dim
20 |         self.ff_dim = ff_dim
21 |         self.n_heads = n_heads
22 |         self.n_layers = n_layers
23 |         blocks = []
24 | 
25 |         for _ in range(self.n_layers):
26 |             layers = torch.nn.Sequential(
27 |                 ScaledDotAttention(self.model_dim, self.n_heads),
28 |                 ResidualLayerNorm(self.model_dim),
29 |                 PositionwiseFF(self.model_dim, self.ff_dim),
30 |                 ResidualLayerNorm(self.model_dim),
31 |             )
32 |             blocks.append(layers)
33 | 
34 |         self.blocks = torch.nn.ModuleList(blocks)
35 | 
36 |     def forward(self, x, mask=None, **kwargs):
37 |         """Forward-pass of the encoder block.
38 | 
39 |         :param x: input tensor, shape (tstep, bsize, model_dim)
40 |         :param mask: mask tensor for unavailable batch positions (tstep, bsize)
41 | 
42 |         :return: tuple of the transformed tensor and the mask
43 |         """
44 |         for block in self.blocks:
45 |             x, mask = block((x, x, x, mask))
46 |         return (x, mask)
47 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/positionwise_ff.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from .. import FF
4 | 
5 | 
6 | class PositionwiseFF(torch.nn.Module):
7 |     """Position-wise feed-forward layer.
8 | 
9 |     Arguments:
10 |         model_dim(int): Model dimensionality.
11 |         ff_dim(int): Inner projection dimensionality.
12 |         activ(str, optional): Inner non-linearity. (Default: 'relu')
13 | 
14 |     Input:
15 |         A tuple of the input tensor (tstep, bsize, model_dim) and its mask.
16 | 
17 |     Output:
18 |         A (x, FF(x), mask) tuple ready to be consumed by `ResidualLayerNorm`.
19 |     """
20 | 
21 |     def __init__(self, model_dim, ff_dim, activ='relu'):
22 |         super().__init__()
23 |         self.model_dim = model_dim
24 |         self.ff_dim = ff_dim
25 |         self.activ = activ
26 | 
27 |         # Create the layers
28 |         self.func = torch.nn.Sequential(
29 |             FF(self.model_dim, self.ff_dim, activ=self.activ),
30 |             FF(self.ff_dim, self.model_dim, activ=None),
31 |         )
32 | 
33 |     def forward(self, inputs):
34 |         x, mask = inputs
35 |         return (x, self.func(x), mask)
36 | 
-------------------------------------------------------------------------------- /nmtpytorch/layers/transformers/residual_lnorm.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from .. import LayerNorm
4 | 
5 | 
6 | class ResidualLayerNorm(torch.nn.Module):
7 |     """Residually connected Layer Normalization layer.
8 | 
9 |     Arguments:
10 |         model_dim(int): Model dimensionality.
11 |         affine(bool, optional): Learn elementwise affine parameters. (Default: True)
12 |         dropout(float, optional): Dropout rate for the sublayer output. (Default: 0.1)
13 | 
14 |     Input:
15 |         A tuple of (x, Sublayer(x), mask).
16 | 
17 |     Output:
18 |         A tuple of (LayerNorm(x + Dropout(Sublayer(x))), mask).
19 |     """
20 | 
21 |     def __init__(self, model_dim, affine=True, dropout=0.1):
22 |         super().__init__()
23 |         self.model_dim = model_dim
24 |         self.affine = affine
25 |         self.dropout = dropout
26 | 
27 |         self.norm = LayerNorm(self.model_dim, elementwise_affine=self.affine)
28 |         self.dropout_layer = torch.nn.Dropout(self.dropout)
29 | 
30 |     def forward(self, inputs):
31 |         # Unpack into `x` and `Sublayer(x)`
32 |         x, f_x, mask = inputs
33 |         return (self.norm(x + self.dropout_layer(f_x)), mask)
34 | 
-------------------------------------------------------------------------------- /nmtpytorch/logger.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pathlib
3 | import logging
4 | 
5 | from .cleanup import cleanup
6 | 
7 | 
8 | def setup(opts=None):
9 |     _format = '%(message)s'
10 | 
11 |     formatter = logging.Formatter(_format)
12 |     logger = logging.getLogger('nmtpytorch')
13 |     logger.setLevel(logging.DEBUG)
14 | 
15 |     con_handler = logging.StreamHandler()
16 |     con_handler.setFormatter(formatter)
17 |     logger.addHandler(con_handler)
18 | 
19 |     if opts is not None:
20 |         log_file = str(pathlib.Path(opts['save_path']) /
21 |                        opts['subfolder'] / opts['exp_id']) + '.log'
22 |         file_handler = logging.FileHandler(log_file, mode='w')
23 |         file_handler.setFormatter(formatter)
24 |         logger.addHandler(file_handler)
25 | 
26 |     cleanup.register_handler(logger)
27 |     return logger
28 | 
-------------------------------------------------------------------------------- /nmtpytorch/metrics/__init__.py: --------------------------------------------------------------------------------
1 | from .metric import Metric
2 | from .multibleu import BLEUScorer
3 | from .sacrebleu import SACREBLEUScorer
4 | from .meteor import METEORScorer
5 | from .wer import WERScorer
6 | from .cer import CERScorer
7 | from .rouge import ROUGEScorer
8 | 
9 | beam_metrics = ["BLEU", "SACREBLEU", "METEOR", "WER", "CER", "ROUGE"]
10 | 
11 | metric_info = {
12 |     'BLEU': 'max',
13 |     'SACREBLEU': 'max',
14 |     'METEOR': 'max',
15 |     'ROUGE': 'max',
16 |     'LOSS': 'min',
17 |     'WER': 'min',
18 |     'CER': 'min',
19 |     'ACC': 'max',
20 |     'RECALL': 'max',
21 |     'PRECISION': 'max',
22 |     'F1': 'max',
23 | }
24 | 
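Since `metric_info` records whether each metric improves upward (BLEU) or downward (WER), selection code can stay metric-agnostic. A small sketch with made-up score histories:

```python
# Direction-agnostic model selection on top of metric_info above;
# the per-epoch score lists are invented for illustration.
scores = {'BLEU': [31.2, 33.0, 32.5], 'WER': [18.4, 16.9, 17.3]}

for name, history in scores.items():
    pick = max if metric_info[name] == 'max' else min
    best_epoch = history.index(pick(history)) + 1
    print('{}: best = {} @ epoch {}'.format(name, pick(history), best_epoch))
# BLEU: best = 33.0 @ epoch 2
# WER: best = 16.9 @ epoch 2
```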
post-processing.""" 9 | def compute(self, refs, hyps, language=None, lowercase=False): 10 | if isinstance(hyps, str): 11 | # hyps is a file 12 | hyp_sents = open(hyps).read().strip().split('\n') 13 | elif isinstance(hyps, list): 14 | hyp_sents = hyps 15 | 16 | # refs is a list, take its first item 17 | with open(refs[0]) as f: 18 | ref_sents = f.read().strip().split('\n') 19 | 20 | assert len(hyp_sents) == len(ref_sents), "CER: # of sentences does not match." 21 | 22 | n_ref_chars = 0 23 | n_ref_tokens = 0 24 | dist_chars = 0 25 | dist_tokens = 0 26 | for hyp, ref in zip(hyp_sents, ref_sents): 27 | hyp_chars = hyp.split(' ') 28 | ref_chars = ref.split(' ') 29 | n_ref_chars += len(ref_chars) 30 | dist_chars += editdistance.eval(hyp_chars, ref_chars) 31 | 32 | # Convert char-based sentences to token-based ones 33 | hyp_tokens = hyp.replace(' ', '').replace('', ' ').strip().split(' ') 34 | ref_tokens = ref.replace(' ', '').replace('', ' ').strip().split(' ') 35 | n_ref_tokens += len(ref_tokens) 36 | dist_tokens += editdistance.eval(hyp_tokens, ref_tokens) 37 | 38 | cer = (100 * dist_chars) / n_ref_chars 39 | wer = (100 * dist_tokens) / n_ref_tokens 40 | 41 | verbose_score = "{:.3f}% (n_errors = {}, n_ref_chars = {}, WER = {:.3f}%)".format( 42 | cer, dist_chars, n_ref_chars, wer) 43 | 44 | return Metric('CER', cer, verbose_score, higher_better=False) 45 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/meteor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import shutil 4 | import pathlib 5 | import subprocess 6 | 7 | from ..utils.misc import listify, get_meteor_jar 8 | from .metric import Metric 9 | 10 | 11 | class METEORScorer: 12 | def __init__(self): 13 | self.jar = str(get_meteor_jar()) 14 | self.__cmdline = ["java", "-Xmx2G", "-jar", self.jar, 15 | "-", "-", "-stdio"] 16 | self.env = os.environ 17 | self.env['LC_ALL'] = 'en_US.UTF-8' 18 | 19 | # Sanity check 20 | if shutil.which('java') is None: 21 | raise RuntimeError('METEOR requires java which is not installed.') 22 | 23 | def compute(self, refs, hyps, language="auto"): 24 | cmdline = self.__cmdline[:] 25 | refs = listify(refs) 26 | 27 | if isinstance(hyps, str): 28 | # If file, open it for line reading 29 | hyps = open(hyps) 30 | 31 | if language == "auto": 32 | # Take the extension of the 1st reference file, e.g. 
".de" 33 | language = pathlib.Path(refs[0]).suffix[1:] 34 | 35 | cmdline.extend(["-l", language]) 36 | 37 | # Make reference files a list 38 | iters = [open(f) for f in refs] 39 | iters.append(hyps) 40 | 41 | # Run METEOR process 42 | proc = subprocess.Popen(cmdline, 43 | stdout=subprocess.PIPE, 44 | stdin=subprocess.PIPE, 45 | stderr=subprocess.PIPE, 46 | env=self.env, 47 | universal_newlines=True, bufsize=1) 48 | 49 | eval_line = 'EVAL' 50 | 51 | for line_ctr, lines in enumerate(zip(*iters)): 52 | lines = [l.rstrip('\n') for l in lines] 53 | refstr = " ||| ".join(lines[:-1]) 54 | line = "SCORE ||| " + refstr + " ||| " + lines[-1] 55 | 56 | proc.stdin.write(line + '\n') 57 | eval_line += ' ||| {}'.format(proc.stdout.readline().strip()) 58 | 59 | # Send EVAL line to METEOR 60 | proc.stdin.write(eval_line + '\n') 61 | 62 | # Dummy read segment scores 63 | for i in range(line_ctr + 1): 64 | proc.stdout.readline().strip() 65 | 66 | # Compute final METEOR 67 | try: 68 | score = float(proc.stdout.readline().strip()) 69 | score = Metric('METEOR', 100 * score) 70 | except Exception as e: 71 | score = Metric('METEOR', 0.0) 72 | finally: 73 | # Close METEOR process 74 | proc.stdin.close() 75 | proc.terminate() 76 | proc.kill() 77 | proc.wait(timeout=2) 78 | return score 79 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/metric.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from functools import total_ordering 3 | 4 | 5 | @total_ordering 6 | class Metric: 7 | """A Metric object to represent evaluation metrics. 8 | 9 | Arguments: 10 | name(str): A name for the metric that will be kept internally 11 | after upper-casing 12 | score(float): A floating point score 13 | detailed_score(str, optional): A custom, more detailed string 14 | representing the score given above (Default: "") 15 | higher_better(bool, optional): If ``False``, the smaller the better 16 | (Default: ``True``) 17 | """ 18 | 19 | def __init__(self, name, score, detailed_score="", higher_better=True): 20 | self.name = name.upper() 21 | self.score = score 22 | self.detailed_score = detailed_score 23 | self.higher_better = higher_better 24 | 25 | def __eq__(self, other): 26 | return self.score == other.score 27 | 28 | def __lt__(self, other): 29 | return self.score < other.score 30 | 31 | def __repr__(self): 32 | rhs = (self.detailed_score if self.detailed_score 33 | else "%.2f" % self.score) 34 | return self.name + ' = ' + rhs 35 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/multibleu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import subprocess 3 | import pkg_resources 4 | 5 | from ..utils.misc import listify 6 | from .metric import Metric 7 | 8 | BLEU_SCRIPT = pkg_resources.resource_filename('nmtpytorch', 9 | 'lib/multi-bleu.perl') 10 | 11 | 12 | class BLEUScorer: 13 | """BLEUScorer class.""" 14 | def __init__(self): 15 | # For multi-bleu.perl we give the reference(s) files as argv, 16 | # while the candidate translations are read from stdin. 
17 | self.__cmdline = [BLEU_SCRIPT] 18 | 19 | def compute(self, refs, hyps, language=None, lowercase=False): 20 | cmdline = self.__cmdline[:] 21 | 22 | if lowercase: 23 | cmdline.append("-lc") 24 | 25 | # Make reference files a list 26 | cmdline.extend(listify(refs)) 27 | 28 | if isinstance(hyps, str): 29 | hypstring = open(hyps).read().strip() 30 | elif isinstance(hyps, list): 31 | hypstring = "\n".join(hyps) 32 | 33 | score = subprocess.run(cmdline, stdout=subprocess.PIPE, 34 | input=hypstring, 35 | universal_newlines=True).stdout.splitlines() 36 | 37 | if len(score) == 0: 38 | return Metric('BLEU', 0, "0.0") 39 | else: 40 | score = score[0].strip() 41 | float_score = float(score.split()[2][:-1]) 42 | verbose_score = score.replace('BLEU = ', '') 43 | return Metric('BLEU', float_score, verbose_score) 44 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/rouge.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .metric import Metric 3 | from ..cocoeval import Rouge 4 | 5 | 6 | class ROUGEScorer: 7 | def compute(self, refs, hyps, language=None, lowercase=False): 8 | if isinstance(hyps, str): 9 | # hyps is a file 10 | hyp_sents = open(hyps).read().strip().split('\n') 11 | elif isinstance(hyps, list): 12 | hyp_sents = hyps 13 | 14 | # refs is a list, take its first item 15 | with open(refs[0]) as f: 16 | ref_sents = f.read().strip().split('\n') 17 | 18 | assert len(hyp_sents) == len(ref_sents), "ROUGE: # of sentences does not match." 19 | 20 | rouge_scorer = Rouge() 21 | 22 | rouge_sum = 0 23 | for hyp, ref in zip(hyp_sents, ref_sents): 24 | rouge_sum += rouge_scorer.calc_score([hyp], [ref]) 25 | 26 | score = (100 * rouge_sum) / len(hyp_sents) 27 | verbose_score = "{:.3f}".format(score) 28 | 29 | return Metric('ROUGE', score, verbose_score, higher_better=True) 30 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/sacrebleu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import subprocess 3 | 4 | from ..utils.misc import listify 5 | from .metric import Metric 6 | 7 | 8 | class SACREBLEUScorer: 9 | """SACREBLEUScorer class.""" 10 | def __init__(self): 11 | self.__cmdline = ["sacrebleu", "--short"] 12 | 13 | def compute(self, refs, hyps, language=None, lowercase=False): 14 | cmdline = self.__cmdline[:] 15 | 16 | if lowercase: 17 | cmdline.append("-lc") 18 | 19 | # Make reference files a list 20 | cmdline.extend(listify(refs)) 21 | 22 | if isinstance(hyps, str): 23 | hypstring = open(hyps).read().strip() 24 | elif isinstance(hyps, list): 25 | hypstring = "\n".join(hyps) 26 | 27 | score = subprocess.run(cmdline, stdout=subprocess.PIPE, 28 | input=hypstring, 29 | universal_newlines=True).stdout.splitlines() 30 | 31 | if len(score) == 0: 32 | return Metric('SACREBLEU', 0, "0.0") 33 | else: 34 | score = score[0].strip() 35 | float_score = float(score.split()[2]) 36 | verbose_score = ' '.join(score.split()[2:]) 37 | return Metric('SACREBLEU', float_score, verbose_score) 38 | -------------------------------------------------------------------------------- /nmtpytorch/metrics/wer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import editdistance 3 | 4 | from .metric import Metric 5 | 6 | 7 | class WERScorer: 8 | def compute(self, refs, hyps, language=None, lowercase=False): 9 | if 
isinstance(hyps, str): 10 | # hyps is a file 11 | hyp_sents = open(hyps).read().strip().split('\n') 12 | elif isinstance(hyps, list): 13 | hyp_sents = hyps 14 | 15 | # refs is a list, take its first item 16 | with open(refs[0]) as f: 17 | ref_sents = f.read().strip().split('\n') 18 | 19 | assert len(hyp_sents) == len(ref_sents), "WER: # of sentences does not match." 20 | 21 | n_ref_tokens = 0 22 | dist = 0 23 | for hyp, ref in zip(hyp_sents, ref_sents): 24 | hyp_tokens = hyp.split(' ') 25 | ref_tokens = ref.split(' ') 26 | n_ref_tokens += len(ref_tokens) 27 | dist += editdistance.eval(hyp_tokens, ref_tokens) 28 | 29 | score = (100 * dist) / n_ref_tokens 30 | verbose_score = "{:.3f}% (n_errors = {}, n_ref_tokens = {})".format( 31 | score, dist, n_ref_tokens) 32 | 33 | return Metric('WER', score, verbose_score, higher_better=False) 34 | -------------------------------------------------------------------------------- /nmtpytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | ##### 2 | # NMT 3 | ##### 4 | from .nmt import NMT 5 | from .tfnmt import TransformerNMT 6 | 7 | ################ 8 | # Multimodal NMT 9 | ################ 10 | from .simple_mmt import SimpleMMT 11 | from .attentive_mmt import AttentiveMMT 12 | 13 | ############### 14 | # Speech models 15 | ############### 16 | from .asr import ASR 17 | from .multimodal_asr import MultimodalASR 18 | -------------------------------------------------------------------------------- /nmtpytorch/models/attentive_mmt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from torch import nn 5 | 6 | from ..datasets import MultimodalDataset 7 | from ..layers import ConditionalMMDecoder, TextEncoder, FF 8 | from .nmt import NMT 9 | 10 | logger = logging.getLogger('nmtpytorch') 11 | 12 | 13 | class AttentiveMMT(NMT): 14 | """An end-to-end sequence-to-sequence NMT model with visual attention over 15 | pre-extracted convolutional features. 16 | """ 17 | def set_defaults(self): 18 | # Set parent defaults 19 | super().set_defaults() 20 | self.defaults.update({ 21 | 'fusion_type': 'concat', # Multimodal context fusion (sum|mul|concat) 22 | 'fusion_activ': 'tanh', # Multimodal context non-linearity 23 | 'vis_activ': 'linear', # Visual feature transformation activ. 24 | 'n_channels': 2048, # depends on the features used 25 | 'mm_att_type': 'md-dd', # multimodal attention type 26 | # md: modality dep. 27 | # mi: modality indep. 28 | # dd: decoder state dep. 29 | # di: decoder state indep. 30 | 'out_logic': 'deep', # simple vs deep output 31 | 'persistent_dump': False, # To save activations during beam-search 32 | 'preatt': False, # Apply filtered attention 33 | 'preatt_activ': 'ReLU', # Activation for convatt block 34 | 'dropout_img': 0.0, # Dropout on image features 35 | }) 36 | 37 | def __init__(self, opts): 38 | super().__init__(opts) 39 | 40 | def setup(self, is_train=True): 41 | # Textual context dim 42 | txt_ctx_size = self.ctx_sizes[self.sl] 43 | 44 | # Add visual context transformation (sect. 
3.2 in paper) 45 | self.ff_img = FF( 46 | self.opts.model['n_channels'], txt_ctx_size, 47 | activ=self.opts.model['vis_activ']) 48 | 49 | self.dropout_img = nn.Dropout(self.opts.model['dropout_img']) 50 | 51 | # Add vis ctx size 52 | self.ctx_sizes['image'] = txt_ctx_size 53 | 54 | ######################## 55 | # Create Textual Encoder 56 | ######################## 57 | self.enc = TextEncoder( 58 | input_size=self.opts.model['emb_dim'], 59 | hidden_size=self.opts.model['enc_dim'], 60 | n_vocab=self.n_src_vocab, 61 | rnn_type=self.opts.model['enc_type'], 62 | dropout_emb=self.opts.model['dropout_emb'], 63 | dropout_ctx=self.opts.model['dropout_ctx'], 64 | dropout_rnn=self.opts.model['dropout_enc'], 65 | num_layers=self.opts.model['n_encoders'], 66 | emb_maxnorm=self.opts.model['emb_maxnorm'], 67 | emb_gradscale=self.opts.model['emb_gradscale']) 68 | 69 | # Create Decoder 70 | self.dec = ConditionalMMDecoder( 71 | input_size=self.opts.model['emb_dim'], 72 | hidden_size=self.opts.model['dec_dim'], 73 | n_vocab=self.n_trg_vocab, 74 | rnn_type=self.opts.model['dec_type'], 75 | ctx_size_dict=self.ctx_sizes, 76 | ctx_name=str(self.sl), 77 | fusion_type=self.opts.model['fusion_type'], 78 | fusion_activ=self.opts.model['fusion_activ'], 79 | tied_emb=self.opts.model['tied_emb'], 80 | dec_init=self.opts.model['dec_init'], 81 | att_type=self.opts.model['att_type'], 82 | mm_att_type=self.opts.model['mm_att_type'], 83 | out_logic=self.opts.model['out_logic'], 84 | att_activ=self.opts.model['att_activ'], 85 | transform_ctx=self.opts.model['att_transform_ctx'], 86 | att_ctx2hid=False, 87 | mlp_bias=self.opts.model['att_mlp_bias'], 88 | att_bottleneck=self.opts.model['att_bottleneck'], 89 | dropout_out=self.opts.model['dropout_out'], 90 | emb_maxnorm=self.opts.model['emb_maxnorm'], 91 | emb_gradscale=self.opts.model['emb_gradscale'], 92 | persistent_dump=self.opts.model['persistent_dump']) 93 | 94 | # Share encoder and decoder weights 95 | if self.opts.model['tied_emb'] == '3way': 96 | self.enc.emb.weight = self.dec.emb.weight 97 | 98 | def load_data(self, split, batch_size, mode='train'): 99 | """Loads the requested dataset split.""" 100 | dataset = MultimodalDataset( 101 | data=self.opts.data[split + '_set'], 102 | mode=mode, batch_size=batch_size, 103 | vocabs=self.vocabs, topology=self.topology, 104 | bucket_by=self.opts.model['bucket_by'], 105 | max_len=self.opts.model.get('max_len', None), 106 | order_file=self.opts.data[split + '_set'].get('ord', None)) 107 | logger.info(dataset) 108 | return dataset 109 | 110 | def encode(self, batch, **kwargs): 111 | # Transform the features to context dim 112 | feats = self.dropout_img(self.ff_img(batch['image'])) 113 | 114 | # Get source language encodings (S*B*C) 115 | text_encoding = self.enc(batch[self.sl]) 116 | 117 | return { 118 | str(self.sl): text_encoding, 119 | 'image': (feats, None), 120 | } 121 | -------------------------------------------------------------------------------- /nmtpytorch/models/stale/README.md: -------------------------------------------------------------------------------- 1 | Stale models 2 | --- 3 | 4 | This folder contains files from older/experimental models which may or 5 | may not work with the current code. They are merely here for reference. 
6 | 
-------------------------------------------------------------------------------- /nmtpytorch/models/tfnmt.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 | 
4 | import torch
5 | from torch import nn
6 | 
7 | from ..layers.transformers import TFEmbedding, TFEncoder, TFDecoder
8 | 
9 | from . import NMT
10 | 
11 | logger = logging.getLogger('nmtpytorch')
12 | 
13 | 
14 | class TransformerNMT(NMT):
15 |     supports_beam_search = True
16 | 
17 |     def set_defaults(self):
18 |         self.defaults = {
19 |             'model_dim': 512,         # model_dim
20 |             'ff_dim': 2048,           # Positionwise FF inner dimension
21 |             'n_enc_layers': 6,        # Number of encoder layers
22 |             'n_dec_layers': 6,        # Number of decoder layers
23 |             'n_heads': 8,             # Number of attention heads
24 |             'direction': None,        # Network directionality, i.e. en->de
25 |             'max_len': None,          # Reject sentences longer than this w.r.t 'bucket_by' (None: no filtering)
26 |             'bucket_by': None,        # A key like 'en' to define w.r.t which dataset
27 |                                       # the batches will be sorted
28 |             'bucket_order': None,     # Curriculum: ascending/descending/None
29 |             'sampler_type': 'bucket', # bucket or approximate
30 |             'short_list': 0,          # Vocabulary short listing
31 |         }
32 | 
33 |     def __init__(self, opts):
34 |         super().__init__(opts)
35 | 
36 |     def reset_parameters(self):
37 |         for name, param in self.named_parameters():
38 |             # Skip 1-d biases and scalars
39 |             if param.requires_grad and param.dim() > 1:
40 |                 nn.init.kaiming_normal_(param.data)
41 |         # Reset padding embedding to 0
42 |         with torch.no_grad():
43 |             self.src_emb.weight.data[0].fill_(0)
44 |             self.trg_emb.weight.data[0].fill_(0)
45 | 
46 |     def setup(self, is_train=True):
47 |         """Sets up NN topology by creating the layers."""
48 |         # Create the embeddings
49 |         self.src_emb = TFEmbedding(self.n_src_vocab, self.opts.model['model_dim'])
50 |         self.trg_emb = TFEmbedding(self.n_trg_vocab, self.opts.model['model_dim'])
51 |         self.enc = TFEncoder(
52 |             self.opts.model['model_dim'], self.opts.model['ff_dim'],
53 |             self.opts.model['n_heads'], self.opts.model['n_enc_layers'])
54 |         self.dec = TFDecoder(
55 |             self.opts.model['model_dim'], self.opts.model['ff_dim'],
56 |             self.opts.model['n_heads'], self.opts.model['n_dec_layers'])
57 |         self.seq_loss = torch.nn.NLLLoss(reduction='sum', ignore_index=0)
58 | 
59 |     def encode(self, batch, **kwargs):
60 |         # mask: (tstep, bsize)
61 |         mask = batch[self.sl].ne(0).float()
62 | 
63 |         # embs: (tstep, bsize, dim)
64 |         embs = self.src_emb(batch[self.sl])
65 |         h, mask = self.enc(embs, mask=mask)
66 | 
67 |         d = {str(self.sl): (h, mask)}
68 |         return d
69 | 
70 |     def forward(self, batch, **kwargs):
71 |         # NOTE: The training pass below is an unfinished draft
72 |         enc = self.encode(batch)
73 | 
74 |         dec_input = batch[self.tl]
75 | 
76 |         # result = self.dec(self.encode(batch), batch[self.tl])
77 |         # result['n_items'] = torch.nonzero(batch[self.tl][1:]).shape[0]
78 |         # return result
79 | 
-------------------------------------------------------------------------------- /nmtpytorch/samplers/__init__.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from .bucket import BucketBatchSampler
3 | from .approx import ApproximateBucketBatchSampler
4 | 
5 | def get_sampler(type_):
6 |     return {
7 |         'bucket': BucketBatchSampler,
8 |         'approximate': ApproximateBucketBatchSampler,
9 |     }[type_.lower()]
10 | 
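A short sketch of the factory above: the lookup is case-insensitive thanks to `type_.lower()`, and the constructor arguments follow the `ApproximateBucketBatchSampler` signature documented below (the length list is made up):

```python
sort_lens = [12, 7, 12, 31, 7, 12]   # made-up per-sample sequence lengths

sampler_cls = get_sampler('APPROXIMATE')
print(sampler_cls.__name__)          # ApproximateBucketBatchSampler

sampler = sampler_cls(batch_size=2, sort_lens=sort_lens)
print(sampler.n_batches)             # number of batches it will yield
```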
coding: utf-8 -*- 2 | import math 3 | import logging 4 | from collections import defaultdict 5 | 6 | import numpy as np 7 | 8 | from ..utils.device import DEVICE_IDS 9 | from . import BucketBatchSampler 10 | 11 | logger = logging.getLogger('nmtpytorch') 12 | 13 | 14 | class ApproximateBucketBatchSampler(BucketBatchSampler): 15 | r"""Samples batch indices from sequence-length buckets efficiently 16 | with very little memory overhead. 17 | 18 | Different from `BucketBatchSampler`, this class bins data samples w.r.t 19 | lengths but does not guarantee that each bucket necessarily contains 20 | same-length sequences. Further padding/packing/masking should be done 21 | by detecting possible <pad> items in tensors. 22 | 23 | Arguments: 24 | batch_size (int): Size of mini-batch. 25 | sort_lens (list): List of source or target lengths corresponding to each 26 | item in the dataset. 27 | max_len (int, optional): A maximum sequence length that will be used 28 | to filter out very long sequences. ``None`` means no filtering. 29 | store_indices (bool, optional): If ``True``, indices that will unsort 30 | the dataset will be stored. This is used by beam search/inference. 31 | order (str, optional): Default is ``None``, i.e. buckets are shuffled. 32 | If ``ascending`` or ``descending``, will iterate w.r.t bucket 33 | lengths to implement length-based curriculum learning. 34 | """ 35 | 36 | def __init__(self, batch_size, sort_lens, max_len=None, 37 | store_indices=False, order=None): 38 | assert order in (None, 'ascending', 'descending'), \ 39 | "order should be None, 'ascending' or 'descending'" 40 | 41 | self.batch_size = batch_size 42 | self.max_len = max_len 43 | self.n_rejects = 0 44 | self.order = order 45 | self.store_indices = store_indices 46 | 47 | # Additional balancing logic for multi-GPU 48 | self.n_devices = len(DEVICE_IDS) if DEVICE_IDS else 1 49 | 50 | # Buckets: sort_lens -> list of sample indices 51 | self.buckets = defaultdict(list) 52 | 53 | # Pre-compute how many times a bucket will be sampled 54 | self.bucket_idxs = [] 55 | 56 | # Fill the buckets while optionally filtering out long sequences 57 | if self.max_len is not None: 58 | for idx, len_ in enumerate(sort_lens): 59 | if len_ <= self.max_len: 60 | self.buckets[len_].append(idx) 61 | else: 62 | self.n_rejects += 1 63 | logger.info('{} samples rejected because of length filtering @ {}'.format( 64 | self.n_rejects, self.max_len)) 65 | else: 66 | # No length filtering 67 | for idx, len_ in enumerate(sort_lens): 68 | self.buckets[len_].append(idx) 69 | 70 | ###################################### 71 | # Modified part compared to base class 72 | ###################################### 73 | ordered_idxs = [] 74 | min_bucket_size = self.batch_size * 5 75 | for length in sorted(self.buckets): 76 | ordered_idxs.extend(self.buckets[length]) 77 | 78 | # Reset buckets 79 | self.buckets = {} 80 | n_elems = len(ordered_idxs) 81 | 82 | # Bin sorted buckets approximately 83 | for idx, start in enumerate(range(0, n_elems, min_bucket_size)): 84 | self.buckets[idx] = ordered_idxs[start:start + min_bucket_size] 85 | 86 | # number of elems in the last bucket 87 | last_bucket_size = len(self.buckets[idx]) 88 | # number of elems in the last batch of last bucket 89 | last_batch_size = last_bucket_size % self.batch_size 90 | # how many to remove so that the last batch divides evenly 91 | # across the GPUs 92 | n_remove_from_last = last_batch_size % self.n_devices 93 | end_point = last_bucket_size - n_remove_from_last 94 | self.buckets[idx] = 
self.buckets[idx][:end_point] 95 | if n_remove_from_last > 0: 96 | logger.info('Removed {} samples to balance buckets.'.format( 97 | n_remove_from_last)) 98 | 99 | self.stats = {k: len(self.buckets[k]) for k in sorted(self.buckets)} 100 | 101 | for len_ in self.buckets: 102 | # Convert bucket to numpy array 103 | np_bucket = np.array(self.buckets[len_]) 104 | 105 | # How many batches will be done for this bucket? 106 | bucket_bs = np_bucket.size / self.batch_size 107 | idxs = [len_] * math.ceil(bucket_bs) 108 | 109 | self.buckets[len_] = np_bucket 110 | self.bucket_idxs.extend(idxs) 111 | 112 | # Convert to numpy array 113 | self.bucket_idxs = np.array(self.bucket_idxs) 114 | 115 | # Set number of batches 116 | self.n_batches = len(self.bucket_idxs) 117 | -------------------------------------------------------------------------------- /nmtpytorch/tester.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import time 3 | import logging 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from .utils.misc import load_pt_file, pbar 10 | from .utils.data import make_dataloader 11 | from .utils.device import DEVICE 12 | 13 | from . import models 14 | from .config import Options 15 | 16 | logger = logging.getLogger('nmtpytorch') 17 | 18 | 19 | class Tester: 20 | """Tester for models without beam-search.""" 21 | 22 | def __init__(self, **kwargs): 23 | # Store attributes directly. See bin/nmtpy for their list. 24 | self.__dict__.update(kwargs) 25 | 26 | # How many models? 27 | if len(self.models) > 1: 28 | raise RuntimeError("Test mode requires single model file.") 29 | 30 | self.model_file = self.models[0] 31 | 32 | # Disable gradient tracking 33 | torch.set_grad_enabled(False) 34 | 35 | data = load_pt_file(self.model_file) 36 | weights, _, opts = data['model'], data['history'], data['opts'] 37 | 38 | opts = Options.from_dict(opts, override_list=self.override) 39 | instance = getattr(models, opts.train['model_type'])(opts=opts) 40 | 41 | if instance.supports_beam_search: 42 | logger.info("Model supports beam-search by the way.") 43 | 44 | # Setup layers 45 | instance.setup(is_train=False) 46 | # Load weights 47 | instance.load_state_dict(weights, strict=False) 48 | # Move to device 49 | instance.to(DEVICE) 50 | # Switch to eval mode 51 | instance.train(False) 52 | 53 | self.instance = instance 54 | 55 | # Can be a comma separated list of hardcoded test splits 56 | if self.splits: 57 | logger.info('Will process "{}"'.format(self.splits)) 58 | self.splits = self.splits.split(',') 59 | elif self.source: 60 | # Split into key:value's and parse into dict 61 | input_dict = {} 62 | logger.info('Will process input configuration:') 63 | for data_source in self.source.split(','): 64 | key, path = data_source.split(':', 1) 65 | input_dict[key] = Path(path) 66 | logger.info(' {}: {}'.format(key, input_dict[key])) 67 | self.instance.opts.data['new_set'] = input_dict 68 | self.splits = ['new'] 69 | 70 | def extract_encodings(self, instance, split): 71 | """(Experimental) feature extraction mode.""" 72 | dataset = instance.load_data(split, self.batch_size, mode='eval') 73 | loader = make_dataloader(dataset) 74 | n_samples = len(dataset) 75 | feats = [] 76 | ord_feats = [] 77 | logger.info('Starting extraction') 78 | start = time.time() 79 | for batch in pbar(loader, unit='batch'): 80 | batch.device(DEVICE) 81 | out, _ = list(instance.encode(batch).values())[0] 82 | feats.append(out.data.cpu().transpose(0, 1)) 83 | for 
feat in feats: 84 | # this is a batch 85 | ord_feats.extend([f for f in feat]) 86 | idxs = zip(range(n_samples), loader.batch_sampler.orig_idxs) 87 | idxs = sorted(idxs, key=lambda x: x[1]) 88 | ord_feats = [ord_feats[i[0]].numpy() for i in idxs] 89 | np.save('{}_{}.encodings.npy'.format(self.model_file, split), ord_feats) 90 | up_time = time.time() - start 91 | logger.info('Took {:.3f} seconds'.format(up_time)) 92 | 93 | def test(self, instance, split): 94 | dataset = instance.load_data(split, self.batch_size, mode='eval') 95 | loader = make_dataloader(dataset) 96 | 97 | logger.info('Starting computation') 98 | start = time.time() 99 | results = instance.test_performance( 100 | loader, 101 | dump_file="{}.{}".format(self.model_file, split)) 102 | up_time = time.time() - start 103 | logger.info('Took {:.3f} seconds'.format(up_time)) 104 | return results 105 | 106 | def __call__(self): 107 | for input_ in self.splits: 108 | if self.mode == 'eval': 109 | results = self.test(self.instance, input_) 110 | for res in results: 111 | print(' {}: {:.5f}'.format(res.name, res.score)) 112 | elif self.mode == 'enc': 113 | self.extract_encodings(self.instance, input_) 114 | -------------------------------------------------------------------------------- /nmtpytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['misc', 'device', 'nn', 'data', 'io', 'topology'] 2 | -------------------------------------------------------------------------------- /nmtpytorch/utils/data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import logging 4 | from torch.utils.data import DataLoader 5 | import numpy as np 6 | 7 | from ..utils.misc import fopen, pbar 8 | 9 | logger = logging.getLogger('nmtpytorch') 10 | 11 | 12 | def sort_predictions(data_loader, results): 13 | """Recovers the dataset order when bucketing samplers are used.""" 14 | if getattr(data_loader.batch_sampler, 'store_indices', False): 15 | results = [results[i] for i, j in sorted( 16 | enumerate(data_loader.batch_sampler.orig_idxs), key=lambda k: k[1])] 17 | return results 18 | 19 | 20 | def make_dataloader(dataset, pin_memory=False, num_workers=0): 21 | if num_workers != 0: 22 | logger.info('Forcing num_workers to 0 since it fails with torch 0.4') 23 | num_workers = 0 24 | 25 | return DataLoader( 26 | dataset, batch_sampler=dataset.sampler, 27 | collate_fn=dataset.collate_fn, 28 | pin_memory=pin_memory, num_workers=num_workers) 29 | 30 | 31 | def sort_batch(seqbatch): 32 | """Sorts torch tensor of integer indices by decreasing order.""" 33 | # 0 is padding_idx 34 | omask = (seqbatch != 0).long() 35 | olens = omask.sum(0) 36 | slens, sidxs = torch.sort(olens, descending=True) 37 | oidxs = torch.sort(sidxs)[1] 38 | return (oidxs, sidxs, slens.data.tolist(), omask.float()) 39 | 40 | 41 | def pad_video_sequence(seqs): 42 | """ 43 | Pads video sequences with zero vectors for minibatch processing. 44 | (contributor: @elliottd) 45 | 46 | TODO: Can we write the for loop in a more compact format? 47 | """ 48 | lengths = [len(s) for s in seqs] 49 | # Get the desired size of the padding vector from the input seqs data 50 | feat_size = seqs[0].shape[1] 51 | max_len = max(lengths) 52 | tmp = [] 53 | for s, len_ in zip(seqs, lengths): 54 | if max_len - len_ == 0: 55 | tmp.append(s) 56 | else: 57 | inner_tmp = s 58 | for i in range(max_len - len_): 59 | inner_tmp = np.vstack((inner_tmp, (np.array([0.] 
* feat_size)))) 60 | tmp.append(inner_tmp) 61 | padded = np.array(tmp, dtype='float32') 62 | return torch.FloatTensor(torch.from_numpy(padded)) 63 | 64 | 65 | def convert_to_onehot(idxs, n_classes): 66 | """Returns a binary batch_size x n_classes one-hot tensor.""" 67 | out = torch.zeros(len(idxs), n_classes, device=idxs[0].device) 68 | for row, indices in zip(out, idxs): 69 | row.scatter_(0, indices, 1) 70 | return out 71 | 72 | 73 | def read_sentences(fname, vocab, bos=False, eos=True): 74 | lines = [] 75 | lens = [] 76 | with fopen(fname) as f: 77 | for idx, line in enumerate(pbar(f, unit='sents')): 78 | line = line.strip() 79 | 80 | # Empty lines will cause a lot of headaches, 81 | # get rid of them during preprocessing! 82 | assert line, "Empty line (%d) found in %s" % (idx + 1, fname) 83 | 84 | # Map and append 85 | seq = vocab.sent_to_idxs(line, explicit_bos=bos, explicit_eos=eos) 86 | lines.append(seq) 87 | lens.append(len(seq)) 88 | 89 | return lines, lens 90 | -------------------------------------------------------------------------------- /nmtpytorch/utils/device.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import shutil 4 | import subprocess 5 | 6 | import torch 7 | 8 | DEVICE = None 9 | DEVICE_IDS = None 10 | 11 | 12 | class DeviceManager: 13 | __errors = { 14 | 'BadDeviceFormat': 'Device can be cpu, gpu or [N]gpu, i.e. 2gpu', 15 | 'NoDevFiles': 'Make sure you requested a GPU resource from your cluster.', 16 | 'NoSMI': 'nvidia-smi is not installed. Are you on the correct node?', 17 | 'EnvVar': 'Please set CUDA_VISIBLE_DEVICES explicitly.', 18 | 'NoMultiGPU': 'Multi-GPU not supported for now.', 19 | 'NotEnoughGPU': 'You requested {} GPUs while you have access to only {}.', 20 | } 21 | 22 | def __init__(self, dev): 23 | self.dev = dev.lower() 24 | self.pid = os.getpid() 25 | self.req_cpu = False 26 | self.req_gpu = False 27 | self.req_n_gpu = 0 28 | self.req_multi_gpu = False 29 | self.nvidia_smi = False 30 | self.cuda_dev_ids = None 31 | 32 | if not re.match('(cpu|[0-9]{0,1}gpu)$', self.dev): 33 | raise RuntimeError(self.__errors['BadDeviceFormat']) 34 | 35 | if self.dev == 'cpu': 36 | self.req_cpu = True 37 | self.dev = torch.device('cpu') 38 | else: 39 | self.req_gpu = True 40 | if self.dev == 'gpu': 41 | self.req_n_gpu = 1 42 | else: 43 | self.req_n_gpu = int(self.dev[0]) 44 | 45 | self.req_multi_gpu = self.req_n_gpu > 1 46 | 47 | # What we have 48 | self.nvidia_smi = shutil.which('nvidia-smi') 49 | self.cuda_dev_ids = os.environ.get('CUDA_VISIBLE_DEVICES', None) 50 | 51 | if self.nvidia_smi is None: 52 | raise RuntimeError(self.__errors['NoSMI']) 53 | if self.cuda_dev_ids == "NoDevFiles": 54 | raise RuntimeError(self.__errors['NoDevFiles']) 55 | elif self.cuda_dev_ids is None: 56 | raise RuntimeError(self.__errors['EnvVar']) 57 | 58 | # How many GPUs do we have access to? 59 | self.cuda_dev_ids = [int(de) for de in self.cuda_dev_ids.split(',')] 60 | 61 | # FIXME: Remove this once DataParallel works. 
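        # Until DataParallel support lands, both requesting more than one GPU
        # and exposing more than one device via CUDA_VISIBLE_DEVICES are rejected.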
62 | if self.req_n_gpu > 1 or len(self.cuda_dev_ids) > 1: 63 | raise RuntimeError(self.__errors['NoMultiGPU']) 64 | 65 | if self.req_n_gpu > len(self.cuda_dev_ids): 66 | raise RuntimeError( 67 | self.__errors['NotEnoughGPU'].format( 68 | self.req_n_gpu, len(self.cuda_dev_ids))) 69 | else: 70 | self.cuda_dev_ids = self.cuda_dev_ids[:self.req_n_gpu] 71 | 72 | # Set master device (is always cuda:0 since we force env.var 73 | # restriction) 74 | self.dev = torch.device('cuda:0') 75 | 76 | global DEVICE, DEVICE_IDS 77 | DEVICE = self.dev 78 | DEVICE_IDS = self.cuda_dev_ids 79 | 80 | def get_cuda_mem_usage(self, name=True): 81 | if self.req_cpu: 82 | return None 83 | 84 | pr = subprocess.run([ 85 | self.nvidia_smi, 86 | "--query-compute-apps=pid,gpu_name,used_memory", 87 | "--format=csv,noheader"], stdout=subprocess.PIPE, universal_newlines=True) 88 | 89 | for line in pr.stdout.strip().split('\n'): 90 | pid, gpu_name, usage = line.split(',') 91 | if int(pid) == self.pid: 92 | if name: 93 | return '{} -> {}'.format(gpu_name.strip(), usage.strip()) 94 | return usage.strip() 95 | 96 | return 'N/A' 97 | 98 | def __repr__(self): 99 | if self.req_cpu: 100 | return "DeviceManager(dev='cpu')" 101 | return "DeviceManager({}, n_gpu={})".format(self.dev, self.req_n_gpu) 102 | -------------------------------------------------------------------------------- /nmtpytorch/utils/filterchain.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | from pathlib import Path 4 | 5 | from .misc import get_temp_file, fopen 6 | 7 | 8 | class FilterChain: 9 | """A sequential filter chain to post-process lists of tokens. 10 | 11 | Arguments: 12 | filters(list): A list of strings representing filters to apply. 13 | 14 | Available Filters: 15 | 'de-bpe': Stitches back subword units produced with apply_bpe 16 | 'de-spm': Stitches back sentence pieces produced with spm_encode 17 | 'de-segment': Converts <tag:morpheme> segmentations to normal form 18 | 'de-compound': Stitches back German compound splittings 19 | 'c2w': Stitches back space delimited characters to words. 20 | Necessary for word-level BLEU, etc. when using CharNMT. 21 | 'lower': Lowercase. 22 | 'upper': Uppercase. 23 | 'de-hyphen': De-hyphenate 'foo @-@ bar' constructs of Moses. 24 | 25 | """ 26 | FILTERS = { 27 | 'de-bpe': lambda s: s.replace("@@ ", "").replace("@@", ""), 28 | 'de-tag': lambda s: re.sub('<[a-zA-Z][a-zA-Z]>', '', s), 29 | # Decoder for Google sentencepiece 30 | # only for default params of spm_encode 31 | 'de-spm': lambda s: s.replace(" ", "").replace("\u2581", " ").strip(), 32 | # Converts segmentations of <tag:morpheme> to normal form 33 | 'de-segment': lambda s: re.sub(' *<.*?:(.*?)>', '\\1', s), 34 | # Space delim character sequence to non-tokenized normal word form 35 | 'c2w': lambda s: s.replace(' ', '').replace('<s>', ' ').strip(), 36 | # Filters out fillers from compound splitted sentences 37 | 'de-compound': lambda s: (s.replace(" @@ ", "").replace(" @@", "") 38 | .replace(" @", "").replace("@ ", "")), 39 | # de-hyphenate when -a given to Moses tokenizer 40 | 'de-hyphen': lambda s: re.sub(r'\s*@-@\s*', '-', s), 41 | 'lower': lambda s: s.lower(), 42 | 'upper': lambda s: s.upper(), 43 | } 44 | 45 | def __init__(self, filters): 46 | assert not set(filters).difference(set(self.FILTERS.keys())), \ 47 | "Unknown evaluation filter given." 
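        # Usage sketch: FilterChain(['de-bpe', 'lower']) first stitches
        # subword units back together, then lowercases each sentence;
        # filters are applied consecutively, in the order given.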
48 | self.filters = filters 49 | self._funcs = [self.FILTERS[k] for k in self.filters] 50 | 51 | def _apply(self, list_of_strs): 52 | """Applies filters consecutively on a list of sentences.""" 53 | for func in self._funcs: 54 | list_of_strs = [func(s) for s in list_of_strs] 55 | return list_of_strs 56 | 57 | def __call__(self, inp): 58 | """Applies the filterchain on a given input. 59 | 60 | Arguments: 61 | inp(pathlib.Path or list): If a `Path` given, temporary 62 | file(s) with filters applied are returned. The `Path` can 63 | also be a glob expression. Otherwise, a list with filtered 64 | sentences is returned. 65 | """ 66 | if isinstance(inp, Path): 67 | # Need to create copies of reference files with filters applied 68 | # and return their paths instead 69 | fnames = inp.parent.glob(inp.name) 70 | new_fnames = [] 71 | for fname in fnames: 72 | lines = [] 73 | f = fopen(fname) 74 | for line in f: 75 | lines.append(line.strip()) 76 | f.close() 77 | f = get_temp_file() 78 | for line in self._apply(lines): 79 | f.write(line + '\n') 80 | f.close() 81 | new_fnames.append(f.name) 82 | return new_fnames 83 | 84 | elif isinstance(inp, list): 85 | return self._apply(inp) 86 | 87 | def __repr__(self): 88 | return "FilterChain({})".format(" -> ".join(self.filters)) 89 | -------------------------------------------------------------------------------- /nmtpytorch/utils/io.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import deque 3 | 4 | 5 | class FileRotator: 6 | """A fixed queue with Path() elements where pushing a new element pops 7 | the oldest one and removes it from disk. 8 | 9 | Arguments: 10 | maxlen(int): The capacity of the queue. 11 | """ 12 | 13 | def __init__(self, maxlen): 14 | self.maxlen = maxlen 15 | self.elems = deque(maxlen=self.maxlen) 16 | 17 | def push(self, elem): 18 | if len(self.elems) == self.maxlen: 19 | # Remove oldest item 20 | popped = self.elems.pop() 21 | if popped.exists(): 22 | popped.unlink() 23 | 24 | # Add new item 25 | self.elems.appendleft(elem) 26 | 27 | def __repr__(self): 28 | return self.elems.__repr__() 29 | -------------------------------------------------------------------------------- /nmtpytorch/utils/nn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pickle as pkl 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | 9 | def get_rnn_hidden_state(h): 10 | """Returns h_t transparently regardless of RNN type.""" 11 | return h if not isinstance(h, tuple) else h[0] 12 | 13 | 14 | def get_activation_fn(name): 15 | """Returns a callable activation function from torch.""" 16 | if name in (None, 'linear'): 17 | return lambda x: x 18 | elif name in ('sigmoid', 'tanh'): 19 | return getattr(torch, name) 20 | else: 21 | return getattr(F, name) 22 | 23 | 24 | def mean_pool(data): 25 | """Simple mean pool function for transforming 3D features of shape 26 | [T]imesteps x [B]atch_size x [F]eature_size into 2D BxF features. 27 | (author: @klmulligan) 28 | 29 | Arguments: 30 | data (tuple): Encoder result of form (data: Tensor(TxBxF), mask: Tensor(TxB)) 31 | Returns: 32 | pooled_data (Tensor): Mean pooled data of shape BxF. 
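    Example (a shape-level sketch, illustrative only):
        x = torch.rand(7, 2, 4)     # T x B x F
        mask = torch.ones(7, 2)     # T x B
        out = mean_pool((x, mask))  # -> B x F, here of shape (2, 4)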
33 | """ 34 | # Unpack 35 | x, mask = data 36 | 37 | if mask is not None: 38 | return x.sum(0) / mask.sum(0).unsqueeze(1) 39 | else: 40 | return x.mean(0) 41 | 42 | 43 | def get_partial_embedding_layer(vocab, embedding_dim, pretrained_file, 44 | freeze='none', oov_zero=True): 45 | """A partially updateable embedding layer with pretrained embeddings. 46 | This is experimental and not quite tested.""" 47 | avail_idxs, miss_idxs = [], [] 48 | avail_embs = [] 49 | 50 | # Load the pickled dictionary 51 | with open(pretrained_file, 'rb') as f: 52 | pret_dict = pkl.load(f) 53 | 54 | for idx, word in vocab._imap.items(): 55 | if word in pret_dict: 56 | avail_embs.append(pret_dict[word]) 57 | avail_idxs.append(idx) 58 | else: 59 | miss_idxs.append(idx) 60 | 61 | # This matrix contains the pretrained embeddings 62 | avail_embs = torch.Tensor(avail_embs) 63 | 64 | # We don't need the whole dictionary anymore 65 | del pret_dict 66 | 67 | n_pretrained = len(avail_idxs) 68 | n_learned = vocab.n_tokens - n_pretrained 69 | 70 | # Sanity checks 71 | assert len(avail_idxs) + len(miss_idxs) == vocab.n_tokens 72 | 73 | # Create the layer 74 | emb = nn.Embedding(vocab.n_tokens, embedding_dim, padding_idx=0) 75 | if oov_zero: 76 | emb.weight.data.fill_(0) 77 | 78 | # Copy in the pretrained embeddings 79 | emb.weight.data[n_learned:] = avail_embs 80 | # Sanity check 81 | assert torch.equal(emb.weight.data[-1], avail_embs[-1]) 82 | 83 | grad_mask = None 84 | if freeze == 'all': 85 | emb.weight.requires_grad = False 86 | elif freeze == 'partial': 87 | # Create bitmap gradient mask 88 | grad_mask = torch.ones(vocab.n_tokens) 89 | grad_mask[n_learned:].fill_(0) 90 | grad_mask[0].fill_(0) 91 | grad_mask.unsqueeze_(1) 92 | 93 | def grad_mask_hook(grad): 94 | return grad_mask.to(grad.device) * grad 95 | 96 | emb.weight.register_hook(grad_mask_hook) 97 | 98 | # Return the layer 99 | return emb 100 | -------------------------------------------------------------------------------- /nmtpytorch/utils/tensorboard.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | 4 | from torch.utils.tensorboard import SummaryWriter 5 | 6 | 7 | class TensorBoard: 8 | def __init__(self, model, log_dir, exp_id, subfolder): 9 | self.model = model 10 | self.log_dir = log_dir 11 | self.exp_id = exp_id 12 | self.subfolder = subfolder 13 | self.writer = None 14 | self.available = bool(self.log_dir) 15 | 16 | # Call setup 17 | self.setup() 18 | 19 | def _nop(self, *args, **kwargs): 20 | return 21 | 22 | def setup(self): 23 | """Sets up the TensorBoard logger.""" 24 | if not self.available: 25 | self.replace_loggers() 26 | return 27 | 28 | # Construct full folder path 29 | self.log_dir = pathlib.Path(self.log_dir).expanduser() 30 | self.log_dir = self.log_dir / self.subfolder / self.exp_id 31 | self.log_dir.mkdir(parents=True, exist_ok=True) 32 | 33 | # Set up summary writer 34 | self.writer = SummaryWriter(self.log_dir) 35 | 36 | def replace_loggers(self): 37 | """Replace all log_* methods with dummy _nop.""" 38 | self.log_metrics = self._nop 39 | self.log_scalar = self._nop 40 | self.log_activations = self._nop 41 | self.log_gradients = self._nop 42 | 43 | def log_metrics(self, metrics, step, suffix=''): 44 | """Logs evaluation metrics as scalars.""" 45 | for metric in metrics: 46 | self.writer.add_scalar(suffix + metric.name, metric.score, 47 | global_step=step) 48 | 49 | def log_scalar(self, name, value, step): 50 | """Logs a single scalar value.""" 51 | 
self.writer.add_scalar(name, value, global_step=step) 52 | 53 | def log_activations(self, step): 54 | """Logs activations by layer.""" 55 | pass 56 | 57 | def log_gradients(self, step): 58 | """Logs gradients by layer.""" 59 | pass 60 | 61 | def close(self): 62 | """Closes TensorBoard handle.""" 63 | if self.available: 64 | self.writer.close() 65 | 66 | def __repr__(self): 67 | if not self.log_dir: 68 | return "No 'tensorboard_dir' given in config" 69 | return "TensorBoard is active" 70 | -------------------------------------------------------------------------------- /nmtpytorch/utils/topology.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import UserString, OrderedDict 3 | 4 | from .. import datasets 5 | 6 | 7 | class DataSource(UserString): 8 | def __init__(self, name, _type, src=False, trg=False): 9 | super().__init__(name) 10 | self._type = _type 11 | self.src = src 12 | self.trg = trg 13 | self.side = 'src' if self.src else 'trg' 14 | 15 | # Assign the method that knows how to create a tensor for a batch 16 | # of this type 17 | klass = getattr(datasets, '{}Dataset'.format(_type)) 18 | self.kwargs = {} 19 | self.torchify = lambda batch: klass.to_torch(batch, **self.kwargs) 20 | def __repr__(self): 21 | return "DataSource('{}', kwargs:{})".format(self.data, self.kwargs) 22 | 23 | 24 | class Topology: 25 | """A simple object that parses the direction string provided through the 26 | experiment configuration file. 27 | 28 | A direction is a string with the following syntax: 29 | feat:<type>, feat:<type>, ... -> feat:<type>, feat:<type>, ... 30 | 31 | where 32 | feat determines the name of the modality, i.e. 'en', 'image', etc. 33 | type is the prefix of the actual ``Dataset`` class to be used 34 | with this modality, i.e. Text, ImageFolder, OneHot, etc. 35 | if type is omitted, the default is Text. 
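    Multiple comma-separated sources/targets may be given on either side of '->', as the examples below show.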
36 | 37 | Example: 38 | de:Text (no target side) 39 | de:Text -> en:Text 40 | de:Text -> en:Text, en_pos:OneHot 41 | de:Text, image:ImageFolder -> en:Text 42 | """ 43 | def __init__(self, direction): 44 | self.direction = direction 45 | self.srcs = OrderedDict() 46 | self.trgs = OrderedDict() 47 | self.all = OrderedDict() 48 | 49 | parts = direction.strip().split('->') 50 | if len(parts) == 1: 51 | srcs, trgs = parts[0].strip().split(','), [] 52 | else: 53 | srcs = parts[0].strip().split(',') if parts[0].strip() else [] 54 | trgs = parts[1].strip().split(',') if parts[1].strip() else [] 55 | 56 | # Temporary dict to parse sources and targets in a single loop 57 | tmp = {'srcs': srcs, 'trgs': trgs} 58 | 59 | for key, values in tmp.items(): 60 | _dict = getattr(self, key) 61 | for val in values: 62 | name, *ftype = val.strip().split(':') 63 | ftype = ftype[0] if len(ftype) > 0 else "Text" 64 | ds = DataSource(name, ftype, 65 | src=(key == 'srcs'), trg=(key == 'trgs')) 66 | if name in self.all: 67 | raise RuntimeError( 68 | '"{}" already given as a data source.'.format(name)) 69 | _dict[name] = ds 70 | self.all[name] = ds 71 | 72 | # Assign shortcuts 73 | self.first_src = list(self.srcs.keys())[0] 74 | self.first_trg = list(self.trgs.keys())[0] 75 | 76 | def is_included_in(self, t): 77 | """Return True if this topology is included in t, otherwise False.""" 78 | if t is None: 79 | return False 80 | return (self.srcs.keys() <= t.srcs.keys()) and (self.trgs.keys() <= t.trgs.keys()) 81 | 82 | def get_srcs(self, _type): 83 | return [v for v in self.srcs.values() if v._type == _type] 84 | 85 | def get_trgs(self, _type): 86 | return [v for v in self.trgs.values() if v._type == _type] 87 | 88 | def get_src_langs(self): 89 | return self.get_srcs('Text') 90 | 91 | def get_trg_langs(self): 92 | return self.get_trgs('Text') 93 | 94 | def __getitem__(self, key): 95 | return self.all[key] 96 | 97 | def __repr__(self): 98 | s = "Sources:\n" 99 | for x in self.srcs.values(): 100 | s += " {}\n".format(x.__repr__()) 101 | s += "Targets:\n" 102 | for x in self.trgs.values(): 103 | s += " {}\n".format(x.__repr__()) 104 | return s 105 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | scripts 2 | -- 3 | -------------------------------------------------------------------------------- /scripts/create-pretrained-embs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import re 3 | import json 4 | import argparse 5 | from collections import OrderedDict 6 | 7 | import numpy as np 8 | import torch 9 | 10 | from nmtpytorch.vocabulary import Vocabulary 11 | 12 | 13 | def get_nmtpy_vocab_tokens(fname): 14 | vocab = Vocabulary(fname, name='en') 15 | base_tokens = list(vocab._map.keys()) 16 | # remove special tokens 17 | base_tokens = set(base_tokens).difference(vocab.TOKENS.keys()) 18 | return base_tokens 19 | 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser( 23 | prog='create-pretrained-embs', 24 | description="Creates a .ckpt file with pretrained embeddings ready-to-use.") 25 | 26 | parser.add_argument('-i', '--input', type=str, required=True, 27 | help="Input pretrained file.") 28 | 29 | parser.add_argument('-t', '--type', type=str, required=True, 30 | choices=['glove', 'fasttext'], 31 | help="Input file format i.e. 
glove or fasttext") 32 | 33 | parser.add_argument('-n', '--n-tokens', type=int, default=0, 34 | help="Size limit of final vocabulary.") 35 | 36 | parser.add_argument('-b', '--base-vocab', type=str, nargs='*', 37 | help="nmtpy .vocab file(s) for tokens which should always be included.") 38 | 39 | parser.add_argument('-o', '--out-prefix', type=str, required=True, 40 | help="Prefix for output files.") 41 | 42 | args = parser.parse_args() 43 | 44 | embs = {} 45 | base_tokens = [] 46 | 47 | ######################## 48 | # Read base vocabularies 49 | ######################## 50 | for fname in args.base_vocab: 51 | base_tokens.extend(get_nmtpy_vocab_tokens(fname)) 52 | 53 | ####################### 54 | # Read pretrained store 55 | ####################### 56 | with open(args.input) as f: 57 | for line in f: 58 | word, *vals = line.strip().split(' ') 59 | embs[word] = vals 60 | 61 | # Separate out special tokens 62 | spec_embs = {} 63 | for tok in ('<unk>', '<pad>', '<bos>', '<eos>', '<s>', '</s>'): 64 | if tok in embs: 65 | spec_embs[tok] = embs.pop(tok) 66 | elif tok.upper() in embs: 67 | spec_embs[tok.upper()] = embs.pop(tok.upper()) 68 | 69 | print(f'Number of pretrained vectors: {len(embs)}') 70 | 71 | ############################################# 72 | # Construct the list for the final vocabulary 73 | ############################################# 74 | deferred_init = [] 75 | vocab = OrderedDict() 76 | 77 | def emb2float(vals): 78 | return [float(v) for v in vals] 79 | 80 | # Put anything to <pad> as it will later be rewritten with zeros 81 | vocab['<pad>'] = emb2float(embs['.']) 82 | vocab['<bos>'] = emb2float(spec_embs['<s>']) 83 | vocab['<eos>'] = emb2float(spec_embs['</s>']) 84 | # We'll re-init this at a later stage 85 | vocab['<unk>'] = emb2float(embs['.']) 86 | # Moses hyphen symbol is OOV, use plain hyphen 87 | embs['@-@'] = embs['-'] 88 | 89 | # Put base tokens 90 | for tok in base_tokens: 91 | if tok in embs: 92 | vocab[tok] = emb2float(embs.pop(tok)) 93 | else: 94 | deferred_init.append(tok) 95 | 96 | # Only alphabetic ones 97 | re_pat = re.compile('^[a-z]+$') 98 | filtered_words = list(filter(lambda x: re_pat.match(x), embs.keys())) 99 | 100 | if args.n_tokens > 0: 101 | # Complete to args.n_tokens 102 | how_many = args.n_tokens - len(vocab) - len(deferred_init) 103 | else: 104 | # Add all 105 | how_many = len(filtered_words) 106 | 107 | for word in filtered_words[:how_many]: 108 | vocab[word] = emb2float(embs[word]) 109 | 110 | word_order = list(vocab.keys()) 111 | emb_W = np.array(list(vocab.values()), dtype='float32') 112 | 113 | # Init randomly the deferred ones with sample averages 114 | np.random.seed(39348) 115 | deferred_embs = np.empty( 116 | (len(deferred_init), emb_W.shape[1]), dtype='float32') 117 | for idx, tok in enumerate(deferred_init): 118 | word_order.append(tok) 119 | idxs = np.random.permutation(emb_W.shape[0])[:10000] 120 | deferred_embs[idx] = emb_W[idxs].mean(0) 121 | 122 | # merge altogether 123 | emb_W = np.concatenate([emb_W, deferred_embs]) 124 | 125 | # Finally replace <unk> with average embedding 126 | emb_W[word_order.index('<unk>')] = emb_W.mean(0) 127 | 128 | # cast down 129 | emb_W = torch.from_numpy(emb_W.astype('float16')) 130 | 131 | # Dump file 132 | torch.save(emb_W, f'{args.out_prefix}.pt') 133 | 134 | json_vocab = OrderedDict({k: i for i, k in enumerate(word_order)}) 135 | with open(f'{args.out_prefix}.vocab.en', 'w') as f: 136 | json.dump(json_vocab, f, ensure_ascii=False, indent=2) 137 | 138 | print(f'Final vocabulary size: {emb_W.shape[0]}') 139 | 
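# Usage sketch (hypothetical file names, for illustration only):
#   scripts/create-pretrained-embs -i glove.840B.300d.txt -t glove \
#       -n 50000 -b train.vocab.en -o glove-init
# This would write 'glove-init.pt' (a float16 embedding matrix) and
# 'glove-init.vocab.en' (a JSON mapping from token to row index).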
-------------------------------------------------------------------------------- /scripts/dump-attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import pickle as pkl 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import torch 8 | import tqdm 9 | 10 | from nmtpytorch.translator import Translator 11 | from nmtpytorch.utils.data import make_dataloader 12 | 13 | 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser( 17 | prog='nmtpy-dump-attention', 18 | formatter_class=argparse.RawDescriptionHelpFormatter, 19 | description="generate attention pkl", 20 | argument_default=argparse.SUPPRESS) 21 | 22 | parser.add_argument('-m', '--model', type=str, required=True, 23 | help=".ckpt model file") 24 | parser.add_argument('-s', '--split', type=str, 25 | help='test_set name given as in configuration file') 26 | parser.add_argument('-o', '--output', type=str, 27 | help='output file name.') 28 | 29 | args = parser.parse_args() 30 | translator = Translator(models=[args.model], splits=args.split, 31 | source=None, disable_filters=True, override=None, 32 | task_id=None) 33 | 34 | model = translator.instances[0] 35 | 36 | dataset = model.load_data(args.split, 64, mode='beam') 37 | loader = make_dataloader(dataset) 38 | data = [] 39 | 40 | torch.set_grad_enabled(False) 41 | 42 | # Greedy search 43 | for batch in tqdm.tqdm(loader, unit='batch'): 44 | # Visual attention (may not be available) 45 | img_att = [[] for i in range(batch.size)] 46 | 47 | # Textual attention 48 | main_att = [[] for i in range(batch.size)] 49 | 50 | # Hierarchical attention 51 | hie_att = [[] for i in range(batch.size)] 52 | 53 | hyps = [[] for i in range(batch.size)] 54 | 55 | fini = torch.zeros(batch.size, dtype=torch.long) 56 | ctx_dict = model.encode(batch) 57 | 58 | # Get initial hidden state 59 | h_t = model.dec.f_init(ctx_dict) 60 | 61 | y_t = model.get_bos(batch.size) 62 | 63 | # Iterate for 100 timesteps 64 | for t in range(100): 65 | logp, h_t = model.dec.f_next(ctx_dict, model.dec.get_emb(y_t, t).squeeze(1), h_t) 66 | 67 | # text attention 68 | tatt = model.dec.history['alpha_txt'][-1].data.clone().numpy() 69 | iatt, hatt = None, None 70 | 71 | # If decoder has .img_alpha_t 72 | if hasattr(model.dec, 'img_alpha_t'): 73 | iatt = model.dec.img_alpha_t.data.clone().numpy() 74 | 75 | if hasattr(model.dec, 'h_att'): 76 | hatt = model.dec.h_att.data.clone().numpy() 77 | 78 | top_scores, y_t = logp.data.topk(1, largest=True) 79 | hyp = y_t.numpy().tolist() 80 | for idx, w in enumerate(hyp): 81 | if 2 not in hyps[idx]: 82 | hyps[idx].append(w[0]) 83 | main_att[idx].append(tatt[:, idx]) 84 | if iatt is None: 85 | img_att[idx].append(None) 86 | else: 87 | img_att[idx].append(iatt[:, idx]) 88 | 89 | if hatt is None: 90 | hie_att[idx].append(None) 91 | else: 92 | hie_att[idx].append(hatt[:, idx]) 93 | 94 | # Did we finish? 
(2 == <eos>) 95 | fini = fini | y_t.eq(2).squeeze().long() 96 | if fini.sum() == batch.size: 97 | break 98 | 99 | for h, sa, ia, ha in zip(hyps, main_att, img_att, hie_att): 100 | d = { 101 | 'hyp': model.trg_vocab.idxs_to_sent(h), 102 | 'pri_att': np.array(sa), 103 | 'sec_att': np.array(ia) if ia is not None else None, 104 | 'hie_att': np.array(ha) if ha is not None else None, 105 | } 106 | data.append(d) 107 | 108 | # Put into correct order 109 | data = [data[i] for i, j in sorted( 110 | enumerate(loader.batch_sampler.orig_idxs), key=lambda k: k[1])] 111 | 112 | src_lines = [] 113 | with open(model.opts.data['{}_set'.format(args.split)][model.sl]) as sf: 114 | for line in sf: 115 | src_lines.append(line.strip()) 116 | 117 | for d, line in zip(data, src_lines): 118 | d['src'] = line 119 | 120 | with open(args.output, 'wb') as f: 121 | pkl.dump(data, f) 122 | -------------------------------------------------------------------------------- /scripts/package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VER=$1 4 | 5 | if [[ -z $VER ]]; then 6 | echo "You need to provide a version string." 7 | exit 1 8 | fi 9 | 10 | rm -rf build/ dist/ 11 | 12 | echo "Preparing $VER" 13 | echo "__version__ = '${VER}'" > nmtpytorch/__init__.py 14 | 15 | git commit nmtpytorch/__init__.py -m "bump version to ${VER}" 16 | git push origin master 17 | git tag -a "v${VER}" -m "Version ${VER}" 18 | git push origin --tags 19 | 20 | # prep packages 21 | python setup.py sdist bdist_wheel 22 | 23 | #twine upload --repository-url https://test.pypi.org/legacy/ dist/* # Upload to TestPyPI 24 | twine upload dist/* # Upload to PyPI 25 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | import setuptools 4 | 5 | 6 | def get_nmtpytorch_version(): 7 | with open('nmtpytorch/__init__.py') as f: 8 | s = f.read().split('\n')[0] 9 | if '__version__' not in s: 10 | raise RuntimeError('Can not detect version from nmtpytorch/__init__.py') 11 | return eval(s.split(' ')[-1]) 12 | 13 | 14 | with open('README.md') as f: 15 | long_description = f.read() 16 | 17 | with open('NEWS.md') as f: 18 | release_notes = f.read() 19 | 20 | long_description = long_description.replace( 21 | '## Release Notes\n\nSee [NEWS.md](NEWS.md).\n', release_notes) 22 | 23 | setuptools.setup( 24 | name='nmtpytorch', 25 | version=get_nmtpytorch_version(), 26 | description='Sequence-to-Sequence Framework in PyTorch', 27 | long_description=long_description, 28 | long_description_content_type='text/markdown', 29 | url='https://github.com/lium-lst/nmtpytorch', 30 | author='Ozan Caglayan', 31 | author_email='ozancag@gmail.com', 32 | license='MIT', 33 | project_urls={ 34 | 'Wiki': 'https://github.com/lium-lst/nmtpytorch/wiki', 35 | }, 36 | classifiers=[ 37 | 'Intended Audience :: Science/Research', 38 | 'Topic :: Scientific/Engineering', 39 | 'License :: OSI Approved :: MIT License', 40 | 'Programming Language :: Python :: 3 :: Only', 41 | 'Programming Language :: Python :: 3.7', 42 | 'Operating System :: POSIX', 43 | ], 44 | keywords='nmt neural-mt translation sequence-to-sequence deep-learning pytorch', 45 | python_requires='~=3.7', 46 | install_requires=[ 47 | 'numpy', 'scikit-learn', 'tqdm', 'pillow', 48 | 'torch==1.4.0', 'torchvision==0.5.0', 'pytorch-ignite==0.3.0', 49 | 'sacrebleu>=1.2.9', 50 | 'editdistance==0.4', 'subword_nmt==0.3.5', 51
| ], 52 | include_package_data=True, 53 | exclude_package_data={'': ['.git']}, 54 | packages=setuptools.find_packages(), 55 | scripts=[str(p) for p in pathlib.Path('bin').glob('*')], 56 | zip_safe=False) 57 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 90 3 | ignore = E116,E241,E265,W504,E501 4 | exclude = docs,examples,build 5 | --------------------------------------------------------------------------------