├── .gitignore ├── LICENSE ├── README.md ├── bin ├── pysimt ├── pysimt-build-vocab ├── pysimt-coco-metrics └── pysimt-install-extra ├── configs ├── en-de │ ├── rnn-mmt │ │ ├── consc-rnn-mmt-dec-oc.conf │ │ ├── consc-rnn-mmt-dec-od.conf │ │ ├── consc-rnn-mmt-enc-od.conf │ │ ├── wait1-rnn-mmt-dec-oc.conf │ │ ├── wait1-rnn-mmt-dec-od.conf │ │ ├── wait1-rnn-mmt-enc-od.conf │ │ ├── wait2-rnn-mmt-dec-oc.conf │ │ ├── wait2-rnn-mmt-dec-od.conf │ │ ├── wait2-rnn-mmt-enc-od.conf │ │ ├── wait3-rnn-mmt-dec-oc.conf │ │ ├── wait3-rnn-mmt-dec-od.conf │ │ ├── wait3-rnn-mmt-enc-od.conf │ │ ├── wait4-rnn-mmt-dec-oc.conf │ │ ├── wait4-rnn-mmt-dec-od.conf │ │ ├── wait4-rnn-mmt-enc-od.conf │ │ ├── wait5-rnn-mmt-dec-oc.conf │ │ ├── wait5-rnn-mmt-dec-od.conf │ │ ├── wait5-rnn-mmt-enc-od.conf │ │ ├── wait6-rnn-mmt-dec-oc.conf │ │ ├── wait6-rnn-mmt-dec-od.conf │ │ ├── wait6-rnn-mmt-enc-od.conf │ │ ├── wait7-rnn-mmt-dec-oc.conf │ │ ├── wait7-rnn-mmt-dec-od.conf │ │ └── wait7-rnn-mmt-enc-od.conf │ ├── rnn-nmt │ │ ├── consc-rnn.conf │ │ ├── wait1-rnn.conf │ │ ├── wait2-rnn.conf │ │ ├── wait3-rnn.conf │ │ ├── wait4-rnn.conf │ │ ├── wait5-rnn.conf │ │ ├── wait6-rnn.conf │ │ └── wait7-rnn.conf │ ├── transformers-mmt │ │ ├── consc-tf-mmt-dec-oc.conf │ │ ├── consc-tf-mmt-dec-od.conf │ │ ├── consc-tf-mmt-enc-od.conf │ │ ├── wait1-tf-mmt-dec-oc.conf │ │ ├── wait1-tf-mmt-dec-od.conf │ │ ├── wait1-tf-mmt-enc-od.conf │ │ ├── wait2-tf-mmt-dec-oc.conf │ │ ├── wait2-tf-mmt-dec-od.conf │ │ ├── wait2-tf-mmt-enc-od.conf │ │ ├── wait3-tf-mmt-dec-oc.conf │ │ ├── wait3-tf-mmt-dec-od.conf │ │ ├── wait3-tf-mmt-enc-od.conf │ │ ├── wait5-tf-mmt-dec-oc.conf │ │ ├── wait5-tf-mmt-dec-od.conf │ │ ├── wait5-tf-mmt-enc-od.conf │ │ ├── wait7-tf-mmt-dec-oc.conf │ │ ├── wait7-tf-mmt-dec-od.conf │ │ └── wait7-tf-mmt-enc-od.conf │ └── transformers-nmt │ │ ├── consc-tf.conf │ │ ├── wait1-tf.conf │ │ ├── wait2-tf.conf │ │ ├── wait3-tf.conf │ │ ├── wait4-tf.conf │ │ ├── wait5-tf.conf │ │ ├── wait6-tf.conf │ │ └── wait7-tf.conf └── en-fr │ ├── rnn-mmt │ ├── consc-rnn-mmt-dec-oc.conf │ ├── consc-rnn-mmt-dec-od.conf │ ├── consc-rnn-mmt-enc-od.conf │ ├── wait1-rnn-mmt-dec-oc.conf │ ├── wait1-rnn-mmt-dec-od.conf │ ├── wait1-rnn-mmt-enc-od.conf │ ├── wait2-rnn-mmt-dec-oc.conf │ ├── wait2-rnn-mmt-dec-od.conf │ ├── wait2-rnn-mmt-enc-od.conf │ ├── wait3-rnn-mmt-dec-oc.conf │ ├── wait3-rnn-mmt-dec-od.conf │ ├── wait3-rnn-mmt-enc-od.conf │ ├── wait4-rnn-mmt-dec-oc.conf │ ├── wait4-rnn-mmt-dec-od.conf │ ├── wait4-rnn-mmt-enc-od.conf │ ├── wait5-rnn-mmt-dec-oc.conf │ ├── wait5-rnn-mmt-dec-od.conf │ ├── wait5-rnn-mmt-enc-od.conf │ ├── wait6-rnn-mmt-dec-oc.conf │ ├── wait6-rnn-mmt-dec-od.conf │ ├── wait6-rnn-mmt-enc-od.conf │ ├── wait7-rnn-mmt-dec-oc.conf │ ├── wait7-rnn-mmt-dec-od.conf │ └── wait7-rnn-mmt-enc-od.conf │ ├── rnn-nmt │ ├── consc-rnn.conf │ ├── wait1-rnn.conf │ ├── wait2-rnn.conf │ ├── wait3-rnn.conf │ ├── wait4-rnn.conf │ ├── wait5-rnn.conf │ ├── wait6-rnn.conf │ └── wait7-rnn.conf │ ├── transformers-mmt │ ├── consc-tf-mmt-dec-oc.conf │ ├── consc-tf-mmt-dec-od.conf │ ├── consc-tf-mmt-enc-od.conf │ ├── wait1-tf-mmt-dec-oc.conf │ ├── wait1-tf-mmt-dec-od.conf │ ├── wait1-tf-mmt-enc-od.conf │ ├── wait2-tf-mmt-dec-oc.conf │ ├── wait2-tf-mmt-dec-od.conf │ ├── wait2-tf-mmt-enc-od.conf │ ├── wait3-tf-mmt-dec-oc.conf │ ├── wait3-tf-mmt-dec-od.conf │ ├── wait3-tf-mmt-enc-od.conf │ ├── wait5-tf-mmt-dec-oc.conf │ ├── wait5-tf-mmt-dec-od.conf │ ├── wait5-tf-mmt-enc-od.conf │ ├── wait7-tf-mmt-dec-oc.conf │ ├── wait7-tf-mmt-dec-od.conf │ └── wait7-tf-mmt-enc-od.conf │ 
└── transformers-nmt │ ├── consc-tf.conf │ ├── wait1-tf.conf │ ├── wait2-tf.conf │ ├── wait3-tf.conf │ ├── wait4-tf.conf │ ├── wait5-tf.conf │ ├── wait6-tf.conf │ └── wait7-tf.conf ├── data ├── README.md ├── moses-5cbafabfd │ ├── README.md │ ├── share │ │ └── nonbreaking_prefixes │ │ │ ├── README.txt │ │ │ ├── nonbreaking_prefix.cs │ │ │ ├── nonbreaking_prefix.de │ │ │ ├── nonbreaking_prefix.en │ │ │ └── nonbreaking_prefix.fr │ └── tokenizer │ │ ├── basic-protected-patterns │ │ ├── detokenizer.perl │ │ ├── lowercase.perl │ │ ├── normalize-punctuation.perl │ │ ├── remove-non-printing-char.perl │ │ ├── replace-unicode-punctuation.perl │ │ └── tokenizer.perl └── multi30k │ ├── README.md │ ├── features │ └── README.md │ ├── image_lists │ ├── test_2016_flickr.imglist │ ├── test_2017_flickr.imglist │ ├── test_2017_mscoco.imglist │ ├── test_2018_flickr.imglist │ ├── train.imglist │ └── val.imglist │ ├── prepare.sh │ └── raw │ ├── README.md │ ├── test_2016_flickr.cs.gz │ ├── test_2016_flickr.de.gz │ ├── test_2016_flickr.en.gz │ ├── test_2016_flickr.fr.gz │ ├── test_2017_flickr.de.gz │ ├── test_2017_flickr.en.gz │ ├── test_2017_flickr.fr.gz │ ├── test_2017_mscoco.de.gz │ ├── test_2017_mscoco.en.gz │ ├── test_2017_mscoco.fr.gz │ ├── test_2018_flickr.cs.gz │ ├── test_2018_flickr.de.gz │ ├── test_2018_flickr.en.gz │ ├── test_2018_flickr.fr.gz │ ├── train.cs.gz │ ├── train.de.gz │ ├── train.en.gz │ ├── train.fr.gz │ ├── val.cs.gz │ ├── val.de.gz │ ├── val.en.gz │ └── val.fr.gz ├── doccov.svg ├── docs ├── datasets │ ├── base.html │ ├── collate.html │ ├── imagefolder.html │ ├── index.html │ ├── kaldi.html │ ├── multimodal.html │ ├── numpy.html │ ├── objdet.html │ └── text.html ├── evaluator.html ├── index.html ├── layers │ ├── attention │ │ ├── dot.html │ │ ├── hierarchical.html │ │ ├── index.html │ │ ├── mlp.html │ │ ├── multihead.html │ │ ├── scaled_dot.html │ │ └── uniform.html │ ├── decoders │ │ ├── conditional.html │ │ ├── index.html │ │ └── tf_decoder.html │ ├── embedding.html │ ├── encoders │ │ ├── index.html │ │ ├── recurrent.html │ │ ├── speech_lstm.html │ │ ├── transformers.html │ │ └── vis_features.html │ ├── ff.html │ ├── fusion.html │ ├── index.html │ ├── pool.html │ ├── positionwise_ff.html │ ├── selector.html │ └── transformers │ │ ├── base_sublayer.html │ │ ├── cross_attention_sublayer.html │ │ ├── cross_attention_sublayer_mm_flat.html │ │ ├── cross_attention_sublayer_mm_hier.html │ │ ├── cross_attention_sublayer_mm_parallel.html │ │ ├── cross_attention_sublayer_mm_serial.html │ │ ├── index.html │ │ └── self_attention_sublayer.html ├── lr_scheduler.html ├── mainloop.html ├── metrics │ ├── cer.html │ ├── index.html │ ├── meteor.html │ ├── metric.html │ ├── multibleu.html │ ├── sacrebleu.html │ ├── simnmt.html │ └── wer.html ├── models │ ├── index.html │ ├── snmt_rnn.html │ ├── snmt_rnn_encatt.html │ ├── snmt_rnn_encatt_waitk.html │ ├── snmt_rnn_waitk.html │ ├── snmt_tf.html │ └── snmt_tf_waitk.html ├── monitor.html ├── optimizer.html ├── samplers │ ├── approx.html │ ├── bucket.html │ └── index.html ├── stranslator.html ├── translators │ ├── greedy.html │ ├── index.html │ ├── sim_greedy.html │ └── waitk_greedy.html ├── utils │ ├── batch.html │ ├── data.html │ ├── device.html │ ├── filterchain.html │ ├── index.html │ ├── io.html │ ├── kaldi.html │ ├── misc.html │ ├── ml_metrics.html │ ├── nn.html │ ├── resource_mgr.html │ ├── tensorboard.html │ └── topology.html └── vocabulary.html ├── environment.yml ├── experiments └── README.md ├── logo.png ├── make_docs.sh ├── pysimt ├── __init__.py ├── 
cocoeval │ ├── README.md │ ├── __init__.py │ ├── bleu │ │ ├── LICENSE.bleu │ │ ├── __init__.py │ │ ├── bleu.py │ │ └── bleu_scorer.py │ ├── cider │ │ ├── __init__.py │ │ ├── cider.py │ │ └── cider_scorer.py │ ├── meteor │ │ ├── __init__.py │ │ └── meteor.py │ └── rouge │ │ ├── __init__.py │ │ └── rouge.py ├── config.py ├── datasets │ ├── __init__.py │ ├── collate.py │ ├── multimodal.py │ ├── numpy.py │ ├── objdet.py │ └── text.py ├── docs.md ├── evaluator.py ├── layers │ ├── __init__.py │ ├── attention │ │ ├── __init__.py │ │ ├── dot.py │ │ ├── hierarchical.py │ │ ├── mlp.py │ │ ├── multihead.py │ │ ├── scaled_dot.py │ │ └── uniform.py │ ├── decoders │ │ ├── __init__.py │ │ ├── conditional.py │ │ └── tf_decoder.py │ ├── embedding.py │ ├── encoders │ │ ├── __init__.py │ │ ├── recurrent.py │ │ ├── speech_lstm.py │ │ ├── transformers.py │ │ └── vis_features.py │ ├── ff.py │ ├── fusion.py │ ├── pool.py │ ├── positionwise_ff.py │ ├── selector.py │ └── transformers │ │ ├── __init__.py │ │ ├── base_sublayer.py │ │ ├── cross_attention_sublayer.py │ │ ├── cross_attention_sublayer_mm_flat.py │ │ ├── cross_attention_sublayer_mm_hier.py │ │ ├── cross_attention_sublayer_mm_parallel.py │ │ ├── cross_attention_sublayer_mm_serial.py │ │ └── self_attention_sublayer.py ├── logger.py ├── lr_scheduler.py ├── mainloop.py ├── metrics │ ├── __init__.py │ ├── cer.py │ ├── meteor.py │ ├── metric.py │ ├── multibleu.py │ ├── sacrebleu.py │ ├── simnmt.py │ └── wer.py ├── models │ ├── __init__.py │ ├── snmt_rnn.py │ ├── snmt_rnn_encatt.py │ ├── snmt_rnn_encatt_waitk.py │ ├── snmt_rnn_waitk.py │ ├── snmt_tf.py │ └── snmt_tf_waitk.py ├── monitor.py ├── optimizer.py ├── samplers │ ├── __init__.py │ ├── approx.py │ └── bucket.py ├── stranslator.py ├── translators │ ├── __init__.py │ ├── beam.py │ ├── greedy.py │ ├── sim_greedy.py │ └── waitk_greedy.py ├── utils │ ├── __init__.py │ ├── batch.py │ ├── data.py │ ├── device.py │ ├── filterchain.py │ ├── io.py │ ├── kaldi.py │ ├── misc.py │ ├── ml_metrics.py │ ├── nn.py │ ├── resource_mgr.py │ ├── tensorboard.py │ └── topology.py └── vocabulary.py ├── scripts ├── decode_greedy.sh ├── decode_test_waitk.sh ├── decode_train_waitk.sh ├── decode_wait_if_diff.sh ├── decode_wait_if_worse.sh ├── delay_analysis.py └── delay_metrics.py ├── setup.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | pysimt.egg-info 3 | .cache 4 | build/ 5 | dist/ 6 | doc/_build/ 7 | ipynb/.ipynb_checkpoints 8 | .idea/ 9 | data/multi30k/en-de 10 | data/multi30k/en-fr 11 | data/multi30k/en-cs 12 | data/multi30k/features 13 | experiments 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 NLP@Imperial 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bin/pysimt-install-extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | which java &> /dev/null 3 | if [[ "x$?" == "x1" ]]; then 4 | echo "'java' not found in PATH. You need to have a working JRE installation for METEOR." 5 | else 6 | echo "OK: Found 'java'." 7 | fi 8 | 9 | CACHE=${HOME}/.pysimt 10 | METEOR=${CACHE}/meteor-data 11 | 12 | if [[ ! -d ${CACHE} ]]; then 13 | echo "Creating ${CACHE} folder..." 14 | mkdir -p ${CACHE} 15 | fi 16 | 17 | if [[ ! -d $METEOR ]]; then 18 | git clone https://github.com/ozancaglayan/meteor-1.5-data.git $METEOR 19 | pushd $METEOR 20 | ./recompress.sh 21 | popd 22 | fi 23 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/consc-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | 51 | aux_dropout: 0.5 52 | aux_lnorm: True 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | dec_inp_activ: None 57 | mm_fusion_op: sum 58 | mm_fusion_dropout: 0.0 59 | 60 | 61 | [data] 62 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 63 | img_root: ./data/multi30k/features/butd 64 | 65 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 66 | 'image': '${img_root}/train_obj36.npz', 67 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 68 | 69 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 70 | 'image': '${img_root}/valid_obj36.npz', 71 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 74 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 75 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 76 | 77 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 78 | 'image': 
'${img_root}/test_2017_flickr_obj36.npz', 79 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 80 | 81 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 82 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 83 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 84 | 85 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 86 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 87 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 88 | 89 | [vocabulary] 90 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 91 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 92 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/consc-rnn-mmt-enc-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: EncoderSelfAttentionSimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | 51 | aux_dropout: 0.5 52 | aux_lnorm: True 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | dec_inp_activ: None 57 | 58 | feat_mode: roi_feats 59 | n_heads: 1 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | 
src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait1-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 1} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait2-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | 
eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 2} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait3-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 
36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 3} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait4-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 4} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | 
[data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait5-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 5} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 
'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait6-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 6} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 
85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait7-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 7} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/consc-rnn.conf: 
-------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | 50 | [data] 51 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 52 | 53 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 54 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 55 | 56 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 57 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 58 | 59 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 60 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 61 | 62 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 63 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 64 | 65 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 66 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 67 | 68 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 69 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 70 | 71 | [vocabulary] 72 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 73 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 74 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait1-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 1} 50 | 51 | [data] 52 | txt_root: 
./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait2-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 2} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | 
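
Editor's note on the `.conf` files above and below: within each config family the `wait*` variants differ only in the `translator_args: {'k': N}` setting (the `consc-*` variants omit it and use `model_type: SimultaneousNMT` / `SimultaneousTFNMT` instead of the wait-k model types); the optimizer, model dimensions, and data paths are otherwise shared. The `${vars:sl}` / `${data:txt_root}` references are resolved by pysimt's own loader (`pysimt/config.py`). The snippet below is only an illustrative sketch — an assumption, not pysimt's actual loader — showing how the same `${section:option}` syntax behaves under Python's standard-library `configparser.ExtendedInterpolation`, using a few values copied from these configs.

```python
# Illustrative sketch only: pysimt/config.py is the authoritative parser for
# these .conf files; this just demonstrates the ${section:option} interpolation
# syntax they use, via the stdlib configparser.
from configparser import ConfigParser, ExtendedInterpolation

EXAMPLE = """
[vars]
sl: en
tl: de

[train]
save_path: ./experiments/${vars:sl}-${vars:tl}
tensorboard_dir: ${save_path}/tb_dir

[data]
txt_root: ./data/multi30k/${vars:sl}-${vars:tl}

[vocabulary]
src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl}
"""

parser = ConfigParser(interpolation=ExtendedInterpolation())
parser.read_string(EXAMPLE)

# ${save_path} is a same-section reference, ${vars:sl} a cross-section one.
print(parser["train"]["tensorboard_dir"])  # ./experiments/en-de/tb_dir
print(parser["vocabulary"]["src"])         # ./data/multi30k/en-de/train.lc.norm.tok.vocab.en
```

For how the resolved options are actually consumed, the `bin/pysimt` entry point and `pysimt/config.py` listed in the tree above are the places to look.
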
-------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait3-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 3} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait4-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | 
direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 4} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait5-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 5} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 
74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait6-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 6} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait7-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | 
tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 7} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/consc-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | 57 | [data] 58 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 59 | 60 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 62 | 63 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 73 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 74 | 75 | test_2018_flickr_set: {'src': 
'${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 76 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | [vocabulary] 79 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 80 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 81 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait1-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 1} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait2-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 
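# With lr_decay set to noam, this presumably follows the inverse-square-root
# schedule of Vaswani et al. (2017):
#   lr(step) = scale * tf_model_dim^(-0.5) * min(step^(-0.5), step * lr_warmup_steps^(-1.5))
# i.e. a linear warm-up over the first lr_warmup_steps (4000) steps followed by
# a 1/sqrt(step) decay. With tf_model_dim = 512 the peak at step 4000 is roughly
# 512^(-0.5) * 4000^(-0.5) ≈ 7.0e-4 before scaling; treating the lr value of 0.2
# below as that scale factor (an assumption about the pysimt trainer, not stated
# in this file) would give a peak learning rate of about 1.4e-4.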
26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 2} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait3-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 3} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': 
'${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait4-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 4} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait5-tf.conf: 
-------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 5} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait6-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | 
enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 6} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait7-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 7} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': 
'${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-mmt/consc-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | 51 | aux_dropout: 0.5 52 | aux_lnorm: True 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | dec_inp_activ: None 57 | mm_fusion_op: sum 58 | mm_fusion_dropout: 0.0 59 | 60 | 61 | [data] 62 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 63 | img_root: ./data/multi30k/features/butd 64 | 65 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 66 | 'image': '${img_root}/train_obj36.npz', 67 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 68 | 69 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 70 | 'image': '${img_root}/valid_obj36.npz', 71 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 74 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 75 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 76 | 77 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 78 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 79 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 80 | 81 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 82 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 83 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 84 | 85 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 86 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 87 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 88 | 89 | [vocabulary] 90 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 91 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 92 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-mmt/consc-rnn-mmt-enc-od.conf: 
-------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: EncoderSelfAttentionSimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | 51 | aux_dropout: 0.5 52 | aux_lnorm: True 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | dec_inp_activ: None 57 | 58 | feat_mode: roi_feats 59 | n_heads: 1 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/consc-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 
| optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | 50 | [data] 51 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 52 | 53 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 54 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 55 | 56 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 57 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 58 | 59 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 60 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 61 | 62 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 63 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 64 | 65 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 66 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 67 | 68 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 69 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 70 | 71 | [vocabulary] 72 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 73 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 74 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait1-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 1} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': 
'${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait2-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 2} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait3-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: 
False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 3} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait4-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 4} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': 
'${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait5-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 5} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait6-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 
0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 6} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait7-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 7} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': 
'${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/consc-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | 57 | [data] 58 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 59 | 60 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 62 | 63 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 73 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 74 | 75 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 76 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | [vocabulary] 79 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 80 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 81 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait1-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 
1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 1} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait2-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: 
src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 2} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait3-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 3} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 
77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait4-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 4} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait5-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 
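# lr_decay_factor and lr_decay_patience above/below are plateau-scheduler
# settings and are presumably ignored while lr_decay is set to noam.
# The ${...} references used throughout these files follow configparser-style
# extended interpolation: ${section:option} (e.g. ${vars:d_model},
# ${data:txt_root}) resolves an option from another section, while a bare
# ${option} (e.g. ${save_path}) resolves one from the current section. With
# sl = en and tl = fr, save_path below expands to ./experiments/en-fr and
# tensorboard_dir to ./experiments/en-fr/tb_dir. Whether pysimt relies on
# Python's configparser.ExtendedInterpolation or its own resolver is an
# assumption here.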
28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 5} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait6-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 6} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | 
test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait7-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 7} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Data preparation 2 | 3 | 
- Make sure that you have already created and activated the `pysimt` anaconda environment. 4 | - Please run `install.sh` from within this folder to install the necessary tools for data pre-processing. 5 | - Finally, proceed with independent preparation scripts under `multi30k`. 6 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/README.md: -------------------------------------------------------------------------------- 1 | moses scripts 2 | -- 3 | 4 | This is a snapshot of Moses scripts from the upstream repository. The 5 | specific commit taken is: 6 | 7 | ``` 8 | commit 5cbafabfd5ed2833ca8808bdca6e785935713159 9 | Author: Hieu Hoang 10 | Date: Wed Oct 14 11:48:26 2020 -0700 11 | ``` 12 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/share/nonbreaking_prefixes/README.txt: -------------------------------------------------------------------------------- 1 | The language suffix can be found here: 2 | 3 | http://www.loc.gov/standards/iso639-2/php/code_list.php 4 | 5 | This code includes data from Daniel Naber's Language Tools (czech abbreviations). 6 | This code includes data from czech wiktionary (also czech abbreviations). 7 | 8 | 9 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/share/nonbreaking_prefixes/nonbreaking_prefix.en: -------------------------------------------------------------------------------- 1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | 4 | #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) 5 | #usually upper case letters are initials in a name 6 | A 7 | B 8 | C 9 | D 10 | E 11 | F 12 | G 13 | H 14 | I 15 | J 16 | K 17 | L 18 | M 19 | N 20 | O 21 | P 22 | Q 23 | R 24 | S 25 | T 26 | U 27 | V 28 | W 29 | X 30 | Y 31 | Z 32 | 33 | #List of titles. These are often followed by upper-case names, but do not indicate sentence breaks 34 | Adj 35 | Adm 36 | Adv 37 | Asst 38 | Bart 39 | Bldg 40 | Brig 41 | Bros 42 | Capt 43 | Cmdr 44 | Col 45 | Comdr 46 | Con 47 | Corp 48 | Cpl 49 | DR 50 | Dr 51 | Drs 52 | Ens 53 | Gen 54 | Gov 55 | Hon 56 | Hr 57 | Hosp 58 | Insp 59 | Lt 60 | MM 61 | MR 62 | MRS 63 | MS 64 | Maj 65 | Messrs 66 | Mlle 67 | Mme 68 | Mr 69 | Mrs 70 | Ms 71 | Msgr 72 | Op 73 | Ord 74 | Pfc 75 | Ph 76 | Prof 77 | Pvt 78 | Rep 79 | Reps 80 | Res 81 | Rev 82 | Rt 83 | Sen 84 | Sens 85 | Sfc 86 | Sgt 87 | Sr 88 | St 89 | Supt 90 | Surg 91 | 92 | #misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) 93 | v 94 | vs 95 | i.e 96 | rev 97 | e.g 98 | # rupees 99 | Rs 100 | 101 | #Numbers only. These should only induce breaks when followed by a numeric sequence 102 | # add NUMERIC_ONLY after the word for this function 103 | #This case is mostly for the english "No."
which can either be a sentence of its own, or 104 | #if followed by a number, a non-breaking prefix 105 | No #NUMERIC_ONLY# 106 | Nos 107 | Art #NUMERIC_ONLY# 108 | Nr 109 | pp #NUMERIC_ONLY# 110 | 111 | #month abbreviations 112 | Jan 113 | Feb 114 | Mar 115 | Apr 116 | #May is a full word 117 | Jun 118 | Jul 119 | Aug 120 | Sep 121 | Oct 122 | Nov 123 | Dec 124 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/share/nonbreaking_prefixes/nonbreaking_prefix.fr: -------------------------------------------------------------------------------- 1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | # 4 | #any single upper case letter followed by a period is not a sentence ender 5 | #usually upper case letters are initials in a name 6 | #no French words end in single lower-case letters, so we throw those in too? 7 | A 8 | B 9 | C 10 | D 11 | E 12 | F 13 | G 14 | H 15 | I 16 | J 17 | K 18 | L 19 | M 20 | N 21 | O 22 | P 23 | Q 24 | R 25 | S 26 | T 27 | U 28 | V 29 | W 30 | X 31 | Y 32 | Z 33 | #a 34 | b 35 | c 36 | d 37 | e 38 | f 39 | g 40 | h 41 | i 42 | j 43 | k 44 | l 45 | m 46 | n 47 | o 48 | p 49 | q 50 | r 51 | s 52 | t 53 | u 54 | v 55 | w 56 | x 57 | y 58 | z 59 | 60 | # Period-final abbreviation list for French 61 | A.C.N 62 | A.M 63 | art 64 | ann 65 | apr 66 | av 67 | auj 68 | lib 69 | B.P 70 | boul 71 | ca 72 | c.-à-d 73 | cf 74 | ch.-l 75 | chap 76 | contr 77 | C.P.I 78 | C.Q.F.D 79 | C.N 80 | C.N.S 81 | C.S 82 | dir 83 | éd 84 | e.g 85 | env 86 | al 87 | etc 88 | E.V 89 | ex 90 | fasc 91 | fém 92 | fig 93 | fr 94 | hab 95 | ibid 96 | id 97 | i.e 98 | inf 99 | LL.AA 100 | LL.AA.II 101 | LL.AA.RR 102 | LL.AA.SS 103 | L.D 104 | LL.EE 105 | LL.MM 106 | LL.MM.II.RR 107 | loc.cit 108 | masc 109 | MM 110 | ms 111 | N.B 112 | N.D.A 113 | N.D.L.R 114 | N.D.T 115 | n/réf 116 | NN.SS 117 | N.S 118 | N.D 119 | N.P.A.I 120 | p.c.c 121 | pl 122 | pp 123 | p.ex 124 | p.j 125 | P.S 126 | R.A.S 127 | R.-V 128 | R.P 129 | R.I.P 130 | SS 131 | S.S 132 | S.A 133 | S.A.I 134 | S.A.R 135 | S.A.S 136 | S.E 137 | sec 138 | sect 139 | sing 140 | S.M 141 | S.M.I.R 142 | sq 143 | sqq 144 | suiv 145 | sup 146 | suppl 147 | tél 148 | T.S.V.P 149 | vb 150 | vol 151 | vs 152 | X.O 153 | Z.I 154 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/basic-protected-patterns: -------------------------------------------------------------------------------- 1 | <\/?\S+\/?> 2 | <\S+( [a-zA-Z0-9]+\=\"?[^\"]\")+ ?\/?> 3 | <\S+( [a-zA-Z0-9]+\=\'?[^\']\')+ ?\/?> 4 | [\w\-\_\.]+\@([\w\-\_]+\.)+[a-zA-Z]{2,} 5 | http[s]?:\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]*(\/)? 6 | ftp[s]?:\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]*(\/)? 7 | rsync:\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]*(\/)? 8 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/lowercase.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 
5 | 6 | use warnings; 7 | use strict; 8 | 9 | while (@ARGV) { 10 | $_ = shift; 11 | /^-b$/ && ($| = 1, next); # not buffered (flush each line) 12 | } 13 | 14 | binmode(STDIN, ":utf8"); 15 | binmode(STDOUT, ":utf8"); 16 | 17 | while() { 18 | print lc($_); 19 | } 20 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/normalize-punctuation.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 5 | 6 | use warnings; 7 | use strict; 8 | 9 | my $language = "en"; 10 | my $PENN = 0; 11 | 12 | while (@ARGV) { 13 | $_ = shift; 14 | /^-b$/ && ($| = 1, next); # not buffered (flush each line) 15 | /^-l$/ && ($language = shift, next); 16 | /^[^\-]/ && ($language = $_, next); 17 | /^-penn$/ && ($PENN = 1, next); 18 | } 19 | 20 | while() { 21 | s/\r//g; 22 | # remove extra spaces 23 | s/\(/ \(/g; 24 | s/\)/\) /g; s/ +/ /g; 25 | s/\) ([\.\!\:\?\;\,])/\)$1/g; 26 | s/\( /\(/g; 27 | s/ \)/\)/g; 28 | s/(\d) \%/$1\%/g; 29 | s/ :/:/g; 30 | s/ ;/;/g; 31 | # normalize unicode punctuation 32 | if ($PENN == 0) { 33 | s/\`/\'/g; 34 | s/\'\'/ \" /g; 35 | } 36 | 37 | s/„/\"/g; 38 | s/“/\"/g; 39 | s/”/\"/g; 40 | s/–/-/g; 41 | s/—/ - /g; s/ +/ /g; 42 | s/´/\'/g; 43 | s/([a-z])‘([a-z])/$1\'$2/gi; 44 | s/([a-z])’([a-z])/$1\'$2/gi; 45 | s/‘/\'/g; 46 | s/‚/\'/g; 47 | s/’/\"/g; 48 | s/''/\"/g; 49 | s/´´/\"/g; 50 | s/…/.../g; 51 | # French quotes 52 | s/ « / \"/g; 53 | s/« /\"/g; 54 | s/«/\"/g; 55 | s/ » /\" /g; 56 | s/ »/\"/g; 57 | s/»/\"/g; 58 | # handle pseudo-spaces 59 | s/ \%/\%/g; 60 | s/nº /nº /g; 61 | s/ :/:/g; 62 | s/ ºC/ ºC/g; 63 | s/ cm/ cm/g; 64 | s/ \?/\?/g; 65 | s/ \!/\!/g; 66 | s/ ;/;/g; 67 | s/, /, /g; s/ +/ /g; 68 | 69 | # English "quotation," followed by comma, style 70 | if ($language eq "en") { 71 | s/\"([,\.]+)/$1\"/g; 72 | } 73 | # Czech is confused 74 | elsif ($language eq "cs" || $language eq "cz") { 75 | } 76 | # German/Spanish/French "quotation", followed by comma, style 77 | else { 78 | s/,\"/\",/g; 79 | s/(\.+)\"(\s*[^<])/\"$1$2/g; # don't fix period at end of sentence 80 | } 81 | 82 | 83 | if ($language eq "de" || $language eq "es" || $language eq "cz" || $language eq "cs" || $language eq "fr") { 84 | s/(\d) (\d)/$1,$2/g; 85 | } 86 | else { 87 | s/(\d) (\d)/$1.$2/g; 88 | } 89 | print $_; 90 | } 91 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/remove-non-printing-char.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 
5 | 6 | use warnings; 7 | use utf8; 8 | 9 | while (@ARGV) { 10 | $_ = shift; 11 | /^-b$/ && ($| = 1, next); # not buffered (flush each line) 12 | } 13 | 14 | binmode(STDIN, ":utf8"); 15 | binmode(STDOUT, ":utf8"); 16 | binmode(STDERR, ":utf8"); 17 | 18 | while (my $line = ) { 19 | chomp($line); 20 | #$line =~ tr/\040-\176/ /c; 21 | #$line =~ s/[^[:print:]]/ /g; 22 | #$line =~ s/\s+/ /g; 23 | $line =~ s/\p{C}/ /g; 24 | 25 | print "$line\n"; 26 | } 27 | 28 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/replace-unicode-punctuation.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 5 | 6 | use warnings; 7 | use strict; 8 | 9 | while (@ARGV) { 10 | $_ = shift; 11 | /^-b$/ && ($| = 1, next); # not buffered (flush each line) 12 | } 13 | 14 | #binmode(STDIN, ":utf8"); 15 | #binmode(STDOUT, ":utf8"); 16 | 17 | while() { 18 | s/,/,/g; 19 | s/。 */. /g; 20 | s/、/,/g; 21 | s/”/"/g; 22 | s/“/"/g; 23 | s/∶/:/g; 24 | s/:/:/g; 25 | s/?/\?/g; 26 | s/《/"/g; 27 | s/》/"/g; 28 | s/)/\)/g; 29 | s/!/\!/g; 30 | s/(/\(/g; 31 | s/;/;/g; 32 | s/1/1/g; 33 | s/」/"/g; 34 | s/「/"/g; 35 | s/0/0/g; 36 | s/3/3/g; 37 | s/2/2/g; 38 | s/5/5/g; 39 | s/6/6/g; 40 | s/9/9/g; 41 | s/7/7/g; 42 | s/8/8/g; 43 | s/4/4/g; 44 | s/. */. /g; 45 | s/~/\~/g; 46 | s/’/\'/g; 47 | s/…/\.\.\./g; 48 | s/━/\-/g; 49 | s/〈/\/g; 51 | s/【/\[/g; 52 | s/】/\]/g; 53 | s/%/\%/g; 54 | print $_; 55 | } 56 | -------------------------------------------------------------------------------- /data/multi30k/README.md: -------------------------------------------------------------------------------- 1 | Data preparation 2 | -- 3 | 4 | Run `prepare.sh` from this folder to create word-level tokenized corpora and 5 | related vocabulary files. The script will also download and unpack the 6 | object classification (OC) and object detection (OD) features under `features/`. 7 | -------------------------------------------------------------------------------- /data/multi30k/features/README.md: -------------------------------------------------------------------------------- 1 | Visual features 2 | --- 3 | 4 | Two types of visual features are provided: 5 | 6 | - [(Download)](https://zenodo.org/record/4298396/files/multi30k_resnet50_features.tar.bz2?download=1) Object classification (OC) features are extracted from a pre-trained ResNet-50 CNN 7 | - [(Download)](https://zenodo.org/record/4298396/files/multi30k_butd_features.tar.bz2?download=1) Object detection (OD) features are extracted from the `bottom-up-top-down (BUTD)` 8 | object detection model. 9 | 10 | These features will be automatically downloaded when `prepare.sh` is executed. 
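If you want to check exactly what was fetched, a small sketch like the one below can list the unpacked contents once `prepare.sh` has completed. It is only an illustration: it assumes it is run from `data/multi30k/`, and it assumes nothing about the archive layout beyond the `features/` folder name mentioned above.

```python
# Illustrative sketch: list whatever the feature archives unpacked under features/.
# Run from data/multi30k/ after prepare.sh; the internal layout is not assumed here.
from pathlib import Path

for path in sorted(Path('features').rglob('*'))[:20]:
    print(path)
```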
11 | -------------------------------------------------------------------------------- /data/multi30k/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Export moses path 4 | MOSES_PATH=../moses-5cbafabfd/scripts 5 | PATH=${MOSES_PATH}:$PATH 6 | SUFF="lc.norm.tok" 7 | 8 | for tlang in de fr cs; do 9 | echo "Preparing en-${tlang} dataset" 10 | folder="en-${tlang}" 11 | mkdir -p $folder 12 | for sp in train val test_2016_flickr test_2017_flickr test_2017_mscoco test_2018_flickr; do 13 | # Process both sides 14 | for llang in en ${tlang}; do 15 | inp="raw/${sp}.${llang}.gz" 16 | if [ -f $inp ]; then 17 | zcat $inp | lowercase.perl -l ${llang} | normalize-punctuation.perl -l ${llang} | \ 18 | tokenizer.perl -l ${llang} -a -threads 4 > $folder/${sp}.${SUFF}.${llang} 19 | fi 20 | done 21 | 22 | trg="${sp}.${SUFF}.${tlang}" 23 | 24 | # De-hyphenize test set targets for proper evaluation afterwards 25 | if [[ "$sp" =~ ^test.* ]] && [[ -f "${folder}/${trg}" ]]; then 26 | sed -r 's/\s*@-@\s*/-/g' < ${folder}/${trg} > ${folder}/${trg}.dehyph 27 | fi 28 | done 29 | # Create vocabularies 30 | pysimt-build-vocab ${folder}/train.${SUFF}.en -o ${folder} 31 | pysimt-build-vocab ${folder}/train.${SUFF}.${tlang} -o ${folder} 32 | done 33 | 34 | ### Download features 35 | pushd features 36 | wget "https://zenodo.org/record/4298396/files/multi30k_butd_features.tar.bz2?download=1" -O butd.tar.bz2 37 | tar xvf butd.tar.bz2 38 | # rename folder 39 | mv multi30k_butd_features butd 40 | wget "https://zenodo.org/record/4298396/files/multi30k_resnet50_features.tar.bz2?download=1" -O resnet.tar.bz2 41 | tar xvf resnet.tar.bz2 42 | popd 43 | -------------------------------------------------------------------------------- /data/multi30k/raw/README.md: -------------------------------------------------------------------------------- 1 | Multi30k dataset 2 | -- 3 | 4 | This folder contains a snapshot (Nov-2020) of the upstream [multi30k repository](https://github.com/multi30k/dataset). 
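For reference, the files that `prepare.sh` reads from `raw/` are plain gzipped text corpora with one sentence per line. A minimal sketch for peeking at one of them (assumes the current working directory is the repository root):

```python
# Minimal sketch: print the first few raw English training sentences.
# Assumes the current working directory is the repository root.
import gzip

with gzip.open('data/multi30k/raw/train.en.gz', 'rt', encoding='utf-8') as f:
    for _ in range(3):
        print(f.readline().strip())
```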
5 | -------------------------------------------------------------------------------- /data/multi30k/raw/test_2016_flickr.cs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2016_flickr.cs.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2016_flickr.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2016_flickr.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2016_flickr.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2016_flickr.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2016_flickr.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2016_flickr.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_flickr.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_flickr.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_flickr.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_flickr.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_flickr.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_flickr.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_mscoco.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_mscoco.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_mscoco.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_mscoco.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_mscoco.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_mscoco.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2018_flickr.cs.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2018_flickr.cs.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2018_flickr.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2018_flickr.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2018_flickr.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2018_flickr.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2018_flickr.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2018_flickr.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/train.cs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/train.cs.gz -------------------------------------------------------------------------------- /data/multi30k/raw/train.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/train.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/train.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/train.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/train.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/train.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/val.cs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/val.cs.gz -------------------------------------------------------------------------------- /data/multi30k/raw/val.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/val.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/val.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/val.en.gz -------------------------------------------------------------------------------- 
/data/multi30k/raw/val.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/val.fr.gz -------------------------------------------------------------------------------- /doccov.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | docstr-coverage 16 | docstr-coverage 17 | 56% 18 | 56% 19 | 20 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pysimt 2 | 3 | dependencies: 4 | - python=3.7 5 | - pip 6 | - ipython 7 | - pyyaml 8 | - numpy 9 | - tensorboard 10 | - tabulate 11 | - tqdm 12 | - pip: 13 | - torch==1.7.0 14 | - sacrebleu>=1.4.13 15 | - subword_nmt 16 | - editdistance==0.5.3 17 | - -e . 18 | -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | # Experiments 2 | 3 | Experiment related files will be stored in here by default. 4 | Revise the configuration files to change that. 5 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/logo.png -------------------------------------------------------------------------------- /make_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf docs/ 4 | pdoc --html pysimt -o docs/ 5 | mv docs/pysimt/* docs 6 | 7 | docstr-coverage -Pim pysimt --badge doccov.svg 8 | 9 | git commit docs doccov.svg -m "update docs" 10 | -------------------------------------------------------------------------------- /pysimt/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | """ 3 | `pysimt` is a `PyTorch`-based sequence-to-sequence (S2S) framework that facilitates 4 | research in unimodal and multi-modal machine translation. The framework 5 | is especially geared towards a set of recent simultaneous MT approaches, including 6 | heuristics-based decoding and prefix-to-prefix training/decoding. Common metrics 7 | such as average proportion (AP), average lag (AL), and consecutive wait (CW) 8 | are provided through well-defined APIs as well. 9 | 10 | 11 | .. include:: ./docs.md 12 | """ 13 | 14 | 15 | # Disable documentation generation for the following sub modules 16 | __pdoc__ = { 17 | 'cocoeval': False, 18 | 'config': False, 19 | 'logger': False, 20 | } 21 | -------------------------------------------------------------------------------- /pysimt/cocoeval/README.md: -------------------------------------------------------------------------------- 1 | pycocoevalcap 2 | --- 3 | 4 | This is a copy from 5 | https://github.com/tylin/coco-caption/tree/master/pycocoevalcap 6 | 7 | with Python 2 support dropped. 
8 | -------------------------------------------------------------------------------- /pysimt/cocoeval/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | from .bleu.bleu import Bleu 3 | from .cider.cider import Cider 4 | from .rouge.rouge import Rouge 5 | from .meteor.meteor import Meteor 6 | -------------------------------------------------------------------------------- /pysimt/cocoeval/bleu/LICENSE.bleu: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /pysimt/cocoeval/bleu/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /pysimt/cocoeval/bleu/bleu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File Name : bleu.py 3 | # 4 | # Description : Wrapper for BLEU scorer. 5 | # 6 | # Creation Date : 06-01-2015 7 | # Last Modified : Thu 19 Mar 2015 09:13:28 PM PDT 8 | # Authors : Hao Fang and Tsung-Yi Lin 9 | 10 | from .bleu_scorer import BleuScorer 11 | 12 | 13 | class Bleu: 14 | def __init__(self, n=4): 15 | # default compute Blue score up to 4 16 | self._n = n 17 | self._hypo_for_image = {} 18 | self.ref_for_image = {} 19 | 20 | def compute_score(self, gts, res): 21 | 22 | bleu_scorer = BleuScorer(n=self._n) 23 | for id in sorted(gts.keys()): 24 | hypo = res[id] 25 | ref = gts[id] 26 | 27 | # Sanity check. 
28 | assert isinstance(hypo, list) 29 | assert isinstance(ref, list) 30 | assert len(hypo) == 1 31 | assert len(ref) >= 1 32 | 33 | bleu_scorer += (hypo[0], ref) 34 | 35 | # score, scores = bleu_scorer.compute_score(option='shortest') 36 | # score, scores = bleu_scorer.compute_score(option='average',verbose=1) 37 | score, scores = bleu_scorer.compute_score(option='closest', verbose=0) 38 | 39 | # return (bleu, bleu_info) 40 | return score, scores 41 | 42 | def method(self): 43 | return "Bleu" 44 | -------------------------------------------------------------------------------- /pysimt/cocoeval/cider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /pysimt/cocoeval/cider/cider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Filename: cider.py 3 | # 4 | # Description: Describes the class to compute the CIDEr 5 | # (Consensus-Based Image Description Evaluation) Metric 6 | # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) 7 | # 8 | # Creation Date: Sun Feb 8 14:16:54 2015 9 | # 10 | # Authors: Ramakrishna Vedantam and 11 | # Tsung-Yi Lin 12 | 13 | from .cider_scorer import CiderScorer 14 | 15 | 16 | class Cider: 17 | """Main Class to compute the CIDEr metric.""" 18 | 19 | def __init__(self, test=None, refs=None, n=4, sigma=6.0): 20 | # set cider to sum over 1 to 4-grams 21 | self._n = n 22 | # set the standard deviation parameter for gaussian penalty 23 | self._sigma = sigma 24 | 25 | def compute_score(self, gts, res): 26 | """Main function to compute CIDEr score 27 | 28 | Arguments: 29 | hypo_for_image (dict): dictionary with key and 30 | value 31 | ref_for_image (dict): dictionary with key and value 32 | 33 | 34 | Returns: 35 | cider (float): computed CIDEr score for the corpus 36 | """ 37 | 38 | cider_scorer = CiderScorer(n=self._n, sigma=self._sigma) 39 | 40 | for id in sorted(gts.keys()): 41 | hypo = res[id] 42 | ref = gts[id] 43 | 44 | # Sanity check. 
45 | assert isinstance(hypo, list) 46 | assert isinstance(ref, list) 47 | assert len(hypo) == 1 48 | assert len(ref) > 0 49 | 50 | cider_scorer += (hypo[0], ref) 51 | 52 | (score, scores) = cider_scorer.compute_score() 53 | 54 | return score, scores 55 | 56 | def method(self): 57 | return "CIDEr" 58 | -------------------------------------------------------------------------------- /pysimt/cocoeval/meteor/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /pysimt/cocoeval/meteor/meteor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Python wrapper for METEOR implementation, by Xinlei Chen 3 | # Acknowledge Michael Denkowski for the generous discussion and help 4 | 5 | import os 6 | import shutil 7 | import threading 8 | import subprocess 9 | 10 | from ...utils.misc import get_meteor_jar 11 | 12 | 13 | class Meteor: 14 | def __init__(self, language, norm=False): 15 | self.jar = str(get_meteor_jar()) 16 | self.meteor_cmd = ['java', '-jar', '-Xmx2G', self.jar, 17 | '-', '-', '-stdio', '-l', language] 18 | self.env = os.environ 19 | self.env['LC_ALL'] = 'en_US.UTF_8' 20 | 21 | # Sanity check 22 | if shutil.which('java') is None: 23 | raise RuntimeError('METEOR requires java which is not installed.') 24 | 25 | if norm: 26 | self.meteor_cmd.append('-norm') 27 | 28 | self.meteor_p = subprocess.Popen(self.meteor_cmd, 29 | stdin=subprocess.PIPE, 30 | stdout=subprocess.PIPE, 31 | stderr=subprocess.PIPE, 32 | env=self.env, 33 | universal_newlines=True, bufsize=1) 34 | # Used to guarantee thread safety 35 | self.lock = threading.Lock() 36 | 37 | def method(self): 38 | return "METEOR" 39 | 40 | def compute_score(self, gts, res): 41 | imgIds = sorted(list(gts.keys())) 42 | scores = [] 43 | 44 | eval_line = 'EVAL' 45 | self.lock.acquire() 46 | for i in imgIds: 47 | assert len(res[i]) == 1 48 | 49 | hypothesis_str = res[i][0].replace('|||', '').replace(' ', ' ') 50 | score_line = ' ||| '.join( 51 | ('SCORE', ' ||| '.join(gts[i]), hypothesis_str)) 52 | 53 | # We obtained --> SCORE ||| reference 1 words ||| 54 | # reference n words ||| hypothesis words 55 | self.meteor_p.stdin.write(score_line + '\n') 56 | stat = self.meteor_p.stdout.readline().strip() 57 | eval_line += ' ||| {}'.format(stat) 58 | 59 | # Send to METEOR 60 | self.meteor_p.stdin.write(eval_line + '\n') 61 | 62 | # Collect segment scores 63 | for i in range(len(imgIds)): 64 | score = float(self.meteor_p.stdout.readline().strip()) 65 | scores.append(score) 66 | 67 | # Final score 68 | final_score = 100 * float(self.meteor_p.stdout.readline().strip()) 69 | self.lock.release() 70 | 71 | return final_score, scores 72 | 73 | def __del__(self): 74 | self.lock.acquire() 75 | self.meteor_p.stdin.close() 76 | self.meteor_p.wait() 77 | self.lock.release() 78 | -------------------------------------------------------------------------------- /pysimt/cocoeval/rouge/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'vrama91' 2 | -------------------------------------------------------------------------------- /pysimt/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A dataset in `pysimt` inherits from `torch.nn.Dataset` and is designed 3 | to read and expose a specific type of corpus. 
4 | 5 | * A dataset class name should end with the `Dataset` suffix. 6 | * The `__init__` method should include `**kwargs` for other possible arguments. 7 | * The `__getitem__` and `__len__` methods should be implemented. 8 | * A static method `to_torch(batch, **kwargs)` is automatically used when 9 | preparing the batch tensor during forward-pass. 10 | 11 | Please see `pysimt.datasets.TextDataset` to get an idea on how to implement 12 | a new dataset. 13 | 14 | """ 15 | 16 | from .numpy import NumpyDataset 17 | from .text import TextDataset 18 | from .objdet import ObjectDetectionsDataset 19 | 20 | 21 | # Second the selector function 22 | def get_dataset(type_): 23 | return { 24 | 'numpy': NumpyDataset, 25 | 'text': TextDataset, 26 | 'objectdetections': ObjectDetectionsDataset, 27 | }[type_.lower()] 28 | 29 | 30 | # Should always be at the end 31 | from .multimodal import MultimodalDataset # noqa 32 | -------------------------------------------------------------------------------- /pysimt/datasets/collate.py: -------------------------------------------------------------------------------- 1 | class Batch(dict): 2 | """A custom dictionary representing a batch.""" 3 | def __init__(self, *args, **kwargs): 4 | super().__init__(*args, **kwargs) 5 | dim1s = set([x.size(1) for x in self.values()]) 6 | assert len(dim1s) == 1, \ 7 | "Incompatible batch dimension (1) between modalities." 8 | self.size = dim1s.pop() 9 | 10 | def device(self, device): 11 | self.update({k: v.to(device) for k, v in self.items()}) 12 | 13 | def __repr__(self): 14 | s = "Batch(size={})\n".format(self.size) 15 | for data_source, tensor in self.items(): 16 | s += " {:10s} -> {} - {}\n".format( 17 | str(data_source), tensor.shape, tensor.device) 18 | return s 19 | 20 | 21 | def get_collate(data_sources): 22 | """Returns a special collate_fn which will view the underlying data 23 | in terms of the given DataSource keys.""" 24 | 25 | def collate_fn(batch): 26 | return Batch( 27 | {ds: ds.torchify([elem[ds] for elem in batch]) for ds in data_sources}, 28 | ) 29 | 30 | return collate_fn 31 | -------------------------------------------------------------------------------- /pysimt/datasets/numpy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | class NumpyDataset(Dataset): 10 | r"""A PyTorch dataset for Numpy .npy/npz serialized tensor files. The 11 | serialized tensor's first dimension should be the batch dimension. 12 | 13 | Arguments: 14 | fname (str or Path): A string or ``pathlib.Path`` object for 15 | the relevant numpy file. 16 | key (str, optional): If `fname` is `.npz` file, its relevant `key` 17 | will be fetched from the serialized object. 18 | order_file (str, None): If given, will be used to map sample indices 19 | to tensors using this list. Useful for tiled or repeated 20 | experiments. 21 | revert (bool, optional): If `True`, the data order will be reverted 22 | for adversarial/incongruent experiments during test-time. 23 | """ 24 | 25 | def __init__(self, fname, key=None, order_file=None, revert=False, **kwargs): 26 | self.path = Path(fname) 27 | if not self.path.exists(): 28 | raise RuntimeError('{} does not exist.'.format(self.path)) 29 | 30 | if self.path.suffix == '.npy': 31 | self.data = np.load(self.path) 32 | elif self.path.suffix == '.npz': 33 | assert key, "A key should be provided for .npz files." 
34 | self.data = np.load(self.path)[key] 35 | 36 | if order_file: 37 | with open(order_file) as orf: 38 | self.order = [int(x) for x in orf.read().strip().split('\n')] 39 | else: 40 | self.order = list(range(self.data.shape[0])) 41 | 42 | if revert: 43 | self.order = self.order[::-1] 44 | 45 | # Dataset size 46 | self.size = len(self.order) 47 | 48 | @staticmethod 49 | def to_torch(batch, **kwargs): 50 | # NOTE: Assumes x.shape == (n, *) 51 | x = torch.from_numpy(np.array(batch, dtype='float32')) 52 | # Convert it to (t(=1 if fixed features), n, c) 53 | # By default we flatten h*w to first dim for interoperability 54 | # Models should further reshape the tensor for their needs 55 | return x.view(*x.size()[:2], -1).permute(2, 0, 1) 56 | 57 | def __getitem__(self, idx): 58 | return self.data[self.order[idx]] 59 | 60 | def __len__(self): 61 | return self.size 62 | 63 | def __repr__(self): 64 | s = "{} '{}' ({} samples)\n".format( 65 | self.__class__.__name__, self.path.name, self.__len__()) 66 | return s 67 | -------------------------------------------------------------------------------- /pysimt/evaluator.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from . import metrics 4 | from .utils.filterchain import FilterChain 5 | from .utils.misc import get_language 6 | 7 | 8 | class Evaluator: 9 | def __init__(self, refs, beam_metrics, filters=''): 10 | # metrics: list of upper-case beam-search metrics 11 | self.kwargs = {} 12 | self.scorers = OrderedDict() 13 | self.refs = list(refs.parent.glob(refs.name)) 14 | self.language = get_language(self.refs[0]) 15 | if self.language is None: 16 | # Fallback to en (this is only relevant for METEOR) 17 | self.language = 'en' 18 | 19 | self.filter = None 20 | if filters: 21 | self.filter = FilterChain(filters) 22 | self.refs = self.filter.apply(refs) 23 | 24 | assert len(self.refs) > 0, "Number of reference files == 0" 25 | 26 | for metric in sorted(beam_metrics): 27 | self.kwargs[metric] = {'language': self.language} 28 | self.scorers[metric] = getattr(metrics, metric + 'Scorer')() 29 | 30 | def score(self, hyps): 31 | """hyps is a list of hypotheses as they come out from decoder.""" 32 | assert isinstance(hyps, list), "hyps should be a list." 
33 | 34 | # Post-process if requested 35 | if self.filter is not None: 36 | hyps = self.filter.apply(hyps) 37 | 38 | results = [] 39 | for key, scorer in self.scorers.items(): 40 | results.append( 41 | scorer.compute(self.refs, hyps, **self.kwargs[key])) 42 | return results 43 | -------------------------------------------------------------------------------- /pysimt/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Basic layers 2 | from .ff import FF 3 | from .pool import Pool 4 | from .fusion import Fusion 5 | from .selector import Selector 6 | from .positionwise_ff import PositionwiseFF 7 | 8 | from .embedding import TFEmbedding, ProjectedEmbedding 9 | 10 | # Attention layers 11 | from .attention import DotAttention 12 | from .attention import MLPAttention 13 | from .attention import UniformAttention 14 | from .attention import ScaledDotAttention 15 | from .attention import MultiheadAttention 16 | from .attention import HierarchicalAttention 17 | 18 | # Encoder layers 19 | from .encoders import RecurrentEncoder 20 | from .encoders import TFEncoder 21 | from .encoders import VisualFeaturesEncoder 22 | 23 | # Decoder layers 24 | from .decoders import ConditionalGRUDecoder 25 | from .decoders import TFDecoder 26 | -------------------------------------------------------------------------------- /pysimt/layers/attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp import MLPAttention 2 | from .dot import DotAttention 3 | from .hierarchical import HierarchicalAttention 4 | from .uniform import UniformAttention 5 | from .scaled_dot import ScaledDotAttention 6 | from .multihead import MultiheadAttention 7 | 8 | 9 | def get_attention(type_): 10 | return { 11 | 'mlp': MLPAttention, 12 | 'dot': DotAttention, 13 | 'hier': HierarchicalAttention, 14 | 'uniform': UniformAttention, 15 | 'multihead': MultiheadAttention, 16 | 'scaled_dot': ScaledDotAttention, 17 | }[type_] 18 | -------------------------------------------------------------------------------- /pysimt/layers/attention/hierarchical.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | from torch import nn 4 | 5 | from ...utils.nn import get_activation_fn 6 | 7 | 8 | # Libovický, J., & Helcl, J. (2017). Attention Strategies for Multi-Source 9 | # Sequence-to-Sequence Learning. In Proceedings of the 55th Annual Meeting of 10 | # the Association for Computational Linguistics (Volume 2: Short Papers) 11 | # (Vol. 2, pp. 196-202). 
[Code contributed by @jlibovicky] 12 | 13 | 14 | class HierarchicalAttention(nn.Module): 15 | """Hierarchical attention over multiple modalities.""" 16 | def __init__(self, ctx_dims, hid_dim, mid_dim, att_activ='tanh'): 17 | super().__init__() 18 | 19 | self.activ = get_activation_fn(att_activ) 20 | self.ctx_dims = ctx_dims 21 | self.hid_dim = hid_dim 22 | self.mid_dim = mid_dim 23 | 24 | self.ctx_projs = nn.ModuleList([ 25 | nn.Linear(dim, mid_dim, bias=False) for dim in self.ctx_dims]) 26 | self.dec_proj = nn.Linear(hid_dim, mid_dim, bias=True) 27 | self.mlp = nn.Linear(self.mid_dim, 1, bias=False) 28 | 29 | def forward(self, contexts, hid): 30 | dec_state_proj = self.dec_proj(hid) 31 | ctx_projected = torch.cat([ 32 | p(ctx).unsqueeze(0) for p, ctx 33 | in zip(self.ctx_projs, contexts)], dim=0) 34 | energies = self.mlp(self.activ(dec_state_proj + ctx_projected)) 35 | att_dist = nn.functional.softmax(energies, dim=0) 36 | 37 | ctxs_cat = torch.cat([c.unsqueeze(0) for c in contexts]) 38 | joint_context = (att_dist * ctxs_cat).sum(0) 39 | 40 | return att_dist, joint_context 41 | -------------------------------------------------------------------------------- /pysimt/layers/attention/mlp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from .dot import DotAttention 7 | 8 | 9 | class MLPAttention(DotAttention): 10 | """Attention layer with feed-forward layer.""" 11 | def __init__(self, ctx_dim, hid_dim, att_bottleneck='ctx', 12 | transform_ctx=True, att_activ='tanh', 13 | mlp_bias=False, temp=1., ctx2hid=True): 14 | super().__init__(ctx_dim, hid_dim, att_bottleneck, transform_ctx, 15 | att_activ, temp, ctx2hid) 16 | 17 | if mlp_bias: 18 | self.bias = nn.Parameter(torch.Tensor(self.mid_dim)) 19 | self.bias.data.zero_() 20 | else: 21 | self.register_parameter('bias', None) 22 | 23 | self.mlp = nn.Linear(self.mid_dim, 1, bias=False) 24 | 25 | def forward(self, hid, ctx, ctx_mask=None): 26 | r"""Computes attention probabilities and final context using 27 | decoder's hidden state and source annotations. 28 | 29 | Arguments: 30 | hid(Tensor): A set of decoder hidden states of shape `T*B*H` 31 | where `T` == 1, `B` is batch dim and `H` is hidden state dim. 32 | ctx(Tensor): A set of annotations of shape `S*B*C` where `S` 33 | is the source timestep dim, `B` is batch dim and `C` 34 | is annotation dim. 35 | ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes 36 | in the padded positions. 37 | 38 | Returns: 39 | scores(Tensor): A tensor of shape `S*B` containing normalized 40 | attention scores for each position and sample. 41 | z_t(Tensor): A tensor of shape `B*H` containing the final 42 | attended context vector for this target decoding timestep. 43 | 44 | Notes: 45 | This will only work when `T==1` for now. 
46 | """ 47 | # inner_sum -> SxBxC + TxBxC 48 | inner_sum = self.ctx2ctx(ctx) + self.hid2ctx(hid) 49 | 50 | if self.bias is not None: 51 | inner_sum.add_(self.bias) 52 | 53 | # Compute scores- > SxB 54 | scores = self.mlp( 55 | self.activ(inner_sum)).div(self.temperature).squeeze(-1) 56 | 57 | # Normalize attention scores correctly -> S*B 58 | if ctx_mask is not None: 59 | # Mask out padded positions with -inf so that they get 0 attention 60 | scores.masked_fill_((1 - ctx_mask).bool(), -1e8) 61 | 62 | alpha = F.softmax(scores, dim=0) 63 | 64 | # Transform final context vector to H for further decoders 65 | return alpha, self.ctx2hid((alpha.unsqueeze(-1) * ctx).sum(0)) 66 | -------------------------------------------------------------------------------- /pysimt/layers/attention/uniform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | 4 | 5 | class UniformAttention(torch.nn.Module): 6 | """A dummy non-parametric attention layer that applies uniform weights.""" 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def forward(self, hid, ctx, ctx_mask=None): 11 | alpha = torch.ones(*ctx.shape[:2], device=ctx.device).div(ctx.shape[0]) 12 | wctx = (alpha.unsqueeze(-1) * ctx).sum(0) 13 | return alpha, wctx 14 | -------------------------------------------------------------------------------- /pysimt/layers/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .conditional import ConditionalGRUDecoder 2 | from .tf_decoder import TFDecoder 3 | 4 | 5 | def get_decoder(type_): 6 | """Only expose ones with compatible __init__() arguments for now.""" 7 | return { 8 | 'cond': ConditionalGRUDecoder, 9 | 'tf': TFDecoder, 10 | }[type_] 11 | -------------------------------------------------------------------------------- /pysimt/layers/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .recurrent import RecurrentEncoder 2 | from .transformers import TFEncoder 3 | from .vis_features import VisualFeaturesEncoder 4 | from .speech_lstm import SpeechLSTM 5 | -------------------------------------------------------------------------------- /pysimt/layers/encoders/vis_features.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ...utils.nn import generate_visual_features_padding_masks 4 | from .. import FF 5 | 6 | 7 | class VisualFeaturesEncoder(nn.Module): 8 | """A facility encoder for pre-extracted visual features. 9 | 10 | Arguments: 11 | input_size (int): number of channels in the last dimension of 12 | the features. 13 | proj_dim(int, optional): If not `None`, add a final projection 14 | layer similar to a 1x1 Conv2D. 15 | proj_activ(str, optional): Non-linearity for projection layer. 16 | `None` or `linear` does not apply any non-linearity. 17 | layer_norm(bool, optional): Apply layer normalization. 18 | l2_norm(bool, optional): L2-normalize features. 19 | dropout (float, optional): Optional dropout to be applied on the 20 | projected visual features. 21 | pool (bool, optional): If True, applies global average pooling 22 | to reduce conv features to a single vector. 23 | 24 | Input: 25 | x (Tensor): A tensor of shape (w*h, batch_size, input_size) 26 | 27 | Output: 28 | h (Tensor): A tensor of shape (w*h, batch_size, proj_dim) 29 | mask (None): No masking is done for visual features. 
30 | """ 31 | def __init__(self, input_size, proj_dim=None, proj_activ=None, 32 | layer_norm=False, l2_norm=False, dropout=0.0, pool=False, image_masking=False): 33 | super().__init__() 34 | 35 | self.ctx_size = input_size 36 | self.l2_norm = l2_norm 37 | self._image_masking = image_masking 38 | 39 | output_layers = [] 40 | if proj_dim is not None: 41 | output_layers.append( 42 | FF(input_size, proj_dim, activ=proj_activ)) 43 | self.ctx_size = proj_dim 44 | 45 | if layer_norm: 46 | output_layers.append(nn.LayerNorm(self.ctx_size)) 47 | 48 | if dropout > 0: 49 | output_layers.append(nn.Dropout(dropout)) 50 | 51 | self.output = nn.Sequential(*output_layers) 52 | 53 | # Variables for caching 54 | self._states, self._mask = None, None 55 | 56 | def forward(self, x, **kwargs): 57 | if self._image_masking: 58 | self._mask = generate_visual_features_padding_masks(x) 59 | if self.l2_norm: 60 | x.div_(x.norm(p=2, dim=-1, keepdim=True)) 61 | self._states = self.output(x) 62 | return self._states, self._mask 63 | 64 | def get_states(self, up_to=int(1e6)): 65 | assert self._states is not None, \ 66 | "encoder was not called for caching the states." 67 | return self._states, self._mask 68 | -------------------------------------------------------------------------------- /pysimt/layers/ff.py: -------------------------------------------------------------------------------- 1 | """A convenience feed-forward layer with non-linearity support.""" 2 | 3 | import math 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | 9 | from ..utils.nn import get_activation_fn 10 | 11 | 12 | class FF(nn.Module): 13 | """A convenience feed-forward layer with non-linearity option. 14 | 15 | Args: 16 | input_size: The size of the input features 17 | hidden_size: The size of the output features 18 | bias: If `False`, disables the bias component 19 | bias_zero: If `False`, randomly initialize the bias instead of zero 20 | initialization 21 | activ: The activation function name that will be searched 22 | in `torch` and `torch.nn.functional` modules. `None` or `linear` 23 | disables the activation function 24 | 25 | Example: 26 | >>> FF(300, 400, bias=True, activ='tanh') # a tanh MLP 27 | >>> FF(300, 400, bias=False, activ=None) # a linear layer 28 | """ 29 | 30 | def __init__(self, input_size, hidden_size, bias=True, 31 | bias_zero=True, activ=None): 32 | """""" 33 | super().__init__() 34 | self.input_size = input_size 35 | self.hidden_size = hidden_size 36 | self.use_bias = bias 37 | self.bias_zero = bias_zero 38 | self.activ_type = activ 39 | if self.activ_type in (None, 'linear'): 40 | self.activ_type = 'linear' 41 | self.weight = nn.Parameter(torch.Tensor(hidden_size, input_size)) 42 | self.activ = get_activation_fn(activ) 43 | 44 | if self.use_bias: 45 | self.bias = nn.Parameter(torch.Tensor(hidden_size)) 46 | else: 47 | self.register_parameter('bias', None) 48 | 49 | self.reset_parameters() 50 | 51 | def reset_parameters(self): 52 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 53 | self.weight.data.uniform_(-stdv, stdv) 54 | if self.use_bias: 55 | if self.bias_zero: 56 | self.bias.data.zero_() 57 | else: 58 | self.bias.data.uniform_(-stdv, stdv) 59 | 60 | def forward(self, input): 61 | return self.activ(F.linear(input, self.weight, self.bias)) 62 | 63 | def __repr__(self): 64 | repr_ = self.__class__.__name__ + '(' \ 65 | + 'input_size=' + str(self.input_size) \ 66 | + ', hidden_size=' + str(self.hidden_size) \ 67 | + ', activ=' + str(self.activ_type) \ 68 | + ', bias=' + str(self.use_bias) 69 | if self.use_bias: 70 | repr_ += ', bias_zero=' + str(self.bias_zero) 71 | return repr_ + ')' 72 | -------------------------------------------------------------------------------- /pysimt/layers/fusion.py: -------------------------------------------------------------------------------- 1 | """A convenience layer that merges an arbitrary number of inputs.""" 2 | 3 | import operator 4 | from typing import Optional 5 | from functools import reduce 6 | 7 | import torch 8 | 9 | from . import FF 10 | from ..utils.nn import get_activation_fn 11 | 12 | 13 | class Fusion(torch.nn.Module): 14 | """A convenience layer that merges an arbitrary number of inputs using 15 | concatenation, addition or multiplication. It then applies an optional 16 | non-linearity given by the `activ` argument. If `operation==concat`, 17 | additional arguments should be provided to define an adaptor MLP 18 | that will project the concatenated vector into a lower dimensional space. 19 | 20 | Args: 21 | operation: `concat`, `sum` or `mul` for concatenation, addition, and 22 | multiplication respectively 23 | activ: The activation function name that will be searched 24 | in `torch` and `torch.nn.functional` modules. `None` or `linear` 25 | disables the activation function 26 | input_size: Only required for `concat` fusion, to denote the concatenated 27 | input vector size. This will be used to add an MLP adaptor layer 28 | after concatenation to project the fused vector into a lower 29 | dimension 30 | output_size: Only required for `concat` fusion, to denote the 31 | output size of the aforementioned adaptor layer 32 | """ 33 | def __init__(self, 34 | operation: str = 'concat', 35 | activ: Optional[str] = 'linear', 36 | input_size: Optional[int] = None, 37 | output_size: Optional[int] = None): 38 | """""" 39 | super().__init__() 40 | 41 | self.operation = operation 42 | self.activ = activ 43 | self.forward = getattr(self, '_{}'.format(self.operation)) 44 | self.activ = get_activation_fn(activ) 45 | self.adaptor = lambda x: x 46 | 47 | if self.operation == 'concat' or input_size != output_size: 48 | self.adaptor = FF(input_size, output_size, bias=False, activ=None) 49 | 50 | def _sum(self, inputs): 51 | return self.activ(self.adaptor(reduce(operator.add, inputs))) 52 | 53 | def _mul(self, inputs): 54 | return self.activ(self.adaptor(reduce(operator.mul, inputs))) 55 | 56 | def _concat(self, inputs): 57 | return self.activ(self.adaptor(torch.cat(inputs, dim=-1))) 58 | 59 | def __repr__(self): 60 | return f"Fusion(type={self.operation}, activ={self.activ})" 61 | -------------------------------------------------------------------------------- /pysimt/layers/pool.py: -------------------------------------------------------------------------------- 1 | """A convenience layer to apply pooling to a sequential tensor.""" 2 | 3 | import torch 4 | 5 | 6 | class Pool(torch.nn.Module): 7 | """A convenience layer to apply various sorts of pooling to a 8 | sequential tensor. 
The pooling operation can be `last`, `mean`, `max`, or 9 | `sum`. 10 | 11 | Args: 12 | operation: The pooling operator. 13 | It should be one from `last`, `mean`, `max`, `sum`. 14 | pool_dim: The dimension along which the pooling will be applied 15 | keepdim: Passed along to the underlying `torch` functions for 16 | `max`, `mean` and `sum` variants. 17 | 18 | Examples: 19 | >>> import torch 20 | >>> from pysimt.layers import Pool 21 | >>> x = torch.rand(10, 32, 200) # n_timesteps, n_samples, feat_dim 22 | >>> p = Pool('sum', 0) 23 | >>> torch.equal(p(x), x.sum(0, keepdim=True)) 24 | True 25 | >>> p = Pool('max', 0) 26 | >>> torch.equal(p(x), x.max(0, keepdim=True)[0]) 27 | True 28 | >>> p = Pool('mean', 0) 29 | >>> torch.equal(p(x), x.mean(0, keepdim=True)) 30 | True 31 | >>> p = Pool('last', 0) 32 | >>> torch.equal(p(x), x.select(0, -1).unsqueeze(0)) 33 | True 34 | >>> torch.equal(p(x), x[-1].unsqueeze(0)) 35 | True 36 | >>> p = Pool('last', 1) 37 | >>> torch.equal(p(x), x.select(1, -1).unsqueeze(0)) 38 | True 39 | """ 40 | def __init__(self, operation: str, pool_dim: int, keepdim: bool = True): 41 | """""" 42 | super().__init__() 43 | 44 | self.operation = operation 45 | self.pool_dim = pool_dim 46 | self.keepdim = keepdim 47 | 48 | assert self.operation in ["last", "mean", "max", "sum"], \ 49 | "Pool() operation should be mean, max, sum or last." 50 | 51 | # Assign the shortcut 52 | self.forward = getattr(self, '_{}'.format(self.operation)) 53 | 54 | def _last(self, x: torch.Tensor) -> torch.Tensor: 55 | return x.select(self.pool_dim, -1).unsqueeze(0) 56 | 57 | def _max(self, x: torch.Tensor) -> torch.Tensor: 58 | return torch.max(x, dim=self.pool_dim, keepdim=self.keepdim)[0] 59 | 60 | def _mean(self, x: torch.Tensor) -> torch.Tensor: 61 | return torch.mean(x, dim=self.pool_dim, keepdim=self.keepdim) 62 | 63 | def _sum(self, x: torch.Tensor) -> torch.Tensor: 64 | return torch.sum(x, dim=self.pool_dim, keepdim=self.keepdim) 65 | 66 | def __repr__(self): 67 | return "Pool(operation={}, pool_dim={}, keepdim={})".format( 68 | self.operation, self.pool_dim, self.keepdim) 69 | -------------------------------------------------------------------------------- /pysimt/layers/positionwise_ff.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from . import FF 4 | from .transformers import BaseSublayer 5 | 6 | 7 | class PositionwiseFF(nn.Module): 8 | """Positionwise Feed-forward layer. 9 | 10 | Arguments: 11 | 12 | Input: 13 | 14 | Output: 15 | """ 16 | 17 | def __init__(self, model_dim, ff_dim, activ='gelu', dropout=0.1): 18 | """ 19 | Creates a PositionwiseFF. 20 | :param model_dim: The model dimensions. 21 | :param ff_dim: The feedforward dimensions. 22 | :param activ: The activation function. Default: gelu 23 | :param dropout: The amount of dropout. Default: 0.1 24 | """ 25 | super().__init__() 26 | self.model_dim = model_dim 27 | self.ff_dim = ff_dim 28 | self.activ = activ 29 | 30 | # Create the layers 31 | self.layers = nn.Sequential( 32 | FF(self.model_dim, self.ff_dim, activ=self.activ), 33 | nn.Dropout(dropout), 34 | FF(self.ff_dim, self.model_dim, activ=None), 35 | ) 36 | 37 | def forward(self, x): 38 | return self.layers(x) 39 | 40 | 41 | class PositionwiseSublayer(BaseSublayer): 42 | def __init__(self, model_dim, ff_dim, ff_activ='gelu', dropout=0.1, is_pre_norm=False): 43 | """ 44 | Creates a PositionwiseSublayer. 45 | :param model_dim: The model dimensions. 
46 | :param ff_dim: The dimensions of the feed forward network. 47 | :param ff_activ: The activation of the feed forward network. 48 | :param dropout: The dropout rate. 49 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 50 | """ 51 | super().__init__(model_dim, dropout, is_pre_norm) 52 | self.feed_forward = PositionwiseFF(model_dim, ff_dim, ff_activ, dropout=dropout) 53 | 54 | def forward(self, x, mask=None): 55 | """ 56 | Performs a forward pass over the PositionwiseSublayer. 57 | :param x: The input x. 58 | :param mask: The input mask. 59 | :return: The output from the forward pass of the PositionwiseSublayer. 60 | """ 61 | residual = x 62 | x = self.apply_pre_norm_if_needed(x) 63 | x = self.feed_forward(x) 64 | x = self.apply_residual(residual, x) 65 | x = self.apply_post_norm_if_needed(x) 66 | return x 67 | -------------------------------------------------------------------------------- /pysimt/layers/selector.py: -------------------------------------------------------------------------------- 1 | """A utility layer that returns a particular element from the previous layer.""" 2 | 3 | from torch import nn, Tensor 4 | from typing import Iterable, Any 5 | 6 | 7 | class Selector(nn.Module): 8 | """Utility layer that selects and returns a particular element out of 9 | a tuple. It is useful to select a particular output from the previous layer, 10 | when used in constructs such as `torch.nn.Sequential()`. 11 | 12 | Args: 13 | index: The position to select from the given input. 14 | 15 | Example: 16 | >>> layers = [] 17 | >>> layers.append(torch.nn.GRU(200, 400)) 18 | # By default, GRU returns (output, h_n) but we are not interested in h_n 19 | >>> layers.append(Selector(0)) 20 | >>> layers.append(torch.nn.Dropout(0.2)) 21 | >>> self.block = nn.Sequential(*layers) 22 | """ 23 | def __init__(self, index: int): 24 | """""" 25 | super().__init__() 26 | self.index = index 27 | 28 | def forward(self, x: Iterable[Tensor]) -> Tensor: 29 | """Returns the pre-determined `self.index`'th position of `x`.""" 30 | return x[self.index] 31 | 32 | def __repr__(self): 33 | return f"Selector(index={self.index})" 34 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sublayer import BaseSublayer 2 | from .self_attention_sublayer import SelfAttentionSublayer 3 | from .cross_attention_sublayer import CrossAttentionSublayer 4 | from .cross_attention_sublayer_mm_flat import FlatMMCrossAttentionSublayer 5 | from .cross_attention_sublayer_mm_hier import HierarchicalMMCrossAttentionSublayer 6 | from .cross_attention_sublayer_mm_serial import SerialMMCrossAttentionSublayer 7 | from .cross_attention_sublayer_mm_parallel import ParallelMMCrossAttentionSublayer 8 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/base_sublayer.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class BaseSublayer(nn.Module): 5 | 6 | def __init__(self, model_dim, dropout=0.1, is_pre_norm=False): 7 | """ 8 | Creates a BaseSublayer. 9 | :param model_dim: The model dimension. 10 | :param dropout: The dropout layer. 11 | :param is_pre_norm: Whether it should use pre_norm transformer layers. Default: False. 
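With pre_norm, the sublayer normalises its input before the wrapped operation and the residual is added afterwards (roughly `x + Dropout(op(LayerNorm(x)))`); with post_norm, normalisation is applied after the residual connection (`LayerNorm(x + Dropout(op(x)))`). This corresponds to the `apply_pre_norm_if_needed`, `apply_post_norm_if_needed` and `apply_residual` helpers below.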
12 | """ 13 | super().__init__() 14 | self.is_pre_norm = is_pre_norm 15 | self.layer_norm = nn.LayerNorm(model_dim, eps=1e-6) 16 | self.dropout = nn.Dropout(dropout) 17 | 18 | def forward(self, **kwargs): 19 | raise NotImplementedError("BaseSublayer does not implement forward.") 20 | 21 | def apply_pre_norm_if_needed(self, x): 22 | """ 23 | Applies pre_norm to the input if needed. If pre_norm is false, the input remains unchanged. 24 | :param x: The input. 25 | :return: The output. 26 | """ 27 | if self.is_pre_norm: 28 | x = self.layer_norm(x) 29 | return x 30 | 31 | def apply_post_norm_if_needed(self, x): 32 | """ 33 | Applies post_norm to the input if needed. If pre_norm is true, the input remains unchanged. 34 | :param x: The input. 35 | :return: The output. 36 | """ 37 | if not self.is_pre_norm: 38 | x = self.layer_norm(x) 39 | return x 40 | 41 | def apply_residual(self, residual, x): 42 | """ 43 | Applies the residual connection. 44 | :param residual: The residual. 45 | :param x: The input x. 46 | :return: The output of the residual connection. 47 | """ 48 | return residual + self.dropout(x) 49 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/cross_attention_sublayer.py: -------------------------------------------------------------------------------- 1 | from ..attention import ScaledDotAttention 2 | from .base_sublayer import BaseSublayer 3 | 4 | 5 | class CrossAttentionSublayer(BaseSublayer): 6 | def __init__(self, model_dim, n_heads, dropout=0.1, 7 | attn_dropout=0.0, is_pre_norm=False): 8 | """ 9 | Creates a CrossAttentionSublayer. 10 | :param model_dim: The model dimension. 11 | :param n_heads: The number of attention heads. 12 | :param dropout: The dropout rate for the residual connection. 13 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 14 | """ 15 | super().__init__(model_dim, dropout, is_pre_norm) 16 | self.attn = ScaledDotAttention(model_dim, n_heads, attn_dropout) 17 | 18 | def forward(self, query, key, value, mask=None, **kwargs): 19 | """ 20 | Performs a forward pass over the CrossAttentionSublayer. 21 | :param query: The query. For encoder-decoder attention, it is the output from the previous decoder layer. 22 | :param key: The key. For encoder-decoder attention, it is the output from the encoder. 23 | :param value: The mask. For encoder-decoder attention, it is the output from the encoder. 24 | :param mask: The mask. For encoder-decoder attention, it is the encoder mask. 25 | :return: The output of the CrossAttentionSublayer. 26 | """ 27 | residual = query 28 | query = self.apply_pre_norm_if_needed(query) 29 | attn_out, attn_weights = self.attn((query, key, value, mask)) 30 | out = self.apply_residual(residual, attn_out) 31 | out = self.apply_post_norm_if_needed(out) 32 | return out, attn_weights 33 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/cross_attention_sublayer_mm_flat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sublayer import BaseSublayer 4 | from ..attention import ScaledDotAttention 5 | from ...utils.nn import generate_default_mask 6 | 7 | 8 | class FlatMMCrossAttentionSublayer(BaseSublayer): 9 | def __init__(self, model_dim, n_heads, dropout=0.1, 10 | attn_dropout=0.0, is_pre_norm=False): 11 | """ 12 | Creates a FlatMMCrossAttentionSublayer. 13 | :param model_dim: The model dimensions. 
14 | :param n_heads: The number of attention heads. 15 | :param dropout: The dropout rate for the residual connection. 16 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 17 | """ 18 | super().__init__(model_dim, dropout, is_pre_norm) 19 | self.multimodal_attn = ScaledDotAttention( 20 | model_dim, n_heads, attn_dropout) 21 | 22 | def forward(self, query, key_txt, value_txt, mask_txt, 23 | key_img, value_img, mask_img=None): 24 | """ 25 | Performs a forward pass. 26 | :param query: The query for the attention layers. 27 | :param key_txt: The key for the textual modality. If None, it is set to the query. 28 | :param value_txt: The value for the textual modality. If None, it is set to the query. 29 | :param mask_txt: The textual modality mask. 30 | :param key_img: The key for the visual modality. 31 | :param value_img: The value for the visual modality. 32 | :param mask_img: The visual modality mask. Default: None. 33 | :return: 34 | """ 35 | residual = query 36 | query = self.apply_pre_norm_if_needed(query) 37 | if key_txt is None: 38 | key_txt = query 39 | if value_txt is None: 40 | value_txt = query 41 | 42 | combined_mask = self._generate_combined_mask( 43 | key_img, mask_img, mask_txt) 44 | 45 | multimodal_key = torch.cat((key_img, key_txt), dim=0) 46 | multimodal_value = torch.cat((value_img, value_txt), dim=0) 47 | attn_multimodal, attn_weights = self.multimodal_attn( 48 | (query, multimodal_key, multimodal_value, combined_mask)) 49 | 50 | out = self.apply_residual(residual, attn_multimodal) 51 | out = self.apply_post_norm_if_needed(out) 52 | return out, attn_weights 53 | 54 | @staticmethod 55 | def _generate_combined_mask(key_img, mask_img, mask_txt): 56 | if mask_img is None: 57 | mask_img = generate_default_mask(key_img, mask_txt.shape[1]) 58 | combined_mask = torch.cat((mask_img, mask_txt), dim=-1) 59 | return combined_mask 60 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/cross_attention_sublayer_mm_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..attention import ScaledDotAttention 4 | from .base_sublayer import BaseSublayer 5 | 6 | 7 | class ParallelMMCrossAttentionSublayer(BaseSublayer): 8 | def __init__(self, model_dim, n_heads, dropout=0.1, attn_dropout=0.0, is_pre_norm=False, fusion='sum'): 9 | """ 10 | Creates a ParallelCrossAttentionSublayer. 11 | :param model_dim: The model dimensions. 12 | :param n_heads: The number of attention heads. 13 | :param dropout: The dropout rate for the residual connection. 14 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 15 | """ 16 | super().__init__(model_dim, dropout, is_pre_norm) 17 | self.attn_txt = ScaledDotAttention(model_dim, n_heads, attn_dropout) 18 | self.attn_img = ScaledDotAttention(model_dim, n_heads, attn_dropout) 19 | self.fusion = fusion 20 | 21 | def forward(self, query, key_txt, value_txt, mask_txt, key_img, value_img, mask_img=None): 22 | """ 23 | Performs a forward pass over the CrossAttentionSublayer. 24 | :param query: The query. For encoder-decoder attention, it is the output from the previous decoder layer. 25 | :param key_txt: The key. For encoder-decoder attention, it is the output from the encoder. 26 | :param value_txt: The mask. For encoder-decoder attention, it is the output from the encoder. 27 | :param value_img: 28 | :param key_img: 29 | :param mask_txt: The textual encoder mask. 
30 | :param mask_img: The visual features mask. 31 | :return: The output of the CrossAttentionSublayer. 32 | """ 33 | residual = query 34 | query = self.apply_pre_norm_if_needed(query) 35 | 36 | attn_txt, attn_weights_txt = self.attn_txt((query, key_txt, value_txt, mask_txt)) 37 | attn_img, attn_weights_img = self.attn_img((query, key_img, value_img, mask_img)) 38 | 39 | attn_combined = torch.add(attn_txt, attn_img) 40 | out = self.apply_residual(residual, attn_combined) 41 | out = self.apply_post_norm_if_needed(out) 42 | return out, {'txt': attn_weights_txt, 'img': attn_weights_img} 43 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/cross_attention_sublayer_mm_serial.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from .cross_attention_sublayer import CrossAttentionSublayer 4 | 5 | 6 | class SerialMMCrossAttentionSublayer(nn.Module): 7 | def __init__(self, model_dim, n_heads, dropout=0.1, 8 | attn_dropout=0.0, is_pre_norm=False): 9 | """ 10 | Creates a ParallelCrossAttentionSublayer. 11 | :param model_dim: The model dimensions. 12 | :param n_heads: The number of attention heads. 13 | :param dropout: The dropout rate for the residual connection. 14 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 15 | """ 16 | super().__init__() 17 | self.attn_txt = CrossAttentionSublayer( 18 | model_dim, n_heads, dropout, attn_dropout, is_pre_norm) 19 | self.attn_img = CrossAttentionSublayer( 20 | model_dim, n_heads, dropout, attn_dropout, is_pre_norm) 21 | 22 | def forward(self, query, key_txt, value_txt, mask_txt, 23 | key_img, value_img, mask_img=None): 24 | """ 25 | Performs a forward pass over the CrossAttentionSublayer. 26 | :param query: The query. For encoder-decoder attention, it is the output from the previous decoder layer. 27 | :param key_txt: The key. For encoder-decoder attention, it is the output from the encoder. 28 | :param value_txt: The mask. For encoder-decoder attention, it is the output from the encoder. 29 | :param value_img: 30 | :param key_img: 31 | :param mask_txt: The textual encoder mask. 32 | :param mask_img: The visual features mask. 33 | :return: The output of the CrossAttentionSublayer. 34 | """ 35 | attn_txt, attn_weights_txt = self.attn_txt( 36 | query, key_txt, value_txt, mask_txt) 37 | attn_img, attn_weights_img = self.attn_img( 38 | attn_txt, key_img, value_img, mask_img) 39 | return attn_img, {'txt': attn_weights_txt, 'img': attn_weights_img} 40 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/self_attention_sublayer.py: -------------------------------------------------------------------------------- 1 | from ..attention import ScaledDotAttention 2 | from .base_sublayer import BaseSublayer 3 | 4 | 5 | class SelfAttentionSublayer(BaseSublayer): 6 | 7 | def __init__(self, model_dim, n_heads, dropout=0.1, 8 | attn_dropout=0.0, is_pre_norm=False): 9 | """ 10 | Creates a SelfAttentionSublayer. 11 | :param model_dim: The model dimensions. 12 | :param n_heads: The number of attention heads. 13 | :param dropout: The dropout rate for the residual connection. 14 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 
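:param attn_dropout: The dropout rate passed to the inner `ScaledDotAttention` block. Default: 0.0.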
15 | """ 16 | super().__init__(model_dim, dropout, is_pre_norm) 17 | self.attn = ScaledDotAttention(model_dim, n_heads, attn_dropout) 18 | 19 | def forward(self, x, mask=None): 20 | """ 21 | Performs a forward pass over the SelfAttentionSublayer. 22 | :param x: The input. Will be used as query, key and value. 23 | :param mask: The input mask. 24 | :return: The output of the SelfAttentionSublayer. 25 | """ 26 | residual = x 27 | x = self.apply_pre_norm_if_needed(x) 28 | attn_out, attn_weights = self.attn((x, x, x, mask)) 29 | out = self.apply_residual(residual, attn_out) 30 | out = self.apply_post_norm_if_needed(out) 31 | return out, attn_weights 32 | -------------------------------------------------------------------------------- /pysimt/logger.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import logging 3 | 4 | from .utils.resource_mgr import res_mgr 5 | 6 | 7 | def setup(opts=None): 8 | _format = '%(message)s' 9 | 10 | formatter = logging.Formatter(_format) 11 | logger = logging.getLogger('pysimt') 12 | logger.setLevel(logging.DEBUG) 13 | 14 | con_handler = logging.StreamHandler() 15 | con_handler.setFormatter(formatter) 16 | logger.addHandler(con_handler) 17 | 18 | if opts is not None: 19 | log_file = str(pathlib.Path(opts['save_path']) / 20 | opts['subfolder'] / opts['exp_id']) + '.log' 21 | file_handler = logging.FileHandler(log_file, mode='w') 22 | file_handler.setFormatter(formatter) 23 | logger.addHandler(file_handler) 24 | 25 | res_mgr.register_handler(logger) 26 | return logger 27 | -------------------------------------------------------------------------------- /pysimt/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | class NoamScheduler: 2 | """NoamScheduler implementation from the `Attention is all you need!` paper.""" 3 | def __init__(self, optimizer, tf_model_dim, learning_rate, lr_warmup_steps=4000): 4 | """ 5 | Creates a NoamScheduler, implementing the formula from the Attention is all you need! paper. 6 | :param optimizer: The optimizer. 7 | :param tf_model_dim: The model dimensions. 8 | :param learning_rate: The learning rate. 9 | :param lr_warmup_steps: The warmup steps. 10 | """ 11 | assert tf_model_dim is not None, 'tf_model_dim must be set to the model dimensions noam decay' 12 | assert lr_warmup_steps > 0, 'lr_warmup_steps must be greater than 0 for noam decay' 13 | self.optimizer = optimizer 14 | self._num_steps = 0 15 | self.lr_warmup_steps = lr_warmup_steps 16 | self.tf_model_dim = tf_model_dim 17 | self._learning_rate = learning_rate 18 | 19 | def step(self): 20 | """ 21 | Reduces the learning rate according to the formula in Attention is all you need! and performs an optimizer step. 
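The applied rate is `learning_rate * tf_model_dim ** -0.5 * min(step ** -0.5, step * lr_warmup_steps ** -1.5)` (see `get_decay()` below), i.e. a linear warmup over `lr_warmup_steps` steps followed by an inverse square-root decay.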
22 | """ 23 | self._num_steps += 1 24 | current_learning_rate = self.get_decay() * self._learning_rate 25 | for parameter in self.optimizer.param_groups: 26 | parameter['lr'] = current_learning_rate 27 | self.optimizer.step() 28 | 29 | def get_decay(self): 30 | return self.tf_model_dim ** (-0.5) * min(self._num_steps ** (-0.5), 31 | self._num_steps * self.lr_warmup_steps ** (-1.5)) 32 | -------------------------------------------------------------------------------- /pysimt/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .metric import Metric 2 | from .multibleu import BLEUScorer 3 | from .sacrebleu import SACREBLEUScorer 4 | from .meteor import METEORScorer 5 | from .cer import CERScorer 6 | from .wer import WERScorer 7 | from .simnmt import AVPScorer, AVLScorer, CWMScorer 8 | 9 | """These metrics can be used in early stopping.""" 10 | 11 | # Generation related metrics 12 | beam_metrics = ["BLEU", "SACREBLEU", "METEOR", "CER", "WER"] 13 | 14 | metric_info = { 15 | 'BLEU': 'max', 16 | 'SACREBLEU': 'max', 17 | 'METEOR': 'max', 18 | 'LOSS': 'min', 19 | 'ACC': 'max', 20 | 'RECALL': 'max', 21 | 'PRECISION': 'max', 22 | 'F1': 'max', 23 | 'CER': 'min', 24 | 'WER': 'min', # lower is better, consistent with WERScorer's higher_better=False 25 | # simultaneous translation 26 | 'AVP': 'min', # Average proportion (Cho and Esipova, 2016) 27 | 'AVL': 'min', # Average Lagging (Ma et al., 2019 (STACL)) 28 | 'DAL': 'min', # Differentiable AL (not implemented) 29 | 'CW': 'min', # Consecutive wait (Gu et al., 2017) [Not Implemented] 30 | } 31 | -------------------------------------------------------------------------------- /pysimt/metrics/cer.py: -------------------------------------------------------------------------------- 1 | """Character error rate (CER).""" 2 | 3 | from typing import Iterable, Union, Optional 4 | import editdistance 5 | 6 | from .metric import Metric 7 | 8 | 9 | class CERScorer: 10 | """Computes the character error rate (CER) metric and returns a `Metric` 11 | object. 12 | 13 | Args: 14 | refs: List of reference text files. Only the first one will be used 15 | hyps: Either a string denoting the hypotheses' filename, or 16 | a list that contains the hypotheses strings themselves 17 | language: unused 18 | lowercase: unused 19 | """ 20 | def compute(self, refs: Iterable[str], 21 | hyps: Union[str, Iterable[str]], 22 | language: Optional[str] = None, 23 | lowercase: bool = False) -> Metric: 24 | if isinstance(hyps, str): 25 | # hyps is a file 26 | hyp_sents = open(hyps).read().strip().split('\n') 27 | elif isinstance(hyps, list): 28 | hyp_sents = hyps 29 | 30 | # refs is a list, take its first item 31 | with open(refs[0]) as f: 32 | ref_sents = f.read().strip().split('\n') 33 | 34 | assert len(hyp_sents) == len(ref_sents), "CER: # of sentences does not match."
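        # CER below is the accumulated character-level edit distance normalised by the
        # total number of reference characters (x100); a token-level WER over the same
        # sentence pairs is also computed and reported in the verbose score.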
35 | 36 | n_ref_chars = 0 37 | n_ref_tokens = 0 38 | dist_chars = 0 39 | dist_tokens = 0 40 | for hyp, ref in zip(hyp_sents, ref_sents): 41 | hyp_chars = hyp.split(' ') 42 | ref_chars = ref.split(' ') 43 | n_ref_chars += len(ref_chars) 44 | dist_chars += editdistance.eval(hyp_chars, ref_chars) 45 | 46 | # Convert char-based sentences to token-based ones ('<s>' is assumed to mark word boundaries in the char-level files) 47 | hyp_tokens = hyp.replace(' ', '').replace('<s>', ' ').strip().split(' ') 48 | ref_tokens = ref.replace(' ', '').replace('<s>', ' ').strip().split(' ') 49 | n_ref_tokens += len(ref_tokens) 50 | dist_tokens += editdistance.eval(hyp_tokens, ref_tokens) 51 | 52 | cer = (100 * dist_chars) / n_ref_chars 53 | wer = (100 * dist_tokens) / n_ref_tokens 54 | 55 | verbose_score = "{:.3f}% (n_errors = {}, n_ref_chars = {}, WER = {:.3f}%)".format( 56 | cer, dist_chars, n_ref_chars, wer) 57 | 58 | return Metric('CER', cer, verbose_score, higher_better=False) 59 | -------------------------------------------------------------------------------- /pysimt/metrics/metric.py: -------------------------------------------------------------------------------- 1 | """Base Metric class to be derived from.""" 2 | 3 | from functools import total_ordering 4 | 5 | 6 | @total_ordering 7 | class Metric: 8 | """A base class that will be inherited by evaluation metrics. 9 | 10 | Args: 11 | name: A name for the metric that will be kept internally after upper-casing 12 | score: A floating point score 13 | detailed_score: A custom, more detailed string 14 | representing the score given above 15 | higher_better: If `False`, the smaller the better 16 | """ 17 | def __init__(self, name: str, score: float, 18 | detailed_score: str = "", higher_better: bool = True): 19 | self.name = name.upper() 20 | self.score = score 21 | self.detailed_score = detailed_score 22 | self.higher_better = higher_better 23 | 24 | def __eq__(self, other): 25 | return self.score == other.score 26 | 27 | def __lt__(self, other): 28 | return self.score < other.score 29 | 30 | def __repr__(self): 31 | rhs = (self.detailed_score if self.detailed_score 32 | else "%.2f" % self.score) 33 | return self.name + ' = ' + rhs 34 | -------------------------------------------------------------------------------- /pysimt/metrics/multibleu.py: -------------------------------------------------------------------------------- 1 | """Tokenized BLEU through sacreBLEU API.""" 2 | 3 | from typing import Union, Iterable, TextIO 4 | 5 | from sacrebleu import corpus_bleu 6 | 7 | from ..utils.misc import listify 8 | from ..utils.io import read_reference_files, read_hypothesis_file 9 | from .metric import Metric 10 | 11 | 12 | class BLEUScorer: 13 | """Computes the multi-bleu equivalent using SacreBLEU, with tokenization 14 | option disabled.
15 | 16 | Args: 17 | refs: List of reference text files 18 | hyps: A file path, or a list of hypothesis strings or an open file handle 19 | language: unused 20 | """ 21 | def compute(self, refs: Iterable[str], 22 | hyps: Union[str, Iterable[str], TextIO], 23 | language=None) -> Metric: 24 | if isinstance(hyps, str): 25 | hyps = read_hypothesis_file(hyps) 26 | 27 | assert isinstance(hyps, list) 28 | 29 | refs = read_reference_files(*listify(refs)) 30 | 31 | score = corpus_bleu(hyps, refs, tokenize='none') 32 | verbose_score = ' '.join(score.format().split()[2:]) 33 | float_score = score.score 34 | return Metric('BLEU', float_score, verbose_score) 35 | -------------------------------------------------------------------------------- /pysimt/metrics/sacrebleu.py: -------------------------------------------------------------------------------- 1 | """Detokenized BLEU i.e. sacreBLEU.""" 2 | 3 | from typing import Union, Iterable, TextIO 4 | 5 | from sacrebleu import corpus_bleu 6 | 7 | from ..utils.misc import listify 8 | from ..utils.io import read_reference_files, read_hypothesis_file 9 | from .metric import Metric 10 | 11 | 12 | class SACREBLEUScorer: 13 | """Computes the usual SacreBLEU metric with the default v13a tokenizer. 14 | This metric expects de-tokenized references and hypotheses, i.e. 15 | it only makes sense to use this with SPM files and the `de-spm` 16 | post-processing filter. For the more usual tokenized BLEU, check the 17 | `BLEU` metric. 18 | 19 | Args: 20 | refs: List of reference text files 21 | hyps: A file path, or a list of hypothesis strings or an open file handle 22 | language: unused 23 | """ 24 | def compute(self, refs: Iterable[str], 25 | hyps: Union[str, Iterable[str], TextIO], 26 | language=None) -> Metric: 27 | if isinstance(hyps, str): 28 | hyps = read_hypothesis_file(hyps) 29 | 30 | assert isinstance(hyps, list) 31 | 32 | refs = read_reference_files(*listify(refs)) 33 | 34 | score = corpus_bleu(hyps, refs) 35 | verbose_score = ' '.join(score.format().split()[2:]) 36 | float_score = score.score 37 | return Metric('SACREBLEU', float_score, verbose_score) 38 | -------------------------------------------------------------------------------- /pysimt/metrics/wer.py: -------------------------------------------------------------------------------- 1 | """Word error rate (WER).""" 2 | 3 | from typing import Iterable, Union, Optional 4 | import editdistance 5 | 6 | from .metric import Metric 7 | 8 | 9 | class WERScorer: 10 | """Computes the word error rate (WER) metric and returns a `Metric` 11 | object. 12 | 13 | Args: 14 | refs: List of reference text files. Only the first one will be used 15 | hyps: Either a string denoting the hypotheses' filename, or 16 | a list that contains the hypotheses strings themselves 17 | language: unused 18 | lowercase: unused 19 | """ 20 | def compute(self, refs: Iterable[str], 21 | hyps: Union[str, Iterable[str]], 22 | language: Optional[str] = None, 23 | lowercase: bool = False) -> Metric: 24 | if isinstance(hyps, str): 25 | # hyps is a file 26 | hyp_sents = open(hyps).read().strip().split('\n') 27 | elif isinstance(hyps, list): 28 | hyp_sents = hyps 29 | 30 | # refs is a list, take its first item 31 | with open(refs[0]) as f: 32 | ref_sents = f.read().strip().split('\n') 33 | 34 | assert len(hyp_sents) == len(ref_sents), "WER: # of sentences does not match." 
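        # Accumulate token-level edit distances over the whole corpus and normalise by
        # the total reference length. E.g. hyp 'a b c' vs. ref 'a x c d' has an edit
        # distance of 2 against 4 reference tokens for that sentence pair.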
35 | 36 | n_ref_tokens = 0 37 | dist = 0 38 | for hyp, ref in zip(hyp_sents, ref_sents): 39 | hyp_tokens = hyp.split(' ') 40 | ref_tokens = ref.split(' ') 41 | n_ref_tokens += len(ref_tokens) 42 | dist += editdistance.eval(hyp_tokens, ref_tokens) 43 | 44 | score = (100 * dist) / n_ref_tokens 45 | verbose_score = "{:.3f}% (n_errors = {}, n_ref_tokens = {})".format( 46 | score, dist, n_ref_tokens) 47 | 48 | return Metric('WER', score, verbose_score, higher_better=False) 49 | -------------------------------------------------------------------------------- /pysimt/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .snmt_rnn import SimultaneousNMT 2 | from .snmt_rnn_waitk import SimultaneousWaitKNMT 3 | 4 | from .snmt_rnn_encatt import EncoderSelfAttentionSimultaneousNMT 5 | from .snmt_rnn_encatt_waitk import EncoderSelfAttentionSimultaneousWaitKNMT 6 | 7 | from .snmt_tf import SimultaneousTFNMT 8 | from .snmt_tf_waitk import SimultaneousTFWaitKNMT 9 | -------------------------------------------------------------------------------- /pysimt/models/snmt_rnn_encatt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from torch import nn 4 | from ..layers.attention import MultiheadAttention 5 | 6 | from . import SimultaneousNMT 7 | 8 | 9 | logger = logging.getLogger('pysimt') 10 | 11 | 12 | class EncoderSelfAttentionSimultaneousNMT(SimultaneousNMT): 13 | """Simultaneous self-attentive MMT i.e. the ENC-O* model in the paper.""" 14 | 15 | def set_defaults(self): 16 | super().set_defaults() 17 | self.defaults.update({ 18 | 'n_heads': 1, 19 | 'att_dropout': 0.0, 20 | }) 21 | 22 | def setup(self, is_train=True): 23 | """Sets up NN topology by creating the layers.""" 24 | encoders = {} 25 | for key in self.topology.srcs.keys(): 26 | encoders[key] = getattr(self, f'create_{key}_encoder')() 27 | 28 | # Separate out visual encoder to avoid multimodal decoder-side 29 | # attention to be enabled 30 | self.ff_vis_enc = encoders.pop('image') 31 | 32 | self.encoders = nn.ModuleDict(encoders) 33 | self.dec = self.create_decoder(encoders=self.encoders) 34 | 35 | # create the cross-modal self-attention network 36 | self.mm_attn = MultiheadAttention( 37 | self.opts.model['enc_dim'], self.opts.model['enc_dim'], 38 | n_heads=self.opts.model['n_heads'], 39 | dropout=self.opts.model['att_dropout'], attn_type='cross') 40 | self.mm_lnorm = nn.LayerNorm(self.opts.model['enc_dim']) 41 | 42 | # Share encoder and decoder weights 43 | if self.opts.model['tied_emb'] == '3way': 44 | self.encoders[str(self.sl)].emb.weight = self.dec.emb.weight 45 | 46 | def cache_enc_states(self, batch): 47 | """Caches encoder states internally by forward-pass'ing each encoder.""" 48 | self.encoders['src'](batch['src']) 49 | self.ff_vis_enc(batch['image']) 50 | 51 | src_states, src_mask = self.encoders['src'].get_states() 52 | img_states, img_mask = self.ff_vis_enc.get_states() 53 | 54 | # key values are image states 55 | kv = img_states.transpose(0, 1) 56 | attn_out = self.mm_attn( 57 | q=src_states.transpose(0, 1), k=kv, v=kv, 58 | q_mask=src_mask.transpose(0, 1).logical_not()).transpose(0, 1) 59 | 60 | # Inject this into the encoder itself for caching 61 | self.encoders['src']._states = self.mm_lnorm(src_states + attn_out) 62 | -------------------------------------------------------------------------------- /pysimt/models/snmt_rnn_encatt_waitk.py: -------------------------------------------------------------------------------- 
1 | import logging 2 | 3 | from . import EncoderSelfAttentionSimultaneousNMT 4 | 5 | logger = logging.getLogger('pysimt') 6 | 7 | 8 | """This is the training-time wait-k model from: 9 | Ma et al. (2018), STACL: Simultaneous Translation with Implicit Anticipation 10 | and Controllable Latency using Prefix-to-Prefix Framework, arXiv:1810.08398 11 | 12 | The only required parameter is the `k` argument for training. When decoding, 13 | pass the `k` argument explicitly to `pysimt translate`. A large enough `k` 14 | should produce the same results as the `snmt.py` model. 15 | """ 16 | 17 | 18 | class EncoderSelfAttentionSimultaneousWaitKNMT(EncoderSelfAttentionSimultaneousNMT): 19 | def set_defaults(self): 20 | super().set_defaults() 21 | self.defaults.update({ 22 | # Decoding/training simultaneous NMT args 23 | 'translator_type': 'wk', # This model implements train-time wait-k 24 | 'translator_args': {'k': 1e4}, # k as in wait-k in training 25 | 'consecutive_warmup': 0, # consecutive training for this many epochs 26 | }) 27 | 28 | def __init__(self, opts): 29 | super().__init__(opts) 30 | assert self.opts.model['translator_type'] != 'bs', \ 31 | 'Beam search not compatible with simultaneous models' 32 | 33 | def forward(self, batch, **kwargs): 34 | """Training forward-pass with explicit timestep-based loop.""" 35 | loss = 0.0 36 | 37 | k = int(self.opts.model['translator_args']['k']) 38 | if self.training: 39 | epoch_count = kwargs['ectr'] 40 | if epoch_count <= self.opts.model['consecutive_warmup']: 41 | # warming up, use full contexts 42 | k = int(1e4) 43 | 44 | # Cache encoder states first 45 | self.cache_enc_states(batch) 46 | 47 | # Initial state is None i.e. 0. 48 | h = self.dec.f_init() 49 | 50 | # Convert target token indices to embeddings -> T*B*E 51 | y = batch[self.tl] 52 | y_emb = self.dec.emb(y) 53 | 54 | # -1: So that we skip the timestep where input is 55 | for t in range(y_emb.size(0) - 1): 56 | ########################################### 57 | # waitk: pass partial context incrementally 58 | ########################################### 59 | state_dict = self.get_enc_state_dict(up_to=k + t) 60 | log_p, h = self.dec.f_next(state_dict, y_emb[t], h) 61 | loss += self.dec.nll_loss(log_p, y[t + 1]) 62 | 63 | return { 64 | 'loss': loss, 65 | 'n_items': y[1:].nonzero(as_tuple=False).size(0), 66 | } 67 | -------------------------------------------------------------------------------- /pysimt/models/snmt_rnn_waitk.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from . import SimultaneousNMT 4 | 5 | logger = logging.getLogger('pysimt') 6 | 7 | 8 | """This is the training-time wait-k model from: 9 | Ma et al. (2018), STACL: Simultaneous Translation with Implicit Anticipation 10 | and Controllable Latency using Prefix-to-Prefix Framework, arXiv:1810.08398 11 | 12 | The only required parameter is the `k` argument for training. When decoding, 13 | pass the `k` argument explicitly to `pysimt translate`. A large enough `k` 14 | should produce the same results as the `snmt.py` model. 
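In wait-k decoding, the model first reads `k` source tokens and then alternates between emitting one target token and reading one more source token: at target step `t`, only the first `k + t` cached encoder states are visible to the decoder (the `up_to=k + t` call in `forward()` below).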
15 | """ 16 | 17 | 18 | class SimultaneousWaitKNMT(SimultaneousNMT): 19 | 20 | def set_defaults(self): 21 | super().set_defaults() 22 | self.defaults.update({ 23 | # Decoding/training simultaneous NMT args 24 | 'translator_type': 'wk', # This model implements train-time wait-k 25 | 'translator_args': {'k': 1e4}, # k as in wait-k in training 26 | 'consecutive_warmup': 0, # consecutive training for this many epochs 27 | }) 28 | 29 | def __init__(self, opts): 30 | super().__init__(opts) 31 | assert self.opts.model['translator_type'] != 'bs', \ 32 | 'Beam search not compatible with simultaneous models' 33 | 34 | def forward(self, batch, **kwargs): 35 | """Training forward-pass with explicit timestep-based loop.""" 36 | loss = 0.0 37 | 38 | k = int(self.opts.model['translator_args']['k']) 39 | if self.training: 40 | epoch_count = kwargs['ectr'] 41 | if epoch_count <= self.opts.model['consecutive_warmup']: 42 | # warming up, use full contexts 43 | k = int(1e4) 44 | 45 | # Cache encoder states first 46 | self.cache_enc_states(batch) 47 | 48 | # Initial state is None i.e. 0. 49 | h = self.dec.f_init() 50 | 51 | # Convert target token indices to embeddings -> T*B*E 52 | y = batch[self.tl] 53 | y_emb = self.dec.emb(y) 54 | 55 | # -1: So that we skip the timestep where input is 56 | for t in range(y_emb.size(0) - 1): 57 | ########################################### 58 | # waitk: pass partial context incrementally 59 | ########################################### 60 | state_dict = self.get_enc_state_dict(up_to=k + t) 61 | log_p, h = self.dec.f_next(state_dict, y_emb[t], h) 62 | loss += self.dec.nll_loss(log_p, y[t + 1]) 63 | 64 | return { 65 | 'loss': loss, 66 | 'n_items': y[1:].nonzero(as_tuple=False).size(0), 67 | } 68 | -------------------------------------------------------------------------------- /pysimt/models/snmt_tf_waitk.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from . import SimultaneousTFNMT 4 | 5 | logger = logging.getLogger('pysimt') 6 | 7 | """This is the training-time wait-k model from: 8 | Ma et al. (2018), STACL: Simultaneous Translation with Implicit Anticipation 9 | and Controllable Latency using Prefix-to-Prefix Framework, arXiv:1810.08398 10 | 11 | The only required parameter is the `k` argument for training. When decoding, 12 | pass the `k` argument explicitly to `pysimt translate`. A large enough `k` 13 | should produce the same results as the `snmt.py` model. 14 | """ 15 | 16 | 17 | class SimultaneousTFWaitKNMT(SimultaneousTFNMT): 18 | 19 | def set_defaults(self): 20 | super().set_defaults() 21 | self.defaults.update({ 22 | # Decoding/training simultaneous NMT args 23 | 'translator_type': 'wk', # This model implements train-time wait-k 24 | 'translator_args': {'k': 1e4}, # k as in wait-k in training 25 | 'consecutive_warmup': 0, # consecutive training for this many epochs 26 | }) 27 | 28 | def __init__(self, opts): 29 | super().__init__(opts) 30 | assert not self.opts.model['enc_bidirectional'], \ 31 | 'Bidirectional TF encoder is not currently supported for simultaneous MT.' 32 | assert self.opts.model['translator_type'] != 'bs', \ 33 | 'Beam search not compatible with simultaneous models' 34 | 35 | def forward(self, batch, **kwargs): 36 | """ 37 | Performs a forward pass. 38 | :param batch: The batch. 39 | :param kwargs: Any extra arguments. 40 | :return: The output from the forward pass. 
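        `k` is simply forwarded to the parent class' `forward()` so that the wait-k
        context limiting is handled there. During the first `consecutive_warmup` epochs,
        `k` is raised to a large value and training effectively sees the full source context.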
41 | """ 42 | k = int(self.opts.model['translator_args']['k']) 43 | if self.training: 44 | epoch_count = kwargs['ectr'] 45 | if epoch_count <= self.opts.model['consecutive_warmup']: 46 | # warming up, use full contexts 47 | k = int(1e4) 48 | 49 | # Pass 'k' to the model. 50 | return super().forward(batch, k=k) 51 | -------------------------------------------------------------------------------- /pysimt/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .bucket import BucketBatchSampler 2 | from .approx import ApproximateBucketBatchSampler 3 | 4 | 5 | def get_sampler(type_): 6 | return { 7 | 'bucket': BucketBatchSampler, 8 | 'approximate': ApproximateBucketBatchSampler, 9 | }[type_.lower()] 10 | -------------------------------------------------------------------------------- /pysimt/translators/__init__.py: -------------------------------------------------------------------------------- 1 | from .greedy import GreedySearch 2 | from .sim_greedy import SimultaneousGreedySearch 3 | from .waitk_greedy import SimultaneousWaitKGreedySearch 4 | from .beam import BeamSearch 5 | 6 | def get_translator(_type): 7 | return { 8 | 'gs': GreedySearch, 9 | 'sgs': SimultaneousGreedySearch, 10 | 'wk': SimultaneousWaitKGreedySearch, 11 | 'bs': BeamSearch, 12 | }[_type] 13 | -------------------------------------------------------------------------------- /pysimt/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /pysimt/utils/data.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | 3 | 4 | def sort_predictions(data_loader, results): 5 | """Recovers the dataset order when bucketing samplers are used.""" 6 | if getattr(data_loader.batch_sampler, 'store_indices', False): 7 | results = [results[i] for i, j in sorted( 8 | enumerate(data_loader.batch_sampler.orig_idxs), key=lambda k: k[1])] 9 | return results 10 | 11 | 12 | def make_dataloader(dataset, pin_memory=False, num_workers=0): 13 | return DataLoader( 14 | dataset, batch_sampler=dataset.sampler, 15 | collate_fn=dataset.collate_fn, 16 | pin_memory=pin_memory, num_workers=num_workers) 17 | -------------------------------------------------------------------------------- /pysimt/utils/io.py: -------------------------------------------------------------------------------- 1 | import bz2 2 | import gzip 3 | import lzma 4 | import pathlib 5 | 6 | from collections import deque 7 | from typing import List, Iterable, Any 8 | 9 | import numpy as np 10 | from tqdm import tqdm 11 | 12 | 13 | class FileRotator: 14 | """A fixed queue with Path() elements where pushing a new element pops 15 | the oldest one and removes it from disk. 16 | 17 | Arguments: 18 | maxlen(int): The capacity of the queue. 
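    Example (illustrative checkpoint paths):
        >>> rotator = FileRotator(2)
        >>> rotator.push(pathlib.Path('ckpt-1.pt'))
        >>> rotator.push(pathlib.Path('ckpt-2.pt'))
        >>> rotator.push(pathlib.Path('ckpt-3.pt'))  # evicts 'ckpt-1.pt' and deletes it from disk if it exists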
19 | """ 20 | 21 | def __init__(self, maxlen): 22 | self.maxlen = maxlen 23 | self.elems = deque(maxlen=self.maxlen) 24 | 25 | def push(self, elem): 26 | if len(self.elems) == self.maxlen: 27 | # Remove oldest item 28 | popped = self.elems.pop() 29 | if popped.exists(): 30 | popped.unlink() 31 | 32 | # Add new item 33 | self.elems.appendleft(elem) 34 | 35 | def __repr__(self): 36 | return self.elems.__repr__() 37 | 38 | 39 | def fopen(filename: str, key: str = None): 40 | """gzip,bzip2,xz,numpy aware file opening function.""" 41 | assert '*' not in str(filename), "Glob patterns not supported in fopen()" 42 | 43 | filename = str(pathlib.Path(filename).expanduser()) 44 | if filename.endswith('.gz'): 45 | return gzip.open(filename, 'rt') 46 | elif filename.endswith('.bz2'): 47 | return bz2.open(filename, 'rt') 48 | elif filename.endswith(('.xz', '.lzma')): 49 | return lzma.open(filename, 'rt') 50 | elif filename.endswith(('.npy', '.npz')): 51 | if filename.endswith('.npz'): 52 | assert key is not None, "No key= given for .npz file." 53 | return np.load(filename)[key] 54 | else: 55 | return np.load(filename) 56 | else: 57 | # Plain text 58 | return open(filename, 'r') 59 | 60 | 61 | def read_hypothesis_file(fname: str) -> List[str]: 62 | """Reads lines from a text file and returns it as a list of strings.""" 63 | lines = [] 64 | with open(fname) as f: 65 | for line in f: 66 | lines.append(line.strip()) 67 | return lines 68 | 69 | 70 | def read_reference_files(*args) -> List[List[str]]: 71 | """Read every file given in `args` and produce a list of lists that 72 | supports multiple references.""" 73 | all_lines = [] 74 | 75 | for fname in args: 76 | lines = [] 77 | with open(fname) as f: 78 | for line in f: 79 | lines.append(line.strip()) 80 | all_lines.append(lines) 81 | 82 | ref_lens = [len(lns) for lns in all_lines] 83 | assert len(set(ref_lens)) == 1, \ 84 | "Reference streams do not have the same lengths." 
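    # The returned structure is indexed as all_lines[stream][sentence]: one inner list
    # per reference file, each holding one stripped line per sentence.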
85 | 86 | return all_lines 87 | 88 | 89 | def progress_bar(iterator: Iterable[Any], unit: str = 'it'): 90 | """Wraps the given iterator into tqdm for progress bar rendering.""" 91 | return tqdm(iterator, unit=unit, ncols=70, smoothing=0) 92 | -------------------------------------------------------------------------------- /pysimt/utils/ml_metrics.py: -------------------------------------------------------------------------------- 1 | class Loss: 2 | """Accumulates and computes correctly training and validation losses.""" 3 | def __init__(self): 4 | self.reset() 5 | 6 | def reset(self): 7 | self._loss = 0 8 | self._denom = 0 9 | self.batch_loss = 0 10 | 11 | def update(self, loss, n_items): 12 | # Store last batch loss 13 | self.batch_loss = loss.item() 14 | # Add it to cumulative loss 15 | self._loss += self.batch_loss 16 | # Normalize batch loss w.r.t n_items 17 | self.batch_loss /= n_items 18 | # Accumulate n_items inside the denominator 19 | self._denom += n_items 20 | 21 | def get(self): 22 | if self._denom == 0: 23 | return 0 24 | return self._loss / self._denom 25 | 26 | @property 27 | def denom(self): 28 | return self._denom 29 | -------------------------------------------------------------------------------- /pysimt/utils/tensorboard.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | 4 | from torch.utils.tensorboard import SummaryWriter 5 | 6 | 7 | class TensorBoard: 8 | def __init__(self, model, log_dir, exp_id, subfolder): 9 | self.model = model 10 | self.log_dir = log_dir 11 | self.exp_id = exp_id 12 | self.subfolder = subfolder 13 | self.writer = None 14 | self.available = bool(self.log_dir) 15 | 16 | # Call setup 17 | self.setup() 18 | 19 | def _nop(self, *args, **kwargs): 20 | return 21 | 22 | def setup(self): 23 | """Setups TensorBoard logger.""" 24 | if not self.available: 25 | self.replace_loggers() 26 | return 27 | 28 | # Construct full folder path 29 | self.log_dir = pathlib.Path(self.log_dir).expanduser() 30 | self.log_dir = self.log_dir / self.subfolder / self.exp_id 31 | self.log_dir.mkdir(parents=True, exist_ok=True) 32 | 33 | # Set up summary writer 34 | self.writer = SummaryWriter(self.log_dir) 35 | 36 | def replace_loggers(self): 37 | """Replace all log_* methods with dummy _nop.""" 38 | self.log_metrics = self._nop 39 | self.log_scalar = self._nop 40 | self.log_activations = self._nop 41 | self.log_gradients = self._nop 42 | 43 | def log_metrics(self, metrics, step, suffix=''): 44 | """Logs evaluation metrics as scalars.""" 45 | for metric in metrics: 46 | self.writer.add_scalar(suffix + metric.name, metric.score, 47 | global_step=step) 48 | 49 | def log_scalar(self, name, value, step): 50 | """Logs single scalar value.""" 51 | self.writer.add_scalar(name, value, global_step=step) 52 | 53 | def log_activations(self, step): 54 | """Logs activations by layer.""" 55 | pass 56 | 57 | def log_gradients(self, step): 58 | """Logs gradients by layer.""" 59 | pass 60 | 61 | def close(self): 62 | """Closes TensorBoard handle.""" 63 | if self.available: 64 | self.writer.close() 65 | 66 | def __repr__(self): 67 | if not self.log_dir: 68 | return "No 'tensorboard_dir' given in config" 69 | return "TensorBoard is active" 70 | -------------------------------------------------------------------------------- /scripts/decode_greedy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Only decode for snmt models and not waitk. 
It does not make sense for the latter 4 | 5 | # Set GPU0 if not set 6 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 7 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 8 | 9 | 10 | # Greedy decode everything (batched) 11 | for ckpt in `find -L -name '*simultaneousnmt-*.best.loss.ckpt'`; do 12 | fname=`basename $ckpt` 13 | prefix=${ckpt/.best.loss.ckpt/} 14 | log=${ckpt/.best.loss.ckpt/.log} 15 | grep -q 'Training finished' ${log} 16 | if [ "$?" == "0" ]; then 17 | # check for the availability of one test set 18 | if [ ! -f "${prefix}.test_2017_flickr.gs" ]; then 19 | pysimt translate -m 60 -s ${test_set} -f gs -o ${prefix} $ckpt 20 | fi 21 | fi 22 | done 23 | -------------------------------------------------------------------------------- /scripts/decode_test_waitk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set GPU0 if not set 4 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 5 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 6 | 7 | 8 | # Test-time wait-k for SNMT (not re-trained) models 9 | for ckpt in `find -L -name '*simultaneousnmt-*.best.loss.ckpt'`; do 10 | fname=`basename $ckpt` 11 | prefix=${ckpt/.best.loss.ckpt/} 12 | log=${ckpt/.best.loss.ckpt/.log} 13 | grep -q 'Training finished' ${log} 14 | if [ "$?" == "0" ]; then 15 | # check for the availabilty of one test set 16 | if [ ! -f "${prefix}.test_2017_flickr.wait1.gs" ]; then 17 | pysimt translate -m 60 -s ${test_set} -b 1 -f wk --n-init-tokens "1,2,3,4,5,6,7" \ 18 | -o ${prefix} $ckpt 19 | fi 20 | fi 21 | done 22 | -------------------------------------------------------------------------------- /scripts/decode_train_waitk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set GPU0 if not set 4 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 5 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 6 | 7 | 8 | # Train-time wait-k 9 | for ckpt in `find -L -name '*simultaneouswaitk*.best.loss.ckpt'`; do 10 | fname=`basename $ckpt` 11 | model=`dirname $ckpt` 12 | k=`echo $model | sed -r 's#\./wait([0-9])-rnn.*#\1#'` 13 | prefix=${ckpt/.best.loss.ckpt/} 14 | log=${ckpt/.best.loss.ckpt/.log} 15 | grep -q 'Training finished' ${log} 16 | if [ "$?" == "0" ]; then 17 | # check for the availability of one test set 18 | if [ ! -f "${prefix}.test_2017_flickr.wait${k}.gs" ]; then 19 | pysimt translate -m 60 -s ${test_set} -b 1 -f wk --n-init-tokens "$k" \ 20 | -o ${prefix} $ckpt 21 | fi 22 | fi 23 | done 24 | -------------------------------------------------------------------------------- /scripts/decode_wait_if_diff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set GPU0 if not set 4 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 5 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 6 | 7 | # Simultaneous greedy decode (Cho and Esipova, 2016) 8 | # only decode SNMT models and not waitk 9 | for ckpt in `find -L -name '*simultaneousnmt-*.best.loss.ckpt'`; do 10 | fname=`basename $ckpt` 11 | prefix=${ckpt/.best.loss.ckpt/} 12 | log=${ckpt/.best.loss.ckpt/.log} 13 | grep -q 'Training finished' ${log} 14 | if [ "$?" == "0" ]; then 15 | # check for the availabilty of one test set 16 | if [ ! 
-f "${prefix}.test_2017_flickr.s1_d1_wait_if_diff.gs" ]; then 17 | pysimt translate -m 60 -s ${test_set} -b 1 -f sgs --n-init-tokens "1,2" \ 18 | --delta "1" --criteria "wait_if_diff" -o ${prefix} $ckpt 19 | fi 20 | fi 21 | done 22 | -------------------------------------------------------------------------------- /scripts/decode_wait_if_worse.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set GPU0 if not set 4 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 5 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 6 | 7 | # Simultaneous greedy decode (Cho and Esipova, 2016) 8 | # only decode SNMT models and not waitk 9 | for ckpt in `find -L -name '*simultaneousnmt-*.best.loss.ckpt'`; do 10 | fname=`basename $ckpt` 11 | prefix=${ckpt/.best.loss.ckpt/} 12 | log=${ckpt/.best.loss.ckpt/.log} 13 | grep -q 'Training finished' ${log} 14 | if [ "$?" == "0" ]; then 15 | # check for the availabilty of one test set 16 | if [ ! -f "${prefix}.test_2017_flickr.s1_d1_wait_if_worse.gs" ]; then 17 | pysimt translate -m 60 -s ${test_set} -b 1 -f sgs --n-init-tokens "1,2" \ 18 | --delta "1" --criteria "wait_if_worse" -o ${prefix} $ckpt 19 | fi 20 | fi 21 | done 22 | -------------------------------------------------------------------------------- /scripts/delay_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | from pathlib import Path 6 | 7 | import tabulate 8 | import sacrebleu 9 | 10 | from pysimt.metrics.simnmt import AVPScorer, AVLScorer 11 | 12 | 13 | """This script should be run from within the parent folder where each pysimt 14 | experiment resides.""" 15 | 16 | 17 | def read_lines_from_file(fname): 18 | lines = [] 19 | with open(fname) as f: 20 | for line in f: 21 | lines.append(line.strip()) 22 | return lines 23 | 24 | 25 | if __name__ == '__main__': 26 | results = {} 27 | trglang = sys.argv[1] 28 | if trglang not in ('en', 'de', 'fr', 'cs'): 29 | print(f'Usage: {sys.argv[0]} [action files]') 30 | sys.exit(1) 31 | 32 | scorers = [ 33 | AVPScorer(add_trg_eos=False), 34 | AVLScorer(add_trg_eos=False), 35 | ] 36 | 37 | act_files = sys.argv[2:] 38 | 39 | # get test set 40 | test_sets = set([a.split('.')[1] for a in act_files]) 41 | assert len(test_sets) == 1, "Different test set files given" 42 | test_set = list(test_sets)[0] 43 | print(f'Test set is {test_set}, target language is {trglang}\n\n') 44 | 45 | ref_root = Path(__file__).parent / f'../data/multi30k/en-{trglang}' 46 | ref_file = ref_root / f'{test_set}.lc.norm.tok.{trglang}.dehyph' 47 | if ref_file.exists(): 48 | refs = read_lines_from_file(ref_file) 49 | else: 50 | raise RuntimeError(f'{ref_file} does not exist') 51 | 52 | for act_file in act_files: 53 | # Compute delay metrics 54 | scores = [s.compute_from_file(act_file) for s in scorers] 55 | results[act_file] = {s.name: s.score for s in scores} 56 | 57 | # try to reach hypothesis file 58 | hyp_file = act_file.replace('.acts', '.gs') 59 | if os.path.exists(hyp_file): 60 | hyps = read_lines_from_file(hyp_file) 61 | bleu = sacrebleu.corpus_bleu( 62 | hyps, [refs], tokenize='none', lowercase=False).score 63 | else: 64 | bleu = -1.0 65 | 66 | results[act_file]['BLEU'] = bleu 67 | results[act_file]['Q/AVP'] = bleu / scores[0].score 68 | 69 | if results: 70 | headers = ['Name'] + list(next(iter(results.values())).keys()) 71 | results = [[name, *[scores[key] for key in headers[1:]]] for name, scores in 
results.items()] 72 | results = sorted(results, key=lambda x: x[headers.index('BLEU')]) 73 | print(tabulate.tabulate(results, headers=headers, floatfmt='.2f')) 74 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import setuptools 3 | 4 | 5 | def get_pysimt_version(): 6 | with open('pysimt/__init__.py') as f: 7 | s = f.read().split('\n')[0] 8 | if '__version__' not in s: 9 | raise RuntimeError('Can not detect version from pysimt/__init__.py') 10 | return eval(s.split(' ')[-1]) 11 | 12 | 13 | setuptools.setup( 14 | name='pysimt', 15 | version=get_pysimt_version(), 16 | description='A PyTorch framework for Simultaneous Neural Machine Translation', 17 | url='https://github.com/ImperialNLP/pysimt', 18 | author='Ozan Caglayan, Veneta Haralampieva, Julia Ive, Andy Li', 19 | author_email='o.caglayan@ic.ac.uk', 20 | license='MIT', 21 | classifiers=[ 22 | 'Intended Audience :: Science/Research', 23 | 'Topic :: Scientific/Engineering', 24 | 'License :: OSI Approved :: MIT License', 25 | 'Programming Language :: Python :: 3 :: Only', 26 | 'Programming Language :: Python :: 3.7', 27 | 'Operating System :: POSIX', 28 | ], 29 | keywords='nmt neural-mt simultaneous translation sequence-to-sequence deep-learning pytorch', 30 | python_requires='~=3.7', 31 | install_requires=[ 32 | 'numpy', 'tqdm', 'pillow', 33 | 'torch', 'torchvision', 'sacrebleu>1.4.10', 34 | ], 35 | packages=setuptools.find_packages(), 36 | scripts=[str(p) for p in pathlib.Path('bin').glob('*')], 37 | zip_safe=False) 38 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 90 3 | ignore = E116,E241,E265,W504,E501 4 | exclude = docs,examples,build 5 | --------------------------------------------------------------------------------