├── .gitignore ├── LICENSE ├── README.md ├── bin ├── pysimt ├── pysimt-build-vocab ├── pysimt-coco-metrics └── pysimt-install-extra ├── configs ├── en-de │ ├── rnn-mmt │ │ ├── consc-rnn-mmt-dec-oc.conf │ │ ├── consc-rnn-mmt-dec-od.conf │ │ ├── consc-rnn-mmt-enc-od.conf │ │ ├── wait1-rnn-mmt-dec-oc.conf │ │ ├── wait1-rnn-mmt-dec-od.conf │ │ ├── wait1-rnn-mmt-enc-od.conf │ │ ├── wait2-rnn-mmt-dec-oc.conf │ │ ├── wait2-rnn-mmt-dec-od.conf │ │ ├── wait2-rnn-mmt-enc-od.conf │ │ ├── wait3-rnn-mmt-dec-oc.conf │ │ ├── wait3-rnn-mmt-dec-od.conf │ │ ├── wait3-rnn-mmt-enc-od.conf │ │ ├── wait4-rnn-mmt-dec-oc.conf │ │ ├── wait4-rnn-mmt-dec-od.conf │ │ ├── wait4-rnn-mmt-enc-od.conf │ │ ├── wait5-rnn-mmt-dec-oc.conf │ │ ├── wait5-rnn-mmt-dec-od.conf │ │ ├── wait5-rnn-mmt-enc-od.conf │ │ ├── wait6-rnn-mmt-dec-oc.conf │ │ ├── wait6-rnn-mmt-dec-od.conf │ │ ├── wait6-rnn-mmt-enc-od.conf │ │ ├── wait7-rnn-mmt-dec-oc.conf │ │ ├── wait7-rnn-mmt-dec-od.conf │ │ └── wait7-rnn-mmt-enc-od.conf │ ├── rnn-nmt │ │ ├── consc-rnn.conf │ │ ├── wait1-rnn.conf │ │ ├── wait2-rnn.conf │ │ ├── wait3-rnn.conf │ │ ├── wait4-rnn.conf │ │ ├── wait5-rnn.conf │ │ ├── wait6-rnn.conf │ │ └── wait7-rnn.conf │ ├── transformers-mmt │ │ ├── consc-tf-mmt-dec-oc.conf │ │ ├── consc-tf-mmt-dec-od.conf │ │ ├── consc-tf-mmt-enc-od.conf │ │ ├── wait1-tf-mmt-dec-oc.conf │ │ ├── wait1-tf-mmt-dec-od.conf │ │ ├── wait1-tf-mmt-enc-od.conf │ │ ├── wait2-tf-mmt-dec-oc.conf │ │ ├── wait2-tf-mmt-dec-od.conf │ │ ├── wait2-tf-mmt-enc-od.conf │ │ ├── wait3-tf-mmt-dec-oc.conf │ │ ├── wait3-tf-mmt-dec-od.conf │ │ ├── wait3-tf-mmt-enc-od.conf │ │ ├── wait5-tf-mmt-dec-oc.conf │ │ ├── wait5-tf-mmt-dec-od.conf │ │ ├── wait5-tf-mmt-enc-od.conf │ │ ├── wait7-tf-mmt-dec-oc.conf │ │ ├── wait7-tf-mmt-dec-od.conf │ │ └── wait7-tf-mmt-enc-od.conf │ └── transformers-nmt │ │ ├── consc-tf.conf │ │ ├── wait1-tf.conf │ │ ├── wait2-tf.conf │ │ ├── wait3-tf.conf │ │ ├── wait4-tf.conf │ │ ├── wait5-tf.conf │ │ ├── wait6-tf.conf │ │ └── wait7-tf.conf └── en-fr │ ├── rnn-mmt │ ├── consc-rnn-mmt-dec-oc.conf │ ├── consc-rnn-mmt-dec-od.conf │ ├── consc-rnn-mmt-enc-od.conf │ ├── wait1-rnn-mmt-dec-oc.conf │ ├── wait1-rnn-mmt-dec-od.conf │ ├── wait1-rnn-mmt-enc-od.conf │ ├── wait2-rnn-mmt-dec-oc.conf │ ├── wait2-rnn-mmt-dec-od.conf │ ├── wait2-rnn-mmt-enc-od.conf │ ├── wait3-rnn-mmt-dec-oc.conf │ ├── wait3-rnn-mmt-dec-od.conf │ ├── wait3-rnn-mmt-enc-od.conf │ ├── wait4-rnn-mmt-dec-oc.conf │ ├── wait4-rnn-mmt-dec-od.conf │ ├── wait4-rnn-mmt-enc-od.conf │ ├── wait5-rnn-mmt-dec-oc.conf │ ├── wait5-rnn-mmt-dec-od.conf │ ├── wait5-rnn-mmt-enc-od.conf │ ├── wait6-rnn-mmt-dec-oc.conf │ ├── wait6-rnn-mmt-dec-od.conf │ ├── wait6-rnn-mmt-enc-od.conf │ ├── wait7-rnn-mmt-dec-oc.conf │ ├── wait7-rnn-mmt-dec-od.conf │ └── wait7-rnn-mmt-enc-od.conf │ ├── rnn-nmt │ ├── consc-rnn.conf │ ├── wait1-rnn.conf │ ├── wait2-rnn.conf │ ├── wait3-rnn.conf │ ├── wait4-rnn.conf │ ├── wait5-rnn.conf │ ├── wait6-rnn.conf │ └── wait7-rnn.conf │ ├── transformers-mmt │ ├── consc-tf-mmt-dec-oc.conf │ ├── consc-tf-mmt-dec-od.conf │ ├── consc-tf-mmt-enc-od.conf │ ├── wait1-tf-mmt-dec-oc.conf │ ├── wait1-tf-mmt-dec-od.conf │ ├── wait1-tf-mmt-enc-od.conf │ ├── wait2-tf-mmt-dec-oc.conf │ ├── wait2-tf-mmt-dec-od.conf │ ├── wait2-tf-mmt-enc-od.conf │ ├── wait3-tf-mmt-dec-oc.conf │ ├── wait3-tf-mmt-dec-od.conf │ ├── wait3-tf-mmt-enc-od.conf │ ├── wait5-tf-mmt-dec-oc.conf │ ├── wait5-tf-mmt-dec-od.conf │ ├── wait5-tf-mmt-enc-od.conf │ ├── wait7-tf-mmt-dec-oc.conf │ ├── wait7-tf-mmt-dec-od.conf │ └── wait7-tf-mmt-enc-od.conf │ 
└── transformers-nmt │ ├── consc-tf.conf │ ├── wait1-tf.conf │ ├── wait2-tf.conf │ ├── wait3-tf.conf │ ├── wait4-tf.conf │ ├── wait5-tf.conf │ ├── wait6-tf.conf │ └── wait7-tf.conf ├── data ├── README.md ├── moses-5cbafabfd │ ├── README.md │ ├── share │ │ └── nonbreaking_prefixes │ │ │ ├── README.txt │ │ │ ├── nonbreaking_prefix.cs │ │ │ ├── nonbreaking_prefix.de │ │ │ ├── nonbreaking_prefix.en │ │ │ └── nonbreaking_prefix.fr │ └── tokenizer │ │ ├── basic-protected-patterns │ │ ├── detokenizer.perl │ │ ├── lowercase.perl │ │ ├── normalize-punctuation.perl │ │ ├── remove-non-printing-char.perl │ │ ├── replace-unicode-punctuation.perl │ │ └── tokenizer.perl └── multi30k │ ├── README.md │ ├── features │ └── README.md │ ├── image_lists │ ├── test_2016_flickr.imglist │ ├── test_2017_flickr.imglist │ ├── test_2017_mscoco.imglist │ ├── test_2018_flickr.imglist │ ├── train.imglist │ └── val.imglist │ ├── prepare.sh │ └── raw │ ├── README.md │ ├── test_2016_flickr.cs.gz │ ├── test_2016_flickr.de.gz │ ├── test_2016_flickr.en.gz │ ├── test_2016_flickr.fr.gz │ ├── test_2017_flickr.de.gz │ ├── test_2017_flickr.en.gz │ ├── test_2017_flickr.fr.gz │ ├── test_2017_mscoco.de.gz │ ├── test_2017_mscoco.en.gz │ ├── test_2017_mscoco.fr.gz │ ├── test_2018_flickr.cs.gz │ ├── test_2018_flickr.de.gz │ ├── test_2018_flickr.en.gz │ ├── test_2018_flickr.fr.gz │ ├── train.cs.gz │ ├── train.de.gz │ ├── train.en.gz │ ├── train.fr.gz │ ├── val.cs.gz │ ├── val.de.gz │ ├── val.en.gz │ └── val.fr.gz ├── doccov.svg ├── docs ├── datasets │ ├── base.html │ ├── collate.html │ ├── imagefolder.html │ ├── index.html │ ├── kaldi.html │ ├── multimodal.html │ ├── numpy.html │ ├── objdet.html │ └── text.html ├── evaluator.html ├── index.html ├── layers │ ├── attention │ │ ├── dot.html │ │ ├── hierarchical.html │ │ ├── index.html │ │ ├── mlp.html │ │ ├── multihead.html │ │ ├── scaled_dot.html │ │ └── uniform.html │ ├── decoders │ │ ├── conditional.html │ │ ├── index.html │ │ └── tf_decoder.html │ ├── embedding.html │ ├── encoders │ │ ├── index.html │ │ ├── recurrent.html │ │ ├── speech_lstm.html │ │ ├── transformers.html │ │ └── vis_features.html │ ├── ff.html │ ├── fusion.html │ ├── index.html │ ├── pool.html │ ├── positionwise_ff.html │ ├── selector.html │ └── transformers │ │ ├── base_sublayer.html │ │ ├── cross_attention_sublayer.html │ │ ├── cross_attention_sublayer_mm_flat.html │ │ ├── cross_attention_sublayer_mm_hier.html │ │ ├── cross_attention_sublayer_mm_parallel.html │ │ ├── cross_attention_sublayer_mm_serial.html │ │ ├── index.html │ │ └── self_attention_sublayer.html ├── lr_scheduler.html ├── mainloop.html ├── metrics │ ├── cer.html │ ├── index.html │ ├── meteor.html │ ├── metric.html │ ├── multibleu.html │ ├── sacrebleu.html │ ├── simnmt.html │ └── wer.html ├── models │ ├── index.html │ ├── snmt_rnn.html │ ├── snmt_rnn_encatt.html │ ├── snmt_rnn_encatt_waitk.html │ ├── snmt_rnn_waitk.html │ ├── snmt_tf.html │ └── snmt_tf_waitk.html ├── monitor.html ├── optimizer.html ├── samplers │ ├── approx.html │ ├── bucket.html │ └── index.html ├── stranslator.html ├── translators │ ├── greedy.html │ ├── index.html │ ├── sim_greedy.html │ └── waitk_greedy.html ├── utils │ ├── batch.html │ ├── data.html │ ├── device.html │ ├── filterchain.html │ ├── index.html │ ├── io.html │ ├── kaldi.html │ ├── misc.html │ ├── ml_metrics.html │ ├── nn.html │ ├── resource_mgr.html │ ├── tensorboard.html │ └── topology.html └── vocabulary.html ├── environment.yml ├── experiments └── README.md ├── logo.png ├── make_docs.sh ├── pysimt ├── __init__.py ├── 
cocoeval │ ├── README.md │ ├── __init__.py │ ├── bleu │ │ ├── LICENSE.bleu │ │ ├── __init__.py │ │ ├── bleu.py │ │ └── bleu_scorer.py │ ├── cider │ │ ├── __init__.py │ │ ├── cider.py │ │ └── cider_scorer.py │ ├── meteor │ │ ├── __init__.py │ │ └── meteor.py │ └── rouge │ │ ├── __init__.py │ │ └── rouge.py ├── config.py ├── datasets │ ├── __init__.py │ ├── collate.py │ ├── multimodal.py │ ├── numpy.py │ ├── objdet.py │ └── text.py ├── docs.md ├── evaluator.py ├── layers │ ├── __init__.py │ ├── attention │ │ ├── __init__.py │ │ ├── dot.py │ │ ├── hierarchical.py │ │ ├── mlp.py │ │ ├── multihead.py │ │ ├── scaled_dot.py │ │ └── uniform.py │ ├── decoders │ │ ├── __init__.py │ │ ├── conditional.py │ │ └── tf_decoder.py │ ├── embedding.py │ ├── encoders │ │ ├── __init__.py │ │ ├── recurrent.py │ │ ├── speech_lstm.py │ │ ├── transformers.py │ │ └── vis_features.py │ ├── ff.py │ ├── fusion.py │ ├── pool.py │ ├── positionwise_ff.py │ ├── selector.py │ └── transformers │ │ ├── __init__.py │ │ ├── base_sublayer.py │ │ ├── cross_attention_sublayer.py │ │ ├── cross_attention_sublayer_mm_flat.py │ │ ├── cross_attention_sublayer_mm_hier.py │ │ ├── cross_attention_sublayer_mm_parallel.py │ │ ├── cross_attention_sublayer_mm_serial.py │ │ └── self_attention_sublayer.py ├── logger.py ├── lr_scheduler.py ├── mainloop.py ├── metrics │ ├── __init__.py │ ├── cer.py │ ├── meteor.py │ ├── metric.py │ ├── multibleu.py │ ├── sacrebleu.py │ ├── simnmt.py │ └── wer.py ├── models │ ├── __init__.py │ ├── snmt_rnn.py │ ├── snmt_rnn_encatt.py │ ├── snmt_rnn_encatt_waitk.py │ ├── snmt_rnn_waitk.py │ ├── snmt_tf.py │ └── snmt_tf_waitk.py ├── monitor.py ├── optimizer.py ├── samplers │ ├── __init__.py │ ├── approx.py │ └── bucket.py ├── stranslator.py ├── translators │ ├── __init__.py │ ├── beam.py │ ├── greedy.py │ ├── sim_greedy.py │ └── waitk_greedy.py ├── utils │ ├── __init__.py │ ├── batch.py │ ├── data.py │ ├── device.py │ ├── filterchain.py │ ├── io.py │ ├── kaldi.py │ ├── misc.py │ ├── ml_metrics.py │ ├── nn.py │ ├── resource_mgr.py │ ├── tensorboard.py │ └── topology.py └── vocabulary.py ├── scripts ├── decode_greedy.sh ├── decode_test_waitk.sh ├── decode_train_waitk.sh ├── decode_wait_if_diff.sh ├── decode_wait_if_worse.sh ├── delay_analysis.py └── delay_metrics.py ├── setup.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | pysimt.egg-info 3 | .cache 4 | build/ 5 | dist/ 6 | doc/_build/ 7 | ipynb/.ipynb_checkpoints 8 | .idea/ 9 | data/multi30k/en-de 10 | data/multi30k/en-fr 11 | data/multi30k/en-cs 12 | data/multi30k/features 13 | experiments 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 NLP@Imperial 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bin/pysimt-install-extra: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | which java &> /dev/null 3 | if [[ "x$?" == "x1" ]]; then 4 | echo "'java' not found in PATH. You need to have a working JRE installation for METEOR." 5 | else 6 | echo "OK: Found 'java'." 7 | fi 8 | 9 | CACHE=${HOME}/.pysimt 10 | METEOR=${CACHE}/meteor-data 11 | 12 | if [[ ! -d ${CACHE} ]]; then 13 | echo "Creating ${CACHE} folder..." 14 | mkdir -p ${CACHE} 15 | fi 16 | 17 | if [[ ! -d $METEOR ]]; then 18 | git clone https://github.com/ozancaglayan/meteor-1.5-data.git $METEOR 19 | pushd $METEOR 20 | ./recompress.sh 21 | popd 22 | fi 23 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/consc-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | 51 | aux_dropout: 0.5 52 | aux_lnorm: True 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | dec_inp_activ: None 57 | mm_fusion_op: sum 58 | mm_fusion_dropout: 0.0 59 | 60 | 61 | [data] 62 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 63 | img_root: ./data/multi30k/features/butd 64 | 65 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 66 | 'image': '${img_root}/train_obj36.npz', 67 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 68 | 69 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 70 | 'image': '${img_root}/valid_obj36.npz', 71 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 74 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 75 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 76 | 77 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 78 | 'image': 
'${img_root}/test_2017_flickr_obj36.npz', 79 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 80 | 81 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 82 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 83 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 84 | 85 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 86 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 87 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 88 | 89 | [vocabulary] 90 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 91 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 92 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/consc-rnn-mmt-enc-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: EncoderSelfAttentionSimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | 51 | aux_dropout: 0.5 52 | aux_lnorm: True 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | dec_inp_activ: None 57 | 58 | feat_mode: roi_feats 59 | n_heads: 1 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | 
src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait1-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 1} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait2-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | 
eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 2} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait3-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 
36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 3} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait4-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 4} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | 
[data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait5-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 5} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 
'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait6-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 6} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 
85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-mmt/wait7-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | translator_args: {'k': 7} 51 | 52 | aux_dropout: 0.5 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | aux_lnorm: True 57 | dec_inp_activ: None 58 | mm_fusion_op: sum 59 | mm_fusion_dropout: 0.0 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/consc-rnn.conf: 
-------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | 50 | [data] 51 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 52 | 53 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 54 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 55 | 56 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 57 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 58 | 59 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 60 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 61 | 62 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 63 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 64 | 65 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 66 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 67 | 68 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 69 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 70 | 71 | [vocabulary] 72 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 73 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 74 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait1-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 1} 50 | 51 | [data] 52 | txt_root: 
./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait2-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 2} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | 
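
Editor's note on the `.conf` files above and below: within each config family the `wait*` variants differ only in the `translator_args: {'k': N}` setting (the `consc-*` variants omit it and use `model_type: SimultaneousNMT` / `SimultaneousTFNMT` instead of the wait-k model types); the optimizer, model dimensions, and data paths are otherwise shared. The `${vars:sl}` / `${data:txt_root}` references are resolved by pysimt's own loader (`pysimt/config.py`). The snippet below is only an illustrative sketch — an assumption, not pysimt's actual loader — showing how the same `${section:option}` syntax behaves under Python's standard-library `configparser.ExtendedInterpolation`, using a few values copied from these configs.

```python
# Illustrative sketch only: pysimt/config.py is the authoritative parser for
# these .conf files; this just demonstrates the ${section:option} interpolation
# syntax they use, via the stdlib configparser.
from configparser import ConfigParser, ExtendedInterpolation

EXAMPLE = """
[vars]
sl: en
tl: de

[train]
save_path: ./experiments/${vars:sl}-${vars:tl}
tensorboard_dir: ${save_path}/tb_dir

[data]
txt_root: ./data/multi30k/${vars:sl}-${vars:tl}

[vocabulary]
src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl}
"""

parser = ConfigParser(interpolation=ExtendedInterpolation())
parser.read_string(EXAMPLE)

# ${save_path} is a same-section reference, ${vars:sl} a cross-section one.
print(parser["train"]["tensorboard_dir"])  # ./experiments/en-de/tb_dir
print(parser["vocabulary"]["src"])         # ./data/multi30k/en-de/train.lc.norm.tok.vocab.en
```

For how the resolved options are actually consumed, the `bin/pysimt` entry point and `pysimt/config.py` listed in the tree above are the places to look.
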
-------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait3-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 3} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait4-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | 
direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 4} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait5-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 5} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 
74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait6-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 6} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/rnn-nmt/wait7-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | 
tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 7} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/consc-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | 57 | [data] 58 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 59 | 60 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 62 | 63 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 73 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 74 | 75 | test_2018_flickr_set: {'src': 
'${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 76 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | [vocabulary] 79 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 80 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 81 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait1-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 1} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait2-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 
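# With lr_decay set to noam, this presumably follows the inverse-square-root
# schedule of Vaswani et al. (2017):
#   lr(step) = scale * tf_model_dim^(-0.5) * min(step^(-0.5), step * lr_warmup_steps^(-1.5))
# i.e. a linear warm-up over the first lr_warmup_steps (4000) steps followed by
# a 1/sqrt(step) decay. With tf_model_dim = 512 the peak at step 4000 is roughly
# 512^(-0.5) * 4000^(-0.5) ≈ 7.0e-4 before scaling; treating the lr value of 0.2
# below as that scale factor (an assumption about the pysimt trainer, not stated
# in this file) would give a peak learning rate of about 1.4e-4.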
26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 2} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait3-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 3} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': 
'${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait4-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 4} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait5-tf.conf: 
-------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 5} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait6-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | 
enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 6} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-de/transformers-nmt/wait7-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: de 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 7} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': 
'${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-mmt/consc-rnn-mmt-dec-od.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | 51 | aux_dropout: 0.5 52 | aux_lnorm: True 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | dec_inp_activ: None 57 | mm_fusion_op: sum 58 | mm_fusion_dropout: 0.0 59 | 60 | 61 | [data] 62 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 63 | img_root: ./data/multi30k/features/butd 64 | 65 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 66 | 'image': '${img_root}/train_obj36.npz', 67 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 68 | 69 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 70 | 'image': '${img_root}/valid_obj36.npz', 71 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 74 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 75 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 76 | 77 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 78 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 79 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 80 | 81 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 82 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 83 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 84 | 85 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 86 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 87 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 88 | 89 | [vocabulary] 90 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 91 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 92 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-mmt/consc-rnn-mmt-enc-od.conf: 
-------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: EncoderSelfAttentionSimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | enc_lnorm: True 37 | dec_dim: 320 38 | emb_dim: 200 39 | dropout_emb: 0.4 40 | dropout_ctx: 0.5 41 | dropout_out: 0.5 42 | n_encoders: 2 43 | tied_emb: 2way 44 | max_len: None 45 | out_logic: deep 46 | 47 | direction: src:Text, image:ObjectDetections -> trg:Text 48 | sampler_type: bucket 49 | bucket_by: src 50 | 51 | aux_dropout: 0.5 52 | aux_lnorm: True 53 | aux_proj_dim: 320 54 | aux_proj_activ: tanh 55 | aux_dim: 2048 56 | dec_inp_activ: None 57 | 58 | feat_mode: roi_feats 59 | n_heads: 1 60 | 61 | 62 | [data] 63 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 64 | img_root: ./data/multi30k/features/butd 65 | 66 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 67 | 'image': '${img_root}/train_obj36.npz', 68 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 69 | 70 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 71 | 'image': '${img_root}/valid_obj36.npz', 72 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 73 | 74 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 75 | 'image': '${img_root}/test_2016_flickr_obj36.npz', 76 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 79 | 'image': '${img_root}/test_2017_flickr_obj36.npz', 80 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 81 | 82 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 83 | 'image': '${img_root}/test_2017_mscoco_obj36.npz', 84 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 85 | 86 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 87 | 'image': '${img_root}/test_2018_flickr_obj36.npz', 88 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 89 | 90 | [vocabulary] 91 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 92 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 93 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/consc-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 
| optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | 50 | [data] 51 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 52 | 53 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 54 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 55 | 56 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 57 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 58 | 59 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 60 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 61 | 62 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 63 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 64 | 65 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 66 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 67 | 68 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 69 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 70 | 71 | [vocabulary] 72 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 73 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 74 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait1-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 1} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': 
'${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait2-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 2} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait3-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: 
False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 3} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait4-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 4} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': 
'${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait5-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 5} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait6-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 
0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 6} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/rnn-nmt/wait7-rnn.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | [train] 8 | seed: 1582660384 9 | model_type: SimultaneousWaitKNMT 10 | patience: 10 11 | max_epochs: 100 12 | eval_freq: 0 13 | eval_metrics: bleu,loss 14 | eval_filters: ['de-hyphen'] 15 | eval_batch_size: 32 16 | save_best_metrics: True 17 | eval_max_len: 100 18 | n_checkpoints: 0 19 | l2_reg: 1e-05 20 | lr_decay: plateau 21 | lr_decay_revert: False 22 | lr_decay_factor: 0.5 23 | lr_decay_patience: 2 24 | gclip: 1 25 | optimizer: adam 26 | lr: 0.0004 27 | batch_size: 64 28 | save_path: ./experiments/${vars:sl}-${vars:tl} 29 | tensorboard_dir: ${save_path}/tb_dir 30 | 31 | [model] 32 | att_type: mlp 33 | att_bottleneck: hid 34 | enc_dim: 320 35 | enc_bidirectional: False 36 | dec_dim: 320 37 | emb_dim: 200 38 | dropout_emb: 0.4 39 | dropout_ctx: 0.5 40 | dropout_out: 0.5 41 | n_encoders: 2 42 | tied_emb: 2way 43 | max_len: None 44 | out_logic: deep 45 | 46 | direction: src:Text -> trg:Text 47 | sampler_type: bucket 48 | bucket_by: src 49 | translator_args: {'k': 7} 50 | 51 | [data] 52 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 53 | 54 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 55 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 56 | 57 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 58 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 59 | 60 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 61 | 'trg': 
'${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 62 | 63 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | [vocabulary] 73 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 74 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 75 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/consc-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | 57 | [data] 58 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 59 | 60 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 61 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 62 | 63 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 64 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 65 | 66 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 67 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 68 | 69 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 70 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 71 | 72 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 73 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 74 | 75 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 76 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 77 | 78 | [vocabulary] 79 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 80 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 81 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait1-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 
1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 1} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait2-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: 
src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 2} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait3-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 3} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 
77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait4-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 4} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait5-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 
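# lr_decay_factor and lr_decay_patience above/below are plateau-scheduler
# settings and are presumably ignored while lr_decay is set to noam.
# The ${...} references used throughout these files follow configparser-style
# extended interpolation: ${section:option} (e.g. ${vars:d_model},
# ${data:txt_root}) resolves an option from another section, while a bare
# ${option} (e.g. ${save_path}) resolves one from the current section. With
# sl = en and tl = fr, save_path below expands to ./experiments/en-fr and
# tensorboard_dir to ./experiments/en-fr/tb_dir. Whether pysimt relies on
# Python's configparser.ExtendedInterpolation or its own resolver is an
# assumption here.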
28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 5} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait6-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 6} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | 
test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /configs/en-fr/transformers-nmt/wait7-tf.conf: -------------------------------------------------------------------------------- 1 | [vars] 2 | # source language 3 | sl: en 4 | # target language 5 | tl: fr 6 | 7 | d_model: 512 8 | 9 | [train] 10 | seed: 1582660384 11 | model_type: SimultaneousTFWaitKNMT 12 | patience: 30 13 | max_epochs: 100 14 | eval_freq: 0 15 | eval_metrics: bleu,loss 16 | eval_filters: ['de-hyphen'] 17 | eval_batch_size: 32 18 | save_best_metrics: True 19 | eval_max_len: 100 20 | n_checkpoints: 0 21 | l2_reg: 0 22 | adam_betas: 0.9, 0.98 23 | lr_decay: noam 24 | tf_model_dim: ${vars:d_model} 25 | lr_warmup_steps: 4000 26 | lr_decay_revert: False 27 | lr_decay_factor: 0.5 28 | lr_decay_patience: 2 29 | gclip: 1 30 | optimizer: adam 31 | lr: 0.2 32 | batch_size: 32 33 | save_path: ./experiments/${vars:sl}-${vars:tl} 34 | tensorboard_dir: ${save_path}/tb_dir 35 | 36 | [model] 37 | max_len: None 38 | out_logic: deep 39 | model_dim: ${vars:d_model} 40 | num_heads: 8 41 | enc_ff_dim: 2048 42 | dec_ff_dim: 2048 43 | enc_n_layers: 6 44 | dec_n_layers: 6 45 | short_list: 0 46 | enc_bidirectional: False 47 | ff_activ: relu 48 | tied_emb: 2way 49 | dropout: 0.1 50 | attn_dropout: 0.1 51 | pre_norm: True 52 | 53 | direction: src:Text -> trg:Text 54 | sampler_type: bucket 55 | bucket_by: src 56 | translator_args: {'k': 7} 57 | 58 | [data] 59 | txt_root: ./data/multi30k/${vars:sl}-${vars:tl} 60 | 61 | train_set: {'src': '${txt_root}/train.lc.norm.tok.${vars:sl}', 62 | 'trg': '${txt_root}/train.lc.norm.tok.${vars:tl}'} 63 | 64 | val_set: {'src': '${txt_root}/val.lc.norm.tok.${vars:sl}', 65 | 'trg': '${txt_root}/val.lc.norm.tok.${vars:tl}'} 66 | 67 | test_2016_flickr_set: {'src': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:sl}', 68 | 'trg': '${txt_root}/test_2016_flickr.lc.norm.tok.${vars:tl}'} 69 | 70 | test_2017_flickr_set: {'src': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:sl}', 71 | 'trg': '${txt_root}/test_2017_flickr.lc.norm.tok.${vars:tl}'} 72 | 73 | test_2017_mscoco_set: {'src': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:sl}', 74 | 'trg': '${txt_root}/test_2017_mscoco.lc.norm.tok.${vars:tl}'} 75 | 76 | test_2018_flickr_set: {'src': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:sl}', 77 | 'trg': '${txt_root}/test_2018_flickr.lc.norm.tok.${vars:tl}'} 78 | 79 | [vocabulary] 80 | src: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:sl} 81 | trg: ${data:txt_root}/train.lc.norm.tok.vocab.${vars:tl} 82 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Data preparation 2 | 3 | 
- Make sure that you have already created and activated the `pysimt` anaconda environment. 4 | - Please run `install.sh` from within this folder to install the necessary tools for data pre-processing. 5 | - Finally, proceed with independent preparation scripts under `multi30k`. 6 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/README.md: -------------------------------------------------------------------------------- 1 | moses scripts 2 | -- 3 | 4 | This is a snapshot of Moses scripts from the upstream repository. The 5 | specific commit taken is: 6 | 7 | ``` 8 | commit 5cbafabfd5ed2833ca8808bdca6e785935713159 9 | Author: Hieu Hoang 10 | Date: Wed Oct 14 11:48:26 2020 -0700 11 | ``` 12 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/share/nonbreaking_prefixes/README.txt: -------------------------------------------------------------------------------- 1 | The language suffix can be found here: 2 | 3 | http://www.loc.gov/standards/iso639-2/php/code_list.php 4 | 5 | This code includes data from Daniel Naber's Language Tools (czech abbreviations). 6 | This code includes data from czech wiktionary (also czech abbreviations). 7 | 8 | 9 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/share/nonbreaking_prefixes/nonbreaking_prefix.en: -------------------------------------------------------------------------------- 1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | 4 | #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) 5 | #usually upper case letters are initials in a name 6 | A 7 | B 8 | C 9 | D 10 | E 11 | F 12 | G 13 | H 14 | I 15 | J 16 | K 17 | L 18 | M 19 | N 20 | O 21 | P 22 | Q 23 | R 24 | S 25 | T 26 | U 27 | V 28 | W 29 | X 30 | Y 31 | Z 32 | 33 | #List of titles. These are often followed by upper-case names, but do not indicate sentence breaks 34 | Adj 35 | Adm 36 | Adv 37 | Asst 38 | Bart 39 | Bldg 40 | Brig 41 | Bros 42 | Capt 43 | Cmdr 44 | Col 45 | Comdr 46 | Con 47 | Corp 48 | Cpl 49 | DR 50 | Dr 51 | Drs 52 | Ens 53 | Gen 54 | Gov 55 | Hon 56 | Hr 57 | Hosp 58 | Insp 59 | Lt 60 | MM 61 | MR 62 | MRS 63 | MS 64 | Maj 65 | Messrs 66 | Mlle 67 | Mme 68 | Mr 69 | Mrs 70 | Ms 71 | Msgr 72 | Op 73 | Ord 74 | Pfc 75 | Ph 76 | Prof 77 | Pvt 78 | Rep 79 | Reps 80 | Res 81 | Rev 82 | Rt 83 | Sen 84 | Sens 85 | Sfc 86 | Sgt 87 | Sr 88 | St 89 | Supt 90 | Surg 91 | 92 | #misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) 93 | v 94 | vs 95 | i.e 96 | rev 97 | e.g 98 | # rupees 99 | Rs 100 | 101 | #Numbers only. These should only induce breaks when followed by a numeric sequence 102 | # add NUMERIC_ONLY after the word for this function 103 | #This case is mostly for the english "No."
which can either be a sentence of its own, or 104 | #if followed by a number, a non-breaking prefix 105 | No #NUMERIC_ONLY# 106 | Nos 107 | Art #NUMERIC_ONLY# 108 | Nr 109 | pp #NUMERIC_ONLY# 110 | 111 | #month abbreviations 112 | Jan 113 | Feb 114 | Mar 115 | Apr 116 | #May is a full word 117 | Jun 118 | Jul 119 | Aug 120 | Sep 121 | Oct 122 | Nov 123 | Dec 124 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/share/nonbreaking_prefixes/nonbreaking_prefix.fr: -------------------------------------------------------------------------------- 1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | # 4 | #any single upper case letter followed by a period is not a sentence ender 5 | #usually upper case letters are initials in a name 6 | #no French words end in single lower-case letters, so we throw those in too? 7 | A 8 | B 9 | C 10 | D 11 | E 12 | F 13 | G 14 | H 15 | I 16 | J 17 | K 18 | L 19 | M 20 | N 21 | O 22 | P 23 | Q 24 | R 25 | S 26 | T 27 | U 28 | V 29 | W 30 | X 31 | Y 32 | Z 33 | #a 34 | b 35 | c 36 | d 37 | e 38 | f 39 | g 40 | h 41 | i 42 | j 43 | k 44 | l 45 | m 46 | n 47 | o 48 | p 49 | q 50 | r 51 | s 52 | t 53 | u 54 | v 55 | w 56 | x 57 | y 58 | z 59 | 60 | # Period-final abbreviation list for French 61 | A.C.N 62 | A.M 63 | art 64 | ann 65 | apr 66 | av 67 | auj 68 | lib 69 | B.P 70 | boul 71 | ca 72 | c.-à-d 73 | cf 74 | ch.-l 75 | chap 76 | contr 77 | C.P.I 78 | C.Q.F.D 79 | C.N 80 | C.N.S 81 | C.S 82 | dir 83 | éd 84 | e.g 85 | env 86 | al 87 | etc 88 | E.V 89 | ex 90 | fasc 91 | fém 92 | fig 93 | fr 94 | hab 95 | ibid 96 | id 97 | i.e 98 | inf 99 | LL.AA 100 | LL.AA.II 101 | LL.AA.RR 102 | LL.AA.SS 103 | L.D 104 | LL.EE 105 | LL.MM 106 | LL.MM.II.RR 107 | loc.cit 108 | masc 109 | MM 110 | ms 111 | N.B 112 | N.D.A 113 | N.D.L.R 114 | N.D.T 115 | n/réf 116 | NN.SS 117 | N.S 118 | N.D 119 | N.P.A.I 120 | p.c.c 121 | pl 122 | pp 123 | p.ex 124 | p.j 125 | P.S 126 | R.A.S 127 | R.-V 128 | R.P 129 | R.I.P 130 | SS 131 | S.S 132 | S.A 133 | S.A.I 134 | S.A.R 135 | S.A.S 136 | S.E 137 | sec 138 | sect 139 | sing 140 | S.M 141 | S.M.I.R 142 | sq 143 | sqq 144 | suiv 145 | sup 146 | suppl 147 | tél 148 | T.S.V.P 149 | vb 150 | vol 151 | vs 152 | X.O 153 | Z.I 154 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/basic-protected-patterns: -------------------------------------------------------------------------------- 1 | <\/?\S+\/?> 2 | <\S+( [a-zA-Z0-9]+\=\"?[^\"]\")+ ?\/?> 3 | <\S+( [a-zA-Z0-9]+\=\'?[^\']\')+ ?\/?> 4 | [\w\-\_\.]+\@([\w\-\_]+\.)+[a-zA-Z]{2,} 5 | http[s]?:\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]*(\/)? 6 | ftp[s]?:\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]*(\/)? 7 | rsync:\/\/[^:\/\s]+(\/\w+)*\/[\w\-\.]*(\/)? 8 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/lowercase.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 
5 | 6 | use warnings; 7 | use strict; 8 | 9 | while (@ARGV) { 10 | $_ = shift; 11 | /^-b$/ && ($| = 1, next); # not buffered (flush each line) 12 | } 13 | 14 | binmode(STDIN, ":utf8"); 15 | binmode(STDOUT, ":utf8"); 16 | 17 | while() { 18 | print lc($_); 19 | } 20 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/normalize-punctuation.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 5 | 6 | use warnings; 7 | use strict; 8 | 9 | my $language = "en"; 10 | my $PENN = 0; 11 | 12 | while (@ARGV) { 13 | $_ = shift; 14 | /^-b$/ && ($| = 1, next); # not buffered (flush each line) 15 | /^-l$/ && ($language = shift, next); 16 | /^[^\-]/ && ($language = $_, next); 17 | /^-penn$/ && ($PENN = 1, next); 18 | } 19 | 20 | while() { 21 | s/\r//g; 22 | # remove extra spaces 23 | s/\(/ \(/g; 24 | s/\)/\) /g; s/ +/ /g; 25 | s/\) ([\.\!\:\?\;\,])/\)$1/g; 26 | s/\( /\(/g; 27 | s/ \)/\)/g; 28 | s/(\d) \%/$1\%/g; 29 | s/ :/:/g; 30 | s/ ;/;/g; 31 | # normalize unicode punctuation 32 | if ($PENN == 0) { 33 | s/\`/\'/g; 34 | s/\'\'/ \" /g; 35 | } 36 | 37 | s/„/\"/g; 38 | s/“/\"/g; 39 | s/”/\"/g; 40 | s/–/-/g; 41 | s/—/ - /g; s/ +/ /g; 42 | s/´/\'/g; 43 | s/([a-z])‘([a-z])/$1\'$2/gi; 44 | s/([a-z])’([a-z])/$1\'$2/gi; 45 | s/‘/\'/g; 46 | s/‚/\'/g; 47 | s/’/\"/g; 48 | s/''/\"/g; 49 | s/´´/\"/g; 50 | s/…/.../g; 51 | # French quotes 52 | s/ « / \"/g; 53 | s/« /\"/g; 54 | s/«/\"/g; 55 | s/ » /\" /g; 56 | s/ »/\"/g; 57 | s/»/\"/g; 58 | # handle pseudo-spaces 59 | s/ \%/\%/g; 60 | s/nº /nº /g; 61 | s/ :/:/g; 62 | s/ ºC/ ºC/g; 63 | s/ cm/ cm/g; 64 | s/ \?/\?/g; 65 | s/ \!/\!/g; 66 | s/ ;/;/g; 67 | s/, /, /g; s/ +/ /g; 68 | 69 | # English "quotation," followed by comma, style 70 | if ($language eq "en") { 71 | s/\"([,\.]+)/$1\"/g; 72 | } 73 | # Czech is confused 74 | elsif ($language eq "cs" || $language eq "cz") { 75 | } 76 | # German/Spanish/French "quotation", followed by comma, style 77 | else { 78 | s/,\"/\",/g; 79 | s/(\.+)\"(\s*[^<])/\"$1$2/g; # don't fix period at end of sentence 80 | } 81 | 82 | 83 | if ($language eq "de" || $language eq "es" || $language eq "cz" || $language eq "cs" || $language eq "fr") { 84 | s/(\d) (\d)/$1,$2/g; 85 | } 86 | else { 87 | s/(\d) (\d)/$1.$2/g; 88 | } 89 | print $_; 90 | } 91 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/remove-non-printing-char.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 
5 | 6 | use warnings; 7 | use utf8; 8 | 9 | while (@ARGV) { 10 | $_ = shift; 11 | /^-b$/ && ($| = 1, next); # not buffered (flush each line) 12 | } 13 | 14 | binmode(STDIN, ":utf8"); 15 | binmode(STDOUT, ":utf8"); 16 | binmode(STDERR, ":utf8"); 17 | 18 | while (my $line = ) { 19 | chomp($line); 20 | #$line =~ tr/\040-\176/ /c; 21 | #$line =~ s/[^[:print:]]/ /g; 22 | #$line =~ s/\s+/ /g; 23 | $line =~ s/\p{C}/ /g; 24 | 25 | print "$line\n"; 26 | } 27 | 28 | -------------------------------------------------------------------------------- /data/moses-5cbafabfd/tokenizer/replace-unicode-punctuation.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 5 | 6 | use warnings; 7 | use strict; 8 | 9 | while (@ARGV) { 10 | $_ = shift; 11 | /^-b$/ && ($| = 1, next); # not buffered (flush each line) 12 | } 13 | 14 | #binmode(STDIN, ":utf8"); 15 | #binmode(STDOUT, ":utf8"); 16 | 17 | while() { 18 | s/,/,/g; 19 | s/。 */. /g; 20 | s/、/,/g; 21 | s/”/"/g; 22 | s/“/"/g; 23 | s/∶/:/g; 24 | s/:/:/g; 25 | s/?/\?/g; 26 | s/《/"/g; 27 | s/》/"/g; 28 | s/)/\)/g; 29 | s/!/\!/g; 30 | s/(/\(/g; 31 | s/;/;/g; 32 | s/1/1/g; 33 | s/」/"/g; 34 | s/「/"/g; 35 | s/0/0/g; 36 | s/3/3/g; 37 | s/2/2/g; 38 | s/5/5/g; 39 | s/6/6/g; 40 | s/9/9/g; 41 | s/7/7/g; 42 | s/8/8/g; 43 | s/4/4/g; 44 | s/. */. /g; 45 | s/~/\~/g; 46 | s/’/\'/g; 47 | s/…/\.\.\./g; 48 | s/━/\-/g; 49 | s/〈/\/g; 51 | s/【/\[/g; 52 | s/】/\]/g; 53 | s/%/\%/g; 54 | print $_; 55 | } 56 | -------------------------------------------------------------------------------- /data/multi30k/README.md: -------------------------------------------------------------------------------- 1 | Data preparation 2 | -- 3 | 4 | Run `prepare.sh` from this folder to create word-level tokenized corpora and 5 | related vocabulary files. The script will also download and unpack the 6 | object classification (OC) and object detection (OD) features under `features/`. 7 | -------------------------------------------------------------------------------- /data/multi30k/features/README.md: -------------------------------------------------------------------------------- 1 | Visual features 2 | --- 3 | 4 | Two types of visual features are provided: 5 | 6 | - [(Download)](https://zenodo.org/record/4298396/files/multi30k_resnet50_features.tar.bz2?download=1) Object classification (OC) features are extracted from a pre-trained ResNet-50 CNN 7 | - [(Download)](https://zenodo.org/record/4298396/files/multi30k_butd_features.tar.bz2?download=1) Object detection (OD) features are extracted from the `bottom-up-top-down (BUTD)` 8 | object detection model. 9 | 10 | These features will be automatically downloaded when `prepare.sh` is executed. 
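If you want to check exactly what was fetched, a small sketch like the one below can list the unpacked contents once `prepare.sh` has completed. It is only an illustration: it assumes it is run from `data/multi30k/`, and it assumes nothing about the archive layout beyond the `features/` folder name mentioned above.

```python
# Illustrative sketch: list whatever the feature archives unpacked under features/.
# Run from data/multi30k/ after prepare.sh; the internal layout is not assumed here.
from pathlib import Path

for path in sorted(Path('features').rglob('*'))[:20]:
    print(path)
```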
11 | -------------------------------------------------------------------------------- /data/multi30k/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Export moses path 4 | MOSES_PATH=../moses-5cbafabfd/scripts 5 | PATH=${MOSES_PATH}:$PATH 6 | SUFF="lc.norm.tok" 7 | 8 | for tlang in de fr cs; do 9 | echo "Preparing en-${tlang} dataset" 10 | folder="en-${tlang}" 11 | mkdir -p $folder 12 | for sp in train val test_2016_flickr test_2017_flickr test_2017_mscoco test_2018_flickr; do 13 | # Process both sides 14 | for llang in en ${tlang}; do 15 | inp="raw/${sp}.${llang}.gz" 16 | if [ -f $inp ]; then 17 | zcat $inp | lowercase.perl -l ${llang} | normalize-punctuation.perl -l ${llang} | \ 18 | tokenizer.perl -l ${llang} -a -threads 4 > $folder/${sp}.${SUFF}.${llang} 19 | fi 20 | done 21 | 22 | trg="${sp}.${SUFF}.${tlang}" 23 | 24 | # De-hyphenize test set targets for proper evaluation afterwards 25 | if [[ "$sp" =~ ^test.* ]] && [[ -f "${folder}/${trg}" ]]; then 26 | sed -r 's/\s*@-@\s*/-/g' < ${folder}/${trg} > ${folder}/${trg}.dehyph 27 | fi 28 | done 29 | # Create vocabularies 30 | pysimt-build-vocab ${folder}/train.${SUFF}.en -o ${folder} 31 | pysimt-build-vocab ${folder}/train.${SUFF}.${tlang} -o ${folder} 32 | done 33 | 34 | ### Download features 35 | pushd features 36 | wget "https://zenodo.org/record/4298396/files/multi30k_butd_features.tar.bz2?download=1" -O butd.tar.bz2 37 | tar xvf butd.tar.bz2 38 | # rename folder 39 | mv multi30k_butd_features butd 40 | wget "https://zenodo.org/record/4298396/files/multi30k_resnet50_features.tar.bz2?download=1" -O resnet.tar.bz2 41 | tar xvf resnet.tar.bz2 42 | popd 43 | -------------------------------------------------------------------------------- /data/multi30k/raw/README.md: -------------------------------------------------------------------------------- 1 | Multi30k dataset 2 | -- 3 | 4 | This folder contains a snapshot (Nov-2020) of the upstream [multi30k repository](https://github.com/multi30k/dataset). 
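For reference, the files that `prepare.sh` reads from `raw/` are plain gzipped text corpora with one sentence per line. A minimal sketch for peeking at one of them (assumes the current working directory is the repository root):

```python
# Minimal sketch: print the first few raw English training sentences.
# Assumes the current working directory is the repository root.
import gzip

with gzip.open('data/multi30k/raw/train.en.gz', 'rt', encoding='utf-8') as f:
    for _ in range(3):
        print(f.readline().strip())
```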
5 | -------------------------------------------------------------------------------- /data/multi30k/raw/test_2016_flickr.cs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2016_flickr.cs.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2016_flickr.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2016_flickr.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2016_flickr.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2016_flickr.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2016_flickr.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2016_flickr.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_flickr.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_flickr.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_flickr.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_flickr.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_flickr.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_flickr.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_mscoco.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_mscoco.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_mscoco.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_mscoco.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2017_mscoco.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2017_mscoco.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2018_flickr.cs.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2018_flickr.cs.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2018_flickr.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2018_flickr.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2018_flickr.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2018_flickr.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/test_2018_flickr.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/test_2018_flickr.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/train.cs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/train.cs.gz -------------------------------------------------------------------------------- /data/multi30k/raw/train.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/train.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/train.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/train.en.gz -------------------------------------------------------------------------------- /data/multi30k/raw/train.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/train.fr.gz -------------------------------------------------------------------------------- /data/multi30k/raw/val.cs.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/val.cs.gz -------------------------------------------------------------------------------- /data/multi30k/raw/val.de.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/val.de.gz -------------------------------------------------------------------------------- /data/multi30k/raw/val.en.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/val.en.gz -------------------------------------------------------------------------------- 
/data/multi30k/raw/val.fr.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/data/multi30k/raw/val.fr.gz -------------------------------------------------------------------------------- /doccov.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | docstr-coverage 16 | docstr-coverage 17 | 56% 18 | 56% 19 | 20 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pysimt 2 | 3 | dependencies: 4 | - python=3.7 5 | - pip 6 | - ipython 7 | - pyyaml 8 | - numpy 9 | - tensorboard 10 | - tabulate 11 | - tqdm 12 | - pip: 13 | - torch==1.7.0 14 | - sacrebleu>=1.4.13 15 | - subword_nmt 16 | - editdistance==0.5.3 17 | - -e . 18 | -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | # Experiments 2 | 3 | Experiment related files will be stored in here by default. 4 | Revise the configuration files to change that. 5 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialNLP/pysimt/edeffa4f62f290293bbea3c92fb88c3903842dc3/logo.png -------------------------------------------------------------------------------- /make_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf docs/ 4 | pdoc --html pysimt -o docs/ 5 | mv docs/pysimt/* docs 6 | 7 | docstr-coverage -Pim pysimt --badge doccov.svg 8 | 9 | git commit docs doccov.svg -m "update docs" 10 | -------------------------------------------------------------------------------- /pysimt/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | """ 3 | `pysimt` is a `PyTorch`-based sequence-to-sequence (S2S) framework that facilitates 4 | research in unimodal and multi-modal machine translation. The framework 5 | is especially geared towards a set of recent simultaneous MT approaches, including 6 | heuristics-based decoding and prefix-to-prefix training/decoding. Common metrics 7 | such as average proportion (AP), average lag (AL), and consecutive wait (CW) 8 | are provided through well-defined APIs as well. 9 | 10 | 11 | .. include:: ./docs.md 12 | """ 13 | 14 | 15 | # Disable documentation generation for the following sub modules 16 | __pdoc__ = { 17 | 'cocoeval': False, 18 | 'config': False, 19 | 'logger': False, 20 | } 21 | -------------------------------------------------------------------------------- /pysimt/cocoeval/README.md: -------------------------------------------------------------------------------- 1 | pycocoevalcap 2 | --- 3 | 4 | This is a copy from 5 | https://github.com/tylin/coco-caption/tree/master/pycocoevalcap 6 | 7 | with Python 2 support dropped. 
8 | -------------------------------------------------------------------------------- /pysimt/cocoeval/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | from .bleu.bleu import Bleu 3 | from .cider.cider import Cider 4 | from .rouge.rouge import Rouge 5 | from .meteor.meteor import Meteor 6 | -------------------------------------------------------------------------------- /pysimt/cocoeval/bleu/LICENSE.bleu: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /pysimt/cocoeval/bleu/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /pysimt/cocoeval/bleu/bleu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File Name : bleu.py 3 | # 4 | # Description : Wrapper for BLEU scorer. 5 | # 6 | # Creation Date : 06-01-2015 7 | # Last Modified : Thu 19 Mar 2015 09:13:28 PM PDT 8 | # Authors : Hao Fang and Tsung-Yi Lin 9 | 10 | from .bleu_scorer import BleuScorer 11 | 12 | 13 | class Bleu: 14 | def __init__(self, n=4): 15 | # default compute Blue score up to 4 16 | self._n = n 17 | self._hypo_for_image = {} 18 | self.ref_for_image = {} 19 | 20 | def compute_score(self, gts, res): 21 | 22 | bleu_scorer = BleuScorer(n=self._n) 23 | for id in sorted(gts.keys()): 24 | hypo = res[id] 25 | ref = gts[id] 26 | 27 | # Sanity check. 
28 | assert isinstance(hypo, list) 29 | assert isinstance(ref, list) 30 | assert len(hypo) == 1 31 | assert len(ref) >= 1 32 | 33 | bleu_scorer += (hypo[0], ref) 34 | 35 | # score, scores = bleu_scorer.compute_score(option='shortest') 36 | # score, scores = bleu_scorer.compute_score(option='average',verbose=1) 37 | score, scores = bleu_scorer.compute_score(option='closest', verbose=0) 38 | 39 | # return (bleu, bleu_info) 40 | return score, scores 41 | 42 | def method(self): 43 | return "Bleu" 44 | -------------------------------------------------------------------------------- /pysimt/cocoeval/cider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /pysimt/cocoeval/cider/cider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Filename: cider.py 3 | # 4 | # Description: Describes the class to compute the CIDEr 5 | # (Consensus-Based Image Description Evaluation) Metric 6 | # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) 7 | # 8 | # Creation Date: Sun Feb 8 14:16:54 2015 9 | # 10 | # Authors: Ramakrishna Vedantam and 11 | # Tsung-Yi Lin 12 | 13 | from .cider_scorer import CiderScorer 14 | 15 | 16 | class Cider: 17 | """Main Class to compute the CIDEr metric.""" 18 | 19 | def __init__(self, test=None, refs=None, n=4, sigma=6.0): 20 | # set cider to sum over 1 to 4-grams 21 | self._n = n 22 | # set the standard deviation parameter for gaussian penalty 23 | self._sigma = sigma 24 | 25 | def compute_score(self, gts, res): 26 | """Main function to compute CIDEr score 27 | 28 | Arguments: 29 | hypo_for_image (dict): dictionary with key and 30 | value 31 | ref_for_image (dict): dictionary with key and value 32 | 33 | 34 | Returns: 35 | cider (float): computed CIDEr score for the corpus 36 | """ 37 | 38 | cider_scorer = CiderScorer(n=self._n, sigma=self._sigma) 39 | 40 | for id in sorted(gts.keys()): 41 | hypo = res[id] 42 | ref = gts[id] 43 | 44 | # Sanity check. 
45 | assert isinstance(hypo, list) 46 | assert isinstance(ref, list) 47 | assert len(hypo) == 1 48 | assert len(ref) > 0 49 | 50 | cider_scorer += (hypo[0], ref) 51 | 52 | (score, scores) = cider_scorer.compute_score() 53 | 54 | return score, scores 55 | 56 | def method(self): 57 | return "CIDEr" 58 | -------------------------------------------------------------------------------- /pysimt/cocoeval/meteor/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /pysimt/cocoeval/meteor/meteor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Python wrapper for METEOR implementation, by Xinlei Chen 3 | # Acknowledge Michael Denkowski for the generous discussion and help 4 | 5 | import os 6 | import shutil 7 | import threading 8 | import subprocess 9 | 10 | from ...utils.misc import get_meteor_jar 11 | 12 | 13 | class Meteor: 14 | def __init__(self, language, norm=False): 15 | self.jar = str(get_meteor_jar()) 16 | self.meteor_cmd = ['java', '-jar', '-Xmx2G', self.jar, 17 | '-', '-', '-stdio', '-l', language] 18 | self.env = os.environ 19 | self.env['LC_ALL'] = 'en_US.UTF_8' 20 | 21 | # Sanity check 22 | if shutil.which('java') is None: 23 | raise RuntimeError('METEOR requires java which is not installed.') 24 | 25 | if norm: 26 | self.meteor_cmd.append('-norm') 27 | 28 | self.meteor_p = subprocess.Popen(self.meteor_cmd, 29 | stdin=subprocess.PIPE, 30 | stdout=subprocess.PIPE, 31 | stderr=subprocess.PIPE, 32 | env=self.env, 33 | universal_newlines=True, bufsize=1) 34 | # Used to guarantee thread safety 35 | self.lock = threading.Lock() 36 | 37 | def method(self): 38 | return "METEOR" 39 | 40 | def compute_score(self, gts, res): 41 | imgIds = sorted(list(gts.keys())) 42 | scores = [] 43 | 44 | eval_line = 'EVAL' 45 | self.lock.acquire() 46 | for i in imgIds: 47 | assert len(res[i]) == 1 48 | 49 | hypothesis_str = res[i][0].replace('|||', '').replace(' ', ' ') 50 | score_line = ' ||| '.join( 51 | ('SCORE', ' ||| '.join(gts[i]), hypothesis_str)) 52 | 53 | # We obtained --> SCORE ||| reference 1 words ||| 54 | # reference n words ||| hypothesis words 55 | self.meteor_p.stdin.write(score_line + '\n') 56 | stat = self.meteor_p.stdout.readline().strip() 57 | eval_line += ' ||| {}'.format(stat) 58 | 59 | # Send to METEOR 60 | self.meteor_p.stdin.write(eval_line + '\n') 61 | 62 | # Collect segment scores 63 | for i in range(len(imgIds)): 64 | score = float(self.meteor_p.stdout.readline().strip()) 65 | scores.append(score) 66 | 67 | # Final score 68 | final_score = 100 * float(self.meteor_p.stdout.readline().strip()) 69 | self.lock.release() 70 | 71 | return final_score, scores 72 | 73 | def __del__(self): 74 | self.lock.acquire() 75 | self.meteor_p.stdin.close() 76 | self.meteor_p.wait() 77 | self.lock.release() 78 | -------------------------------------------------------------------------------- /pysimt/cocoeval/rouge/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'vrama91' 2 | -------------------------------------------------------------------------------- /pysimt/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A dataset in `pysimt` inherits from `torch.nn.Dataset` and is designed 3 | to read and expose a specific type of corpus. 
4 | 5 | * A dataset class name should end with the `Dataset` suffix. 6 | * The `__init__` method should include `**kwargs` for other possible arguments. 7 | * The `__getitem__` and `__len__` methods should be implemented. 8 | * A static method `to_torch(batch, **kwargs)` is automatically used when 9 | preparing the batch tensor during forward-pass. 10 | 11 | Please see `pysimt.datasets.TextDataset` to get an idea on how to implement 12 | a new dataset. 13 | 14 | """ 15 | 16 | from .numpy import NumpyDataset 17 | from .text import TextDataset 18 | from .objdet import ObjectDetectionsDataset 19 | 20 | 21 | # Second the selector function 22 | def get_dataset(type_): 23 | return { 24 | 'numpy': NumpyDataset, 25 | 'text': TextDataset, 26 | 'objectdetections': ObjectDetectionsDataset, 27 | }[type_.lower()] 28 | 29 | 30 | # Should always be at the end 31 | from .multimodal import MultimodalDataset # noqa 32 | -------------------------------------------------------------------------------- /pysimt/datasets/collate.py: -------------------------------------------------------------------------------- 1 | class Batch(dict): 2 | """A custom dictionary representing a batch.""" 3 | def __init__(self, *args, **kwargs): 4 | super().__init__(*args, **kwargs) 5 | dim1s = set([x.size(1) for x in self.values()]) 6 | assert len(dim1s) == 1, \ 7 | "Incompatible batch dimension (1) between modalities." 8 | self.size = dim1s.pop() 9 | 10 | def device(self, device): 11 | self.update({k: v.to(device) for k, v in self.items()}) 12 | 13 | def __repr__(self): 14 | s = "Batch(size={})\n".format(self.size) 15 | for data_source, tensor in self.items(): 16 | s += " {:10s} -> {} - {}\n".format( 17 | str(data_source), tensor.shape, tensor.device) 18 | return s 19 | 20 | 21 | def get_collate(data_sources): 22 | """Returns a special collate_fn which will view the underlying data 23 | in terms of the given DataSource keys.""" 24 | 25 | def collate_fn(batch): 26 | return Batch( 27 | {ds: ds.torchify([elem[ds] for elem in batch]) for ds in data_sources}, 28 | ) 29 | 30 | return collate_fn 31 | -------------------------------------------------------------------------------- /pysimt/datasets/numpy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | class NumpyDataset(Dataset): 10 | r"""A PyTorch dataset for Numpy .npy/npz serialized tensor files. The 11 | serialized tensor's first dimension should be the batch dimension. 12 | 13 | Arguments: 14 | fname (str or Path): A string or ``pathlib.Path`` object for 15 | the relevant numpy file. 16 | key (str, optional): If `fname` is `.npz` file, its relevant `key` 17 | will be fetched from the serialized object. 18 | order_file (str, None): If given, will be used to map sample indices 19 | to tensors using this list. Useful for tiled or repeated 20 | experiments. 21 | revert (bool, optional): If `True`, the data order will be reverted 22 | for adversarial/incongruent experiments during test-time. 23 | """ 24 | 25 | def __init__(self, fname, key=None, order_file=None, revert=False, **kwargs): 26 | self.path = Path(fname) 27 | if not self.path.exists(): 28 | raise RuntimeError('{} does not exist.'.format(self.path)) 29 | 30 | if self.path.suffix == '.npy': 31 | self.data = np.load(self.path) 32 | elif self.path.suffix == '.npz': 33 | assert key, "A key should be provided for .npz files." 
34 | self.data = np.load(self.path)[key] 35 | 36 | if order_file: 37 | with open(order_file) as orf: 38 | self.order = [int(x) for x in orf.read().strip().split('\n')] 39 | else: 40 | self.order = list(range(self.data.shape[0])) 41 | 42 | if revert: 43 | self.order = self.order[::-1] 44 | 45 | # Dataset size 46 | self.size = len(self.order) 47 | 48 | @staticmethod 49 | def to_torch(batch, **kwargs): 50 | # NOTE: Assumes x.shape == (n, *) 51 | x = torch.from_numpy(np.array(batch, dtype='float32')) 52 | # Convert it to (t(=1 if fixed features), n, c) 53 | # By default we flatten h*w to first dim for interoperability 54 | # Models should further reshape the tensor for their needs 55 | return x.view(*x.size()[:2], -1).permute(2, 0, 1) 56 | 57 | def __getitem__(self, idx): 58 | return self.data[self.order[idx]] 59 | 60 | def __len__(self): 61 | return self.size 62 | 63 | def __repr__(self): 64 | s = "{} '{}' ({} samples)\n".format( 65 | self.__class__.__name__, self.path.name, self.__len__()) 66 | return s 67 | -------------------------------------------------------------------------------- /pysimt/evaluator.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from . import metrics 4 | from .utils.filterchain import FilterChain 5 | from .utils.misc import get_language 6 | 7 | 8 | class Evaluator: 9 | def __init__(self, refs, beam_metrics, filters=''): 10 | # metrics: list of upper-case beam-search metrics 11 | self.kwargs = {} 12 | self.scorers = OrderedDict() 13 | self.refs = list(refs.parent.glob(refs.name)) 14 | self.language = get_language(self.refs[0]) 15 | if self.language is None: 16 | # Fallback to en (this is only relevant for METEOR) 17 | self.language = 'en' 18 | 19 | self.filter = None 20 | if filters: 21 | self.filter = FilterChain(filters) 22 | self.refs = self.filter.apply(refs) 23 | 24 | assert len(self.refs) > 0, "Number of reference files == 0" 25 | 26 | for metric in sorted(beam_metrics): 27 | self.kwargs[metric] = {'language': self.language} 28 | self.scorers[metric] = getattr(metrics, metric + 'Scorer')() 29 | 30 | def score(self, hyps): 31 | """hyps is a list of hypotheses as they come out from decoder.""" 32 | assert isinstance(hyps, list), "hyps should be a list." 
33 | 34 | # Post-process if requested 35 | if self.filter is not None: 36 | hyps = self.filter.apply(hyps) 37 | 38 | results = [] 39 | for key, scorer in self.scorers.items(): 40 | results.append( 41 | scorer.compute(self.refs, hyps, **self.kwargs[key])) 42 | return results 43 | -------------------------------------------------------------------------------- /pysimt/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Basic layers 2 | from .ff import FF 3 | from .pool import Pool 4 | from .fusion import Fusion 5 | from .selector import Selector 6 | from .positionwise_ff import PositionwiseFF 7 | 8 | from .embedding import TFEmbedding, ProjectedEmbedding 9 | 10 | # Attention layers 11 | from .attention import DotAttention 12 | from .attention import MLPAttention 13 | from .attention import UniformAttention 14 | from .attention import ScaledDotAttention 15 | from .attention import MultiheadAttention 16 | from .attention import HierarchicalAttention 17 | 18 | # Encoder layers 19 | from .encoders import RecurrentEncoder 20 | from .encoders import TFEncoder 21 | from .encoders import VisualFeaturesEncoder 22 | 23 | # Decoder layers 24 | from .decoders import ConditionalGRUDecoder 25 | from .decoders import TFDecoder 26 | -------------------------------------------------------------------------------- /pysimt/layers/attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp import MLPAttention 2 | from .dot import DotAttention 3 | from .hierarchical import HierarchicalAttention 4 | from .uniform import UniformAttention 5 | from .scaled_dot import ScaledDotAttention 6 | from .multihead import MultiheadAttention 7 | 8 | 9 | def get_attention(type_): 10 | return { 11 | 'mlp': MLPAttention, 12 | 'dot': DotAttention, 13 | 'hier': HierarchicalAttention, 14 | 'uniform': UniformAttention, 15 | 'multihead': MultiheadAttention, 16 | 'scaled_dot': ScaledDotAttention, 17 | }[type_] 18 | -------------------------------------------------------------------------------- /pysimt/layers/attention/hierarchical.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | from torch import nn 4 | 5 | from ...utils.nn import get_activation_fn 6 | 7 | 8 | # Libovický, J., & Helcl, J. (2017). Attention Strategies for Multi-Source 9 | # Sequence-to-Sequence Learning. In Proceedings of the 55th Annual Meeting of 10 | # the Association for Computational Linguistics (Volume 2: Short Papers) 11 | # (Vol. 2, pp. 196-202). 
[Code contributed by @jlibovicky] 12 | 13 | 14 | class HierarchicalAttention(nn.Module): 15 | """Hierarchical attention over multiple modalities.""" 16 | def __init__(self, ctx_dims, hid_dim, mid_dim, att_activ='tanh'): 17 | super().__init__() 18 | 19 | self.activ = get_activation_fn(att_activ) 20 | self.ctx_dims = ctx_dims 21 | self.hid_dim = hid_dim 22 | self.mid_dim = mid_dim 23 | 24 | self.ctx_projs = nn.ModuleList([ 25 | nn.Linear(dim, mid_dim, bias=False) for dim in self.ctx_dims]) 26 | self.dec_proj = nn.Linear(hid_dim, mid_dim, bias=True) 27 | self.mlp = nn.Linear(self.mid_dim, 1, bias=False) 28 | 29 | def forward(self, contexts, hid): 30 | dec_state_proj = self.dec_proj(hid) 31 | ctx_projected = torch.cat([ 32 | p(ctx).unsqueeze(0) for p, ctx 33 | in zip(self.ctx_projs, contexts)], dim=0) 34 | energies = self.mlp(self.activ(dec_state_proj + ctx_projected)) 35 | att_dist = nn.functional.softmax(energies, dim=0) 36 | 37 | ctxs_cat = torch.cat([c.unsqueeze(0) for c in contexts]) 38 | joint_context = (att_dist * ctxs_cat).sum(0) 39 | 40 | return att_dist, joint_context 41 | -------------------------------------------------------------------------------- /pysimt/layers/attention/mlp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from .dot import DotAttention 7 | 8 | 9 | class MLPAttention(DotAttention): 10 | """Attention layer with feed-forward layer.""" 11 | def __init__(self, ctx_dim, hid_dim, att_bottleneck='ctx', 12 | transform_ctx=True, att_activ='tanh', 13 | mlp_bias=False, temp=1., ctx2hid=True): 14 | super().__init__(ctx_dim, hid_dim, att_bottleneck, transform_ctx, 15 | att_activ, temp, ctx2hid) 16 | 17 | if mlp_bias: 18 | self.bias = nn.Parameter(torch.Tensor(self.mid_dim)) 19 | self.bias.data.zero_() 20 | else: 21 | self.register_parameter('bias', None) 22 | 23 | self.mlp = nn.Linear(self.mid_dim, 1, bias=False) 24 | 25 | def forward(self, hid, ctx, ctx_mask=None): 26 | r"""Computes attention probabilities and final context using 27 | decoder's hidden state and source annotations. 28 | 29 | Arguments: 30 | hid(Tensor): A set of decoder hidden states of shape `T*B*H` 31 | where `T` == 1, `B` is batch dim and `H` is hidden state dim. 32 | ctx(Tensor): A set of annotations of shape `S*B*C` where `S` 33 | is the source timestep dim, `B` is batch dim and `C` 34 | is annotation dim. 35 | ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes 36 | in the padded positions. 37 | 38 | Returns: 39 | scores(Tensor): A tensor of shape `S*B` containing normalized 40 | attention scores for each position and sample. 41 | z_t(Tensor): A tensor of shape `B*H` containing the final 42 | attended context vector for this target decoding timestep. 43 | 44 | Notes: 45 | This will only work when `T==1` for now. 
46 | """ 47 | # inner_sum -> SxBxC + TxBxC 48 | inner_sum = self.ctx2ctx(ctx) + self.hid2ctx(hid) 49 | 50 | if self.bias is not None: 51 | inner_sum.add_(self.bias) 52 | 53 | # Compute scores- > SxB 54 | scores = self.mlp( 55 | self.activ(inner_sum)).div(self.temperature).squeeze(-1) 56 | 57 | # Normalize attention scores correctly -> S*B 58 | if ctx_mask is not None: 59 | # Mask out padded positions with -inf so that they get 0 attention 60 | scores.masked_fill_((1 - ctx_mask).bool(), -1e8) 61 | 62 | alpha = F.softmax(scores, dim=0) 63 | 64 | # Transform final context vector to H for further decoders 65 | return alpha, self.ctx2hid((alpha.unsqueeze(-1) * ctx).sum(0)) 66 | -------------------------------------------------------------------------------- /pysimt/layers/attention/uniform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | 4 | 5 | class UniformAttention(torch.nn.Module): 6 | """A dummy non-parametric attention layer that applies uniform weights.""" 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def forward(self, hid, ctx, ctx_mask=None): 11 | alpha = torch.ones(*ctx.shape[:2], device=ctx.device).div(ctx.shape[0]) 12 | wctx = (alpha.unsqueeze(-1) * ctx).sum(0) 13 | return alpha, wctx 14 | -------------------------------------------------------------------------------- /pysimt/layers/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .conditional import ConditionalGRUDecoder 2 | from .tf_decoder import TFDecoder 3 | 4 | 5 | def get_decoder(type_): 6 | """Only expose ones with compatible __init__() arguments for now.""" 7 | return { 8 | 'cond': ConditionalGRUDecoder, 9 | 'tf': TFDecoder, 10 | }[type_] 11 | -------------------------------------------------------------------------------- /pysimt/layers/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .recurrent import RecurrentEncoder 2 | from .transformers import TFEncoder 3 | from .vis_features import VisualFeaturesEncoder 4 | from .speech_lstm import SpeechLSTM 5 | -------------------------------------------------------------------------------- /pysimt/layers/encoders/vis_features.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ...utils.nn import generate_visual_features_padding_masks 4 | from .. import FF 5 | 6 | 7 | class VisualFeaturesEncoder(nn.Module): 8 | """A facility encoder for pre-extracted visual features. 9 | 10 | Arguments: 11 | input_size (int): number of channels in the last dimension of 12 | the features. 13 | proj_dim(int, optional): If not `None`, add a final projection 14 | layer similar to a 1x1 Conv2D. 15 | proj_activ(str, optional): Non-linearity for projection layer. 16 | `None` or `linear` does not apply any non-linearity. 17 | layer_norm(bool, optional): Apply layer normalization. 18 | l2_norm(bool, optional): L2-normalize features. 19 | dropout (float, optional): Optional dropout to be applied on the 20 | projected visual features. 21 | pool (bool, optional): If True, applies global average pooling 22 | to reduce conv features to a single vector. 23 | 24 | Input: 25 | x (Tensor): A tensor of shape (w*h, batch_size, input_size) 26 | 27 | Output: 28 | h (Tensor): A tensor of shape (w*h, batch_size, proj_dim) 29 | mask (None): No masking is done for visual features. 
30 | """ 31 | def __init__(self, input_size, proj_dim=None, proj_activ=None, 32 | layer_norm=False, l2_norm=False, dropout=0.0, pool=False, image_masking=False): 33 | super().__init__() 34 | 35 | self.ctx_size = input_size 36 | self.l2_norm = l2_norm 37 | self._image_masking = image_masking 38 | 39 | output_layers = [] 40 | if proj_dim is not None: 41 | output_layers.append( 42 | FF(input_size, proj_dim, activ=proj_activ)) 43 | self.ctx_size = proj_dim 44 | 45 | if layer_norm: 46 | output_layers.append(nn.LayerNorm(self.ctx_size)) 47 | 48 | if dropout > 0: 49 | output_layers.append(nn.Dropout(dropout)) 50 | 51 | self.output = nn.Sequential(*output_layers) 52 | 53 | # Variables for caching 54 | self._states, self._mask = None, None 55 | 56 | def forward(self, x, **kwargs): 57 | if self._image_masking: 58 | self._mask = generate_visual_features_padding_masks(x) 59 | if self.l2_norm: 60 | x.div_(x.norm(p=2, dim=-1, keepdim=True)) 61 | self._states = self.output(x) 62 | return self._states, self._mask 63 | 64 | def get_states(self, up_to=int(1e6)): 65 | assert self._states is not None, \ 66 | "encoder was not called for caching the states." 67 | return self._states, self._mask 68 | -------------------------------------------------------------------------------- /pysimt/layers/ff.py: -------------------------------------------------------------------------------- 1 | """A convenience feed-forward layer with non-linearity support.""" 2 | 3 | import math 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | 9 | from ..utils.nn import get_activation_fn 10 | 11 | 12 | class FF(nn.Module): 13 | """A convenience feed-forward layer with non-linearity option. 14 | 15 | Args: 16 | input_size: The size of the input features 17 | hidden_size: The size of the output features 18 | bias: If `False`, disables the bias component 19 | bias_zero: If `False`, randomly initialize the bias instead of zero 20 | initialization 21 | activ: The activation function name that will be searched 22 | in `torch` and `torch.nn.functional` modules. `None` or `linear` 23 | disables the activation function 24 | 25 | Example: 26 | >>> FF(300, 400, bias=True, activ='tanh') # a tanh MLP 27 | >>> FF(300, 400, bias=False, activ=None) # a linear layer 28 | """ 29 | 30 | def __init__(self, input_size, hidden_size, bias=True, 31 | bias_zero=True, activ=None): 32 | """""" 33 | super().__init__() 34 | self.input_size = input_size 35 | self.hidden_size = hidden_size 36 | self.use_bias = bias 37 | self.bias_zero = bias_zero 38 | self.activ_type = activ 39 | if self.activ_type in (None, 'linear'): 40 | self.activ_type = 'linear' 41 | self.weight = nn.Parameter(torch.Tensor(hidden_size, input_size)) 42 | self.activ = get_activation_fn(activ) 43 | 44 | if self.use_bias: 45 | self.bias = nn.Parameter(torch.Tensor(hidden_size)) 46 | else: 47 | self.register_parameter('bias', None) 48 | 49 | self.reset_parameters() 50 | 51 | def reset_parameters(self): 52 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 53 | self.weight.data.uniform_(-stdv, stdv) 54 | if self.use_bias: 55 | if self.bias_zero: 56 | self.bias.data.zero_() 57 | else: 58 | self.bias.data.uniform_(-stdv, stdv) 59 | 60 | def forward(self, input): 61 | return self.activ(F.linear(input, self.weight, self.bias)) 62 | 63 | def __repr__(self): 64 | repr_ = self.__class__.__name__ + '(' \ 65 | + 'input_size=' + str(self.input_size) \ 66 | + ', hidden_size=' + str(self.hidden_size) \ 67 | + ', activ=' + str(self.activ_type) \ 68 | + ', bias=' + str(self.use_bias) 69 | if self.use_bias: 70 | repr_ += ', bias_zero=' + str(self.bias_zero) 71 | return repr_ + ')' 72 | -------------------------------------------------------------------------------- /pysimt/layers/fusion.py: -------------------------------------------------------------------------------- 1 | """A convenience layer that merges an arbitrary number of inputs.""" 2 | 3 | import operator 4 | from typing import Optional 5 | from functools import reduce 6 | 7 | import torch 8 | 9 | from . import FF 10 | from ..utils.nn import get_activation_fn 11 | 12 | 13 | class Fusion(torch.nn.Module): 14 | """A convenience layer that merges an arbitrary number of inputs using 15 | concatenation, addition or multiplication. It then applies an optional 16 | non-linearity given by the `activ` argument. If `operation==concat`, 17 | additional arguments should be provided to define an adaptor MLP 18 | that will project the concatenated vector into a lower dimensional space. 19 | 20 | Args: 21 | operation: `concat`, `sum` or `mul` for concatenation, addition, and 22 | multiplication respectively 23 | activ: The activation function name that will be searched 24 | in `torch` and `torch.nn.functional` modules. `None` or `linear` 25 | disables the activation function 26 | input_size: Only required for `concat` fusion, to denote the concatenated 27 | input vector size. This will be used to add an MLP adaptor layer 28 | after concatenation to project the fused vector into a lower 29 | dimension 30 | output_size: Only required for `concat` fusion, to denote the 31 | output size of the aforementioned adaptor layer 32 | """ 33 | def __init__(self, 34 | operation: str = 'concat', 35 | activ: Optional[str] = 'linear', 36 | input_size: Optional[int] = None, 37 | output_size: Optional[int] = None): 38 | """""" 39 | super().__init__() 40 | 41 | self.operation = operation 42 | self.activ = activ 43 | self.forward = getattr(self, '_{}'.format(self.operation)) 44 | self.activ = get_activation_fn(activ) 45 | self.adaptor = lambda x: x 46 | 47 | if self.operation == 'concat' or input_size != output_size: 48 | self.adaptor = FF(input_size, output_size, bias=False, activ=None) 49 | 50 | def _sum(self, inputs): 51 | return self.activ(self.adaptor(reduce(operator.add, inputs))) 52 | 53 | def _mul(self, inputs): 54 | return self.activ(self.adaptor(reduce(operator.mul, inputs))) 55 | 56 | def _concat(self, inputs): 57 | return self.activ(self.adaptor(torch.cat(inputs, dim=-1))) 58 | 59 | def __repr__(self): 60 | return f"Fusion(type={self.operation}, activ={self.activ})" 61 | -------------------------------------------------------------------------------- /pysimt/layers/pool.py: -------------------------------------------------------------------------------- 1 | """A convenience layer to apply pooling to a sequential tensor.""" 2 | 3 | import torch 4 | 5 | 6 | class Pool(torch.nn.Module): 7 | """A convenience layer to apply various sorts of pooling to a 8 | sequential tensor. 
The pooling operation can be `last`, `mean`, `max`, or 9 | `sum`. 10 | 11 | Args: 12 | operation: The pooling operator. 13 | It should be one from `last`, `mean`, `max`, `sum`. 14 | pool_dim: The dimension along which the pooling will be applied 15 | keepdim: Passed along to the underlying `torch` functions for 16 | `max`, `mean` and `sum` variants. 17 | 18 | Examples: 19 | >>> import torch 20 | >>> from pysimt.layers import Pool 21 | >>> x = torch.rand(10, 32, 200) # n_timesteps, n_samples, feat_dim 22 | >>> p = Pool('sum', 0) 23 | >>> torch.equal(p(x), x.sum(0, keepdim=True)) 24 | True 25 | >>> p = Pool('max', 0) 26 | >>> torch.equal(p(x), x.max(0, keepdim=True)[0]) 27 | True 28 | >>> p = Pool('mean', 0) 29 | >>> torch.equal(p(x), x.mean(0, keepdim=True)) 30 | True 31 | >>> p = Pool('last', 0) 32 | >>> torch.equal(p(x), x.select(0, -1).unsqueeze(0)) 33 | True 34 | >>> torch.equal(p(x), x[-1].unsqueeze(0)) 35 | True 36 | >>> p = Pool('last', 1) 37 | >>> torch.equal(p(x), x.select(1, -1).unsqueeze(0)) 38 | True 39 | """ 40 | def __init__(self, operation: str, pool_dim: int, keepdim: bool = True): 41 | """""" 42 | super().__init__() 43 | 44 | self.operation = operation 45 | self.pool_dim = pool_dim 46 | self.keepdim = keepdim 47 | 48 | assert self.operation in ["last", "mean", "max", "sum"], \ 49 | "Pool() operation should be mean, max, sum or last." 50 | 51 | # Assign the shortcut 52 | self.forward = getattr(self, '_{}'.format(self.operation)) 53 | 54 | def _last(self, x: torch.Tensor) -> torch.Tensor: 55 | return x.select(self.pool_dim, -1).unsqueeze(0) 56 | 57 | def _max(self, x: torch.Tensor) -> torch.Tensor: 58 | return torch.max(x, dim=self.pool_dim, keepdim=self.keepdim)[0] 59 | 60 | def _mean(self, x: torch.Tensor) -> torch.Tensor: 61 | return torch.mean(x, dim=self.pool_dim, keepdim=self.keepdim) 62 | 63 | def _sum(self, x: torch.Tensor) -> torch.Tensor: 64 | return torch.sum(x, dim=self.pool_dim, keepdim=self.keepdim) 65 | 66 | def __repr__(self): 67 | return "Pool(operation={}, pool_dim={}, keepdim={})".format( 68 | self.operation, self.pool_dim, self.keepdim) 69 | -------------------------------------------------------------------------------- /pysimt/layers/positionwise_ff.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from . import FF 4 | from .transformers import BaseSublayer 5 | 6 | 7 | class PositionwiseFF(nn.Module): 8 | """Positionwise Feed-forward layer. 9 | 10 | Arguments: 11 | 12 | Input: 13 | 14 | Output: 15 | """ 16 | 17 | def __init__(self, model_dim, ff_dim, activ='gelu', dropout=0.1): 18 | """ 19 | Creates a PositionwiseFF. 20 | :param model_dim: The model dimensions. 21 | :param ff_dim: The feedforward dimensions. 22 | :param activ: The activation function. Default: gelu 23 | :param dropout: The amount of dropout. Default: 0.1 24 | """ 25 | super().__init__() 26 | self.model_dim = model_dim 27 | self.ff_dim = ff_dim 28 | self.activ = activ 29 | 30 | # Create the layers 31 | self.layers = nn.Sequential( 32 | FF(self.model_dim, self.ff_dim, activ=self.activ), 33 | nn.Dropout(dropout), 34 | FF(self.ff_dim, self.model_dim, activ=None), 35 | ) 36 | 37 | def forward(self, x): 38 | return self.layers(x) 39 | 40 | 41 | class PositionwiseSublayer(BaseSublayer): 42 | def __init__(self, model_dim, ff_dim, ff_activ='gelu', dropout=0.1, is_pre_norm=False): 43 | """ 44 | Creates a PositionwiseSublayer. 45 | :param model_dim: The model dimensions. 
46 | :param ff_dim: The dimensions of the feed forward network. 47 | :param ff_activ: The activation of the feed forward network. 48 | :param dropout: The dropout rate. 49 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 50 | """ 51 | super().__init__(model_dim, dropout, is_pre_norm) 52 | self.feed_forward = PositionwiseFF(model_dim, ff_dim, ff_activ, dropout=dropout) 53 | 54 | def forward(self, x, mask=None): 55 | """ 56 | Performs a forward pass over the PositionwiseSublayer. 57 | :param x: The input x. 58 | :param mask: The input mask. 59 | :return: The output from the forward pass of the PositionwiseSublayer. 60 | """ 61 | residual = x 62 | x = self.apply_pre_norm_if_needed(x) 63 | x = self.feed_forward(x) 64 | x = self.apply_residual(residual, x) 65 | x = self.apply_post_norm_if_needed(x) 66 | return x 67 | -------------------------------------------------------------------------------- /pysimt/layers/selector.py: -------------------------------------------------------------------------------- 1 | """A utility layer that returns a particular element from the previous layer.""" 2 | 3 | from torch import nn, Tensor 4 | from typing import Iterable, Any 5 | 6 | 7 | class Selector(nn.Module): 8 | """Utility layer that selects and returns a particular element out of 9 | a tuple. It is useful to select a particular output from the previous layer, 10 | when used in constructs such as `torch.nn.Sequential()`. 11 | 12 | Args: 13 | index: The position to select from the given input. 14 | 15 | Example: 16 | >>> layers = [] 17 | >>> layers.append(torch.nn.GRU(200, 400)) 18 | # By default, GRU returns (output, h_n) but we are not interested in h_n 19 | >>> layers.append(Selector(0)) 20 | >>> layers.append(torch.nn.Dropout(0.2)) 21 | >>> self.block = nn.Sequential(*layers) 22 | """ 23 | def __init__(self, index: int): 24 | """""" 25 | super().__init__() 26 | self.index = index 27 | 28 | def forward(self, x: Iterable[Tensor]) -> Tensor: 29 | """Returns the pre-determined `self.index`'th position of `x`.""" 30 | return x[self.index] 31 | 32 | def __repr__(self): 33 | return f"Selector(index={self.index})" 34 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sublayer import BaseSublayer 2 | from .self_attention_sublayer import SelfAttentionSublayer 3 | from .cross_attention_sublayer import CrossAttentionSublayer 4 | from .cross_attention_sublayer_mm_flat import FlatMMCrossAttentionSublayer 5 | from .cross_attention_sublayer_mm_hier import HierarchicalMMCrossAttentionSublayer 6 | from .cross_attention_sublayer_mm_serial import SerialMMCrossAttentionSublayer 7 | from .cross_attention_sublayer_mm_parallel import ParallelMMCrossAttentionSublayer 8 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/base_sublayer.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class BaseSublayer(nn.Module): 5 | 6 | def __init__(self, model_dim, dropout=0.1, is_pre_norm=False): 7 | """ 8 | Creates a BaseSublayer. 9 | :param model_dim: The model dimension. 10 | :param dropout: The dropout layer. 11 | :param is_pre_norm: Whether it should use pre_norm transformer layers. Default: False. 
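With pre_norm, the sublayer normalises its input before the wrapped operation and the residual is added afterwards (roughly `x + Dropout(op(LayerNorm(x)))`); with post_norm, normalisation is applied after the residual connection (`LayerNorm(x + Dropout(op(x)))`). This corresponds to the `apply_pre_norm_if_needed`, `apply_post_norm_if_needed` and `apply_residual` helpers below.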
12 | """ 13 | super().__init__() 14 | self.is_pre_norm = is_pre_norm 15 | self.layer_norm = nn.LayerNorm(model_dim, eps=1e-6) 16 | self.dropout = nn.Dropout(dropout) 17 | 18 | def forward(self, **kwargs): 19 | raise NotImplementedError("BaseSublayer does not implement forward.") 20 | 21 | def apply_pre_norm_if_needed(self, x): 22 | """ 23 | Applies pre_norm to the input if needed. If pre_norm is false, the input remains unchanged. 24 | :param x: The input. 25 | :return: The output. 26 | """ 27 | if self.is_pre_norm: 28 | x = self.layer_norm(x) 29 | return x 30 | 31 | def apply_post_norm_if_needed(self, x): 32 | """ 33 | Applies post_norm to the input if needed. If pre_norm is true, the input remains unchanged. 34 | :param x: The input. 35 | :return: The output. 36 | """ 37 | if not self.is_pre_norm: 38 | x = self.layer_norm(x) 39 | return x 40 | 41 | def apply_residual(self, residual, x): 42 | """ 43 | Applies the residual connection. 44 | :param residual: The residual. 45 | :param x: The input x. 46 | :return: The output of the residual connection. 47 | """ 48 | return residual + self.dropout(x) 49 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/cross_attention_sublayer.py: -------------------------------------------------------------------------------- 1 | from ..attention import ScaledDotAttention 2 | from .base_sublayer import BaseSublayer 3 | 4 | 5 | class CrossAttentionSublayer(BaseSublayer): 6 | def __init__(self, model_dim, n_heads, dropout=0.1, 7 | attn_dropout=0.0, is_pre_norm=False): 8 | """ 9 | Creates a CrossAttentionSublayer. 10 | :param model_dim: The model dimension. 11 | :param n_heads: The number of attention heads. 12 | :param dropout: The dropout rate for the residual connection. 13 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 14 | """ 15 | super().__init__(model_dim, dropout, is_pre_norm) 16 | self.attn = ScaledDotAttention(model_dim, n_heads, attn_dropout) 17 | 18 | def forward(self, query, key, value, mask=None, **kwargs): 19 | """ 20 | Performs a forward pass over the CrossAttentionSublayer. 21 | :param query: The query. For encoder-decoder attention, it is the output from the previous decoder layer. 22 | :param key: The key. For encoder-decoder attention, it is the output from the encoder. 23 | :param value: The mask. For encoder-decoder attention, it is the output from the encoder. 24 | :param mask: The mask. For encoder-decoder attention, it is the encoder mask. 25 | :return: The output of the CrossAttentionSublayer. 26 | """ 27 | residual = query 28 | query = self.apply_pre_norm_if_needed(query) 29 | attn_out, attn_weights = self.attn((query, key, value, mask)) 30 | out = self.apply_residual(residual, attn_out) 31 | out = self.apply_post_norm_if_needed(out) 32 | return out, attn_weights 33 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/cross_attention_sublayer_mm_flat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sublayer import BaseSublayer 4 | from ..attention import ScaledDotAttention 5 | from ...utils.nn import generate_default_mask 6 | 7 | 8 | class FlatMMCrossAttentionSublayer(BaseSublayer): 9 | def __init__(self, model_dim, n_heads, dropout=0.1, 10 | attn_dropout=0.0, is_pre_norm=False): 11 | """ 12 | Creates a FlatMMCrossAttentionSublayer. 13 | :param model_dim: The model dimensions. 
14 | :param n_heads: The number of attention heads. 15 | :param dropout: The dropout rate for the residual connection. 16 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 17 | """ 18 | super().__init__(model_dim, dropout, is_pre_norm) 19 | self.multimodal_attn = ScaledDotAttention( 20 | model_dim, n_heads, attn_dropout) 21 | 22 | def forward(self, query, key_txt, value_txt, mask_txt, 23 | key_img, value_img, mask_img=None): 24 | """ 25 | Performs a forward pass. 26 | :param query: The query for the attention layers. 27 | :param key_txt: The key for the textual modality. If None, it is set to the query. 28 | :param value_txt: The value for the textual modality. If None, it is set to the query. 29 | :param mask_txt: The textual modality mask. 30 | :param key_img: The key for the visual modality. 31 | :param value_img: The value for the visual modality. 32 | :param mask_img: The visual modality mask. Default: None. 33 | :return: 34 | """ 35 | residual = query 36 | query = self.apply_pre_norm_if_needed(query) 37 | if key_txt is None: 38 | key_txt = query 39 | if value_txt is None: 40 | value_txt = query 41 | 42 | combined_mask = self._generate_combined_mask( 43 | key_img, mask_img, mask_txt) 44 | 45 | multimodal_key = torch.cat((key_img, key_txt), dim=0) 46 | multimodal_value = torch.cat((value_img, value_txt), dim=0) 47 | attn_multimodal, attn_weights = self.multimodal_attn( 48 | (query, multimodal_key, multimodal_value, combined_mask)) 49 | 50 | out = self.apply_residual(residual, attn_multimodal) 51 | out = self.apply_post_norm_if_needed(out) 52 | return out, attn_weights 53 | 54 | @staticmethod 55 | def _generate_combined_mask(key_img, mask_img, mask_txt): 56 | if mask_img is None: 57 | mask_img = generate_default_mask(key_img, mask_txt.shape[1]) 58 | combined_mask = torch.cat((mask_img, mask_txt), dim=-1) 59 | return combined_mask 60 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/cross_attention_sublayer_mm_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..attention import ScaledDotAttention 4 | from .base_sublayer import BaseSublayer 5 | 6 | 7 | class ParallelMMCrossAttentionSublayer(BaseSublayer): 8 | def __init__(self, model_dim, n_heads, dropout=0.1, attn_dropout=0.0, is_pre_norm=False, fusion='sum'): 9 | """ 10 | Creates a ParallelCrossAttentionSublayer. 11 | :param model_dim: The model dimensions. 12 | :param n_heads: The number of attention heads. 13 | :param dropout: The dropout rate for the residual connection. 14 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 15 | """ 16 | super().__init__(model_dim, dropout, is_pre_norm) 17 | self.attn_txt = ScaledDotAttention(model_dim, n_heads, attn_dropout) 18 | self.attn_img = ScaledDotAttention(model_dim, n_heads, attn_dropout) 19 | self.fusion = fusion 20 | 21 | def forward(self, query, key_txt, value_txt, mask_txt, key_img, value_img, mask_img=None): 22 | """ 23 | Performs a forward pass over the CrossAttentionSublayer. 24 | :param query: The query. For encoder-decoder attention, it is the output from the previous decoder layer. 25 | :param key_txt: The key. For encoder-decoder attention, it is the output from the encoder. 26 | :param value_txt: The mask. For encoder-decoder attention, it is the output from the encoder. 27 | :param value_img: 28 | :param key_img: 29 | :param mask_txt: The textual encoder mask. 
30 | :param mask_img: The visual features mask. 31 | :return: The output of the CrossAttentionSublayer. 32 | """ 33 | residual = query 34 | query = self.apply_pre_norm_if_needed(query) 35 | 36 | attn_txt, attn_weights_txt = self.attn_txt((query, key_txt, value_txt, mask_txt)) 37 | attn_img, attn_weights_img = self.attn_img((query, key_img, value_img, mask_img)) 38 | 39 | attn_combined = torch.add(attn_txt, attn_img) 40 | out = self.apply_residual(residual, attn_combined) 41 | out = self.apply_post_norm_if_needed(out) 42 | return out, {'txt': attn_weights_txt, 'img': attn_weights_img} 43 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/cross_attention_sublayer_mm_serial.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from .cross_attention_sublayer import CrossAttentionSublayer 4 | 5 | 6 | class SerialMMCrossAttentionSublayer(nn.Module): 7 | def __init__(self, model_dim, n_heads, dropout=0.1, 8 | attn_dropout=0.0, is_pre_norm=False): 9 | """ 10 | Creates a ParallelCrossAttentionSublayer. 11 | :param model_dim: The model dimensions. 12 | :param n_heads: The number of attention heads. 13 | :param dropout: The dropout rate for the residual connection. 14 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 15 | """ 16 | super().__init__() 17 | self.attn_txt = CrossAttentionSublayer( 18 | model_dim, n_heads, dropout, attn_dropout, is_pre_norm) 19 | self.attn_img = CrossAttentionSublayer( 20 | model_dim, n_heads, dropout, attn_dropout, is_pre_norm) 21 | 22 | def forward(self, query, key_txt, value_txt, mask_txt, 23 | key_img, value_img, mask_img=None): 24 | """ 25 | Performs a forward pass over the CrossAttentionSublayer. 26 | :param query: The query. For encoder-decoder attention, it is the output from the previous decoder layer. 27 | :param key_txt: The key. For encoder-decoder attention, it is the output from the encoder. 28 | :param value_txt: The mask. For encoder-decoder attention, it is the output from the encoder. 29 | :param value_img: 30 | :param key_img: 31 | :param mask_txt: The textual encoder mask. 32 | :param mask_img: The visual features mask. 33 | :return: The output of the CrossAttentionSublayer. 34 | """ 35 | attn_txt, attn_weights_txt = self.attn_txt( 36 | query, key_txt, value_txt, mask_txt) 37 | attn_img, attn_weights_img = self.attn_img( 38 | attn_txt, key_img, value_img, mask_img) 39 | return attn_img, {'txt': attn_weights_txt, 'img': attn_weights_img} 40 | -------------------------------------------------------------------------------- /pysimt/layers/transformers/self_attention_sublayer.py: -------------------------------------------------------------------------------- 1 | from ..attention import ScaledDotAttention 2 | from .base_sublayer import BaseSublayer 3 | 4 | 5 | class SelfAttentionSublayer(BaseSublayer): 6 | 7 | def __init__(self, model_dim, n_heads, dropout=0.1, 8 | attn_dropout=0.0, is_pre_norm=False): 9 | """ 10 | Creates a SelfAttentionSublayer. 11 | :param model_dim: The model dimensions. 12 | :param n_heads: The number of attention heads. 13 | :param dropout: The dropout rate for the residual connection. 14 | :param is_pre_norm: Whether the layer type is pre_norm. Default: True. 
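:param attn_dropout: The dropout rate passed to the inner `ScaledDotAttention` block. Default: 0.0.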
15 | """ 16 | super().__init__(model_dim, dropout, is_pre_norm) 17 | self.attn = ScaledDotAttention(model_dim, n_heads, attn_dropout) 18 | 19 | def forward(self, x, mask=None): 20 | """ 21 | Performs a forward pass over the SelfAttentionSublayer. 22 | :param x: The input. Will be used as query, key and value. 23 | :param mask: The input mask. 24 | :return: The output of the SelfAttentionSublayer. 25 | """ 26 | residual = x 27 | x = self.apply_pre_norm_if_needed(x) 28 | attn_out, attn_weights = self.attn((x, x, x, mask)) 29 | out = self.apply_residual(residual, attn_out) 30 | out = self.apply_post_norm_if_needed(out) 31 | return out, attn_weights 32 | -------------------------------------------------------------------------------- /pysimt/logger.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import logging 3 | 4 | from .utils.resource_mgr import res_mgr 5 | 6 | 7 | def setup(opts=None): 8 | _format = '%(message)s' 9 | 10 | formatter = logging.Formatter(_format) 11 | logger = logging.getLogger('pysimt') 12 | logger.setLevel(logging.DEBUG) 13 | 14 | con_handler = logging.StreamHandler() 15 | con_handler.setFormatter(formatter) 16 | logger.addHandler(con_handler) 17 | 18 | if opts is not None: 19 | log_file = str(pathlib.Path(opts['save_path']) / 20 | opts['subfolder'] / opts['exp_id']) + '.log' 21 | file_handler = logging.FileHandler(log_file, mode='w') 22 | file_handler.setFormatter(formatter) 23 | logger.addHandler(file_handler) 24 | 25 | res_mgr.register_handler(logger) 26 | return logger 27 | -------------------------------------------------------------------------------- /pysimt/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | class NoamScheduler: 2 | """NoamScheduler implementation from the `Attention is all you need!` paper.""" 3 | def __init__(self, optimizer, tf_model_dim, learning_rate, lr_warmup_steps=4000): 4 | """ 5 | Creates a NoamScheduler, implementing the formula from the Attention is all you need! paper. 6 | :param optimizer: The optimizer. 7 | :param tf_model_dim: The model dimensions. 8 | :param learning_rate: The learning rate. 9 | :param lr_warmup_steps: The warmup steps. 10 | """ 11 | assert tf_model_dim is not None, 'tf_model_dim must be set to the model dimensions noam decay' 12 | assert lr_warmup_steps > 0, 'lr_warmup_steps must be greater than 0 for noam decay' 13 | self.optimizer = optimizer 14 | self._num_steps = 0 15 | self.lr_warmup_steps = lr_warmup_steps 16 | self.tf_model_dim = tf_model_dim 17 | self._learning_rate = learning_rate 18 | 19 | def step(self): 20 | """ 21 | Reduces the learning rate according to the formula in Attention is all you need! and performs an optimizer step. 
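The applied rate is `learning_rate * tf_model_dim ** -0.5 * min(step ** -0.5, step * lr_warmup_steps ** -1.5)` (see `get_decay()` below), i.e. a linear warmup over `lr_warmup_steps` steps followed by an inverse square-root decay.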
22 | """ 23 | self._num_steps += 1 24 | current_learning_rate = self.get_decay() * self._learning_rate 25 | for parameter in self.optimizer.param_groups: 26 | parameter['lr'] = current_learning_rate 27 | self.optimizer.step() 28 | 29 | def get_decay(self): 30 | return self.tf_model_dim ** (-0.5) * min(self._num_steps ** (-0.5), 31 | self._num_steps * self.lr_warmup_steps ** (-1.5)) 32 | -------------------------------------------------------------------------------- /pysimt/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .metric import Metric 2 | from .multibleu import BLEUScorer 3 | from .sacrebleu import SACREBLEUScorer 4 | from .meteor import METEORScorer 5 | from .cer import CERScorer 6 | from .wer import WERScorer 7 | from .simnmt import AVPScorer, AVLScorer, CWMScorer 8 | 9 | """These metrics can be used in early stopping.""" 10 | 11 | # Generation related metrics 12 | beam_metrics = ["BLEU", "SACREBLEU", "METEOR", "CER", "WER"] 13 | 14 | metric_info = { 15 | 'BLEU': 'max', 16 | 'SACREBLEU': 'max', 17 | 'METEOR': 'max', 18 | 'LOSS': 'min', 19 | 'ACC': 'max', 20 | 'RECALL': 'max', 21 | 'PRECISION': 'max', 22 | 'F1': 'max', 23 | 'CER': 'min', 24 | 'WER': 'min', # lower is better, consistent with WERScorer's higher_better=False 25 | # simultaneous translation 26 | 'AVP': 'min', # Average proportion (Cho and Esipova, 2016) 27 | 'AVL': 'min', # Average Lagging (Ma et al., 2019 (STACL)) 28 | 'DAL': 'min', # Differentiable AL (not implemented) 29 | 'CW': 'min', # Consecutive wait (Gu et al., 2017) [Not Implemented] 30 | } 31 | -------------------------------------------------------------------------------- /pysimt/metrics/cer.py: -------------------------------------------------------------------------------- 1 | """Character error rate (CER).""" 2 | 3 | from typing import Iterable, Union, Optional 4 | import editdistance 5 | 6 | from .metric import Metric 7 | 8 | 9 | class CERScorer: 10 | """Computes the character error rate (CER) metric and returns a `Metric` 11 | object. 12 | 13 | Args: 14 | refs: List of reference text files. Only the first one will be used 15 | hyps: Either a string denoting the hypotheses' filename, or 16 | a list that contains the hypotheses strings themselves 17 | language: unused 18 | lowercase: unused 19 | """ 20 | def compute(self, refs: Iterable[str], 21 | hyps: Union[str, Iterable[str]], 22 | language: Optional[str] = None, 23 | lowercase: bool = False) -> Metric: 24 | if isinstance(hyps, str): 25 | # hyps is a file 26 | hyp_sents = open(hyps).read().strip().split('\n') 27 | elif isinstance(hyps, list): 28 | hyp_sents = hyps 29 | 30 | # refs is a list, take its first item 31 | with open(refs[0]) as f: 32 | ref_sents = f.read().strip().split('\n') 33 | 34 | assert len(hyp_sents) == len(ref_sents), "CER: # of sentences does not match."
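        # CER below is the accumulated character-level edit distance normalised by the
        # total number of reference characters (x100); a token-level WER over the same
        # sentence pairs is also computed and reported in the verbose score.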
35 | 36 | n_ref_chars = 0 37 | n_ref_tokens = 0 38 | dist_chars = 0 39 | dist_tokens = 0 40 | for hyp, ref in zip(hyp_sents, ref_sents): 41 | hyp_chars = hyp.split(' ') 42 | ref_chars = ref.split(' ') 43 | n_ref_chars += len(ref_chars) 44 | dist_chars += editdistance.eval(hyp_chars, ref_chars) 45 | 46 | # Convert char-based sentences to token-based ones ('<s>' is assumed to mark word boundaries in the char-level files) 47 | hyp_tokens = hyp.replace(' ', '').replace('<s>', ' ').strip().split(' ') 48 | ref_tokens = ref.replace(' ', '').replace('<s>', ' ').strip().split(' ') 49 | n_ref_tokens += len(ref_tokens) 50 | dist_tokens += editdistance.eval(hyp_tokens, ref_tokens) 51 | 52 | cer = (100 * dist_chars) / n_ref_chars 53 | wer = (100 * dist_tokens) / n_ref_tokens 54 | 55 | verbose_score = "{:.3f}% (n_errors = {}, n_ref_chars = {}, WER = {:.3f}%)".format( 56 | cer, dist_chars, n_ref_chars, wer) 57 | 58 | return Metric('CER', cer, verbose_score, higher_better=False) 59 | -------------------------------------------------------------------------------- /pysimt/metrics/metric.py: -------------------------------------------------------------------------------- 1 | """Base Metric class to be derived from.""" 2 | 3 | from functools import total_ordering 4 | 5 | 6 | @total_ordering 7 | class Metric: 8 | """A base class that will be inherited by evaluation metrics. 9 | 10 | Args: 11 | name: A name for the metric that will be kept internally after upper-casing 12 | score: A floating point score 13 | detailed_score: A custom, more detailed string 14 | representing the score given above 15 | higher_better: If `False`, the smaller the better 16 | """ 17 | def __init__(self, name: str, score: float, 18 | detailed_score: str = "", higher_better: bool = True): 19 | self.name = name.upper() 20 | self.score = score 21 | self.detailed_score = detailed_score 22 | self.higher_better = higher_better 23 | 24 | def __eq__(self, other): 25 | return self.score == other.score 26 | 27 | def __lt__(self, other): 28 | return self.score < other.score 29 | 30 | def __repr__(self): 31 | rhs = (self.detailed_score if self.detailed_score 32 | else "%.2f" % self.score) 33 | return self.name + ' = ' + rhs 34 | -------------------------------------------------------------------------------- /pysimt/metrics/multibleu.py: -------------------------------------------------------------------------------- 1 | """Tokenized BLEU through sacreBLEU API.""" 2 | 3 | from typing import Union, Iterable, TextIO 4 | 5 | from sacrebleu import corpus_bleu 6 | 7 | from ..utils.misc import listify 8 | from ..utils.io import read_reference_files, read_hypothesis_file 9 | from .metric import Metric 10 | 11 | 12 | class BLEUScorer: 13 | """Computes the multi-bleu equivalent using SacreBLEU, with tokenization 14 | option disabled.
15 | 16 | Args: 17 | refs: List of reference text files 18 | hyps: A file path, or a list of hypothesis strings or an open file handle 19 | language: unused 20 | """ 21 | def compute(self, refs: Iterable[str], 22 | hyps: Union[str, Iterable[str], TextIO], 23 | language=None) -> Metric: 24 | if isinstance(hyps, str): 25 | hyps = read_hypothesis_file(hyps) 26 | 27 | assert isinstance(hyps, list) 28 | 29 | refs = read_reference_files(*listify(refs)) 30 | 31 | score = corpus_bleu(hyps, refs, tokenize='none') 32 | verbose_score = ' '.join(score.format().split()[2:]) 33 | float_score = score.score 34 | return Metric('BLEU', float_score, verbose_score) 35 | -------------------------------------------------------------------------------- /pysimt/metrics/sacrebleu.py: -------------------------------------------------------------------------------- 1 | """Detokenized BLEU i.e. sacreBLEU.""" 2 | 3 | from typing import Union, Iterable, TextIO 4 | 5 | from sacrebleu import corpus_bleu 6 | 7 | from ..utils.misc import listify 8 | from ..utils.io import read_reference_files, read_hypothesis_file 9 | from .metric import Metric 10 | 11 | 12 | class SACREBLEUScorer: 13 | """Computes the usual SacreBLEU metric with the default v13a tokenizer. 14 | This metric expects de-tokenized references and hypotheses, i.e. 15 | it only makes sense to use this with SPM files and the `de-spm` 16 | post-processing filter. For the more usual tokenized BLEU, check the 17 | `BLEU` metric. 18 | 19 | Args: 20 | refs: List of reference text files 21 | hyps: A file path, or a list of hypothesis strings or an open file handle 22 | language: unused 23 | """ 24 | def compute(self, refs: Iterable[str], 25 | hyps: Union[str, Iterable[str], TextIO], 26 | language=None) -> Metric: 27 | if isinstance(hyps, str): 28 | hyps = read_hypothesis_file(hyps) 29 | 30 | assert isinstance(hyps, list) 31 | 32 | refs = read_reference_files(*listify(refs)) 33 | 34 | score = corpus_bleu(hyps, refs) 35 | verbose_score = ' '.join(score.format().split()[2:]) 36 | float_score = score.score 37 | return Metric('SACREBLEU', float_score, verbose_score) 38 | -------------------------------------------------------------------------------- /pysimt/metrics/wer.py: -------------------------------------------------------------------------------- 1 | """Word error rate (WER).""" 2 | 3 | from typing import Iterable, Union, Optional 4 | import editdistance 5 | 6 | from .metric import Metric 7 | 8 | 9 | class WERScorer: 10 | """Computes the word error rate (WER) metric and returns a `Metric` 11 | object. 12 | 13 | Args: 14 | refs: List of reference text files. Only the first one will be used 15 | hyps: Either a string denoting the hypotheses' filename, or 16 | a list that contains the hypotheses strings themselves 17 | language: unused 18 | lowercase: unused 19 | """ 20 | def compute(self, refs: Iterable[str], 21 | hyps: Union[str, Iterable[str]], 22 | language: Optional[str] = None, 23 | lowercase: bool = False) -> Metric: 24 | if isinstance(hyps, str): 25 | # hyps is a file 26 | hyp_sents = open(hyps).read().strip().split('\n') 27 | elif isinstance(hyps, list): 28 | hyp_sents = hyps 29 | 30 | # refs is a list, take its first item 31 | with open(refs[0]) as f: 32 | ref_sents = f.read().strip().split('\n') 33 | 34 | assert len(hyp_sents) == len(ref_sents), "WER: # of sentences does not match." 
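        # Accumulate token-level edit distances over the whole corpus and normalise by
        # the total reference length. E.g. hyp 'a b c' vs. ref 'a x c d' has an edit
        # distance of 2 against 4 reference tokens for that sentence pair.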
35 | 36 | n_ref_tokens = 0 37 | dist = 0 38 | for hyp, ref in zip(hyp_sents, ref_sents): 39 | hyp_tokens = hyp.split(' ') 40 | ref_tokens = ref.split(' ') 41 | n_ref_tokens += len(ref_tokens) 42 | dist += editdistance.eval(hyp_tokens, ref_tokens) 43 | 44 | score = (100 * dist) / n_ref_tokens 45 | verbose_score = "{:.3f}% (n_errors = {}, n_ref_tokens = {})".format( 46 | score, dist, n_ref_tokens) 47 | 48 | return Metric('WER', score, verbose_score, higher_better=False) 49 | -------------------------------------------------------------------------------- /pysimt/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .snmt_rnn import SimultaneousNMT 2 | from .snmt_rnn_waitk import SimultaneousWaitKNMT 3 | 4 | from .snmt_rnn_encatt import EncoderSelfAttentionSimultaneousNMT 5 | from .snmt_rnn_encatt_waitk import EncoderSelfAttentionSimultaneousWaitKNMT 6 | 7 | from .snmt_tf import SimultaneousTFNMT 8 | from .snmt_tf_waitk import SimultaneousTFWaitKNMT 9 | -------------------------------------------------------------------------------- /pysimt/models/snmt_rnn_encatt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from torch import nn 4 | from ..layers.attention import MultiheadAttention 5 | 6 | from . import SimultaneousNMT 7 | 8 | 9 | logger = logging.getLogger('pysimt') 10 | 11 | 12 | class EncoderSelfAttentionSimultaneousNMT(SimultaneousNMT): 13 | """Simultaneous self-attentive MMT i.e. the ENC-O* model in the paper.""" 14 | 15 | def set_defaults(self): 16 | super().set_defaults() 17 | self.defaults.update({ 18 | 'n_heads': 1, 19 | 'att_dropout': 0.0, 20 | }) 21 | 22 | def setup(self, is_train=True): 23 | """Sets up NN topology by creating the layers.""" 24 | encoders = {} 25 | for key in self.topology.srcs.keys(): 26 | encoders[key] = getattr(self, f'create_{key}_encoder')() 27 | 28 | # Separate out visual encoder to avoid multimodal decoder-side 29 | # attention to be enabled 30 | self.ff_vis_enc = encoders.pop('image') 31 | 32 | self.encoders = nn.ModuleDict(encoders) 33 | self.dec = self.create_decoder(encoders=self.encoders) 34 | 35 | # create the cross-modal self-attention network 36 | self.mm_attn = MultiheadAttention( 37 | self.opts.model['enc_dim'], self.opts.model['enc_dim'], 38 | n_heads=self.opts.model['n_heads'], 39 | dropout=self.opts.model['att_dropout'], attn_type='cross') 40 | self.mm_lnorm = nn.LayerNorm(self.opts.model['enc_dim']) 41 | 42 | # Share encoder and decoder weights 43 | if self.opts.model['tied_emb'] == '3way': 44 | self.encoders[str(self.sl)].emb.weight = self.dec.emb.weight 45 | 46 | def cache_enc_states(self, batch): 47 | """Caches encoder states internally by forward-pass'ing each encoder.""" 48 | self.encoders['src'](batch['src']) 49 | self.ff_vis_enc(batch['image']) 50 | 51 | src_states, src_mask = self.encoders['src'].get_states() 52 | img_states, img_mask = self.ff_vis_enc.get_states() 53 | 54 | # key values are image states 55 | kv = img_states.transpose(0, 1) 56 | attn_out = self.mm_attn( 57 | q=src_states.transpose(0, 1), k=kv, v=kv, 58 | q_mask=src_mask.transpose(0, 1).logical_not()).transpose(0, 1) 59 | 60 | # Inject this into the encoder itself for caching 61 | self.encoders['src']._states = self.mm_lnorm(src_states + attn_out) 62 | -------------------------------------------------------------------------------- /pysimt/models/snmt_rnn_encatt_waitk.py: -------------------------------------------------------------------------------- 
1 | import logging 2 | 3 | from . import EncoderSelfAttentionSimultaneousNMT 4 | 5 | logger = logging.getLogger('pysimt') 6 | 7 | 8 | """This is the training-time wait-k model from: 9 | Ma et al. (2018), STACL: Simultaneous Translation with Implicit Anticipation 10 | and Controllable Latency using Prefix-to-Prefix Framework, arXiv:1810.08398 11 | 12 | The only required parameter is the `k` argument for training. When decoding, 13 | pass the `k` argument explicitly to `pysimt translate`. A large enough `k` 14 | should produce the same results as the `snmt.py` model. 15 | """ 16 | 17 | 18 | class EncoderSelfAttentionSimultaneousWaitKNMT(EncoderSelfAttentionSimultaneousNMT): 19 | def set_defaults(self): 20 | super().set_defaults() 21 | self.defaults.update({ 22 | # Decoding/training simultaneous NMT args 23 | 'translator_type': 'wk', # This model implements train-time wait-k 24 | 'translator_args': {'k': 1e4}, # k as in wait-k in training 25 | 'consecutive_warmup': 0, # consecutive training for this many epochs 26 | }) 27 | 28 | def __init__(self, opts): 29 | super().__init__(opts) 30 | assert self.opts.model['translator_type'] != 'bs', \ 31 | 'Beam search not compatible with simultaneous models' 32 | 33 | def forward(self, batch, **kwargs): 34 | """Training forward-pass with explicit timestep-based loop.""" 35 | loss = 0.0 36 | 37 | k = int(self.opts.model['translator_args']['k']) 38 | if self.training: 39 | epoch_count = kwargs['ectr'] 40 | if epoch_count <= self.opts.model['consecutive_warmup']: 41 | # warming up, use full contexts 42 | k = int(1e4) 43 | 44 | # Cache encoder states first 45 | self.cache_enc_states(batch) 46 | 47 | # Initial state is None i.e. 0. 48 | h = self.dec.f_init() 49 | 50 | # Convert target token indices to embeddings -> T*B*E 51 | y = batch[self.tl] 52 | y_emb = self.dec.emb(y) 53 | 54 | # -1: So that we skip the timestep where input is 55 | for t in range(y_emb.size(0) - 1): 56 | ########################################### 57 | # waitk: pass partial context incrementally 58 | ########################################### 59 | state_dict = self.get_enc_state_dict(up_to=k + t) 60 | log_p, h = self.dec.f_next(state_dict, y_emb[t], h) 61 | loss += self.dec.nll_loss(log_p, y[t + 1]) 62 | 63 | return { 64 | 'loss': loss, 65 | 'n_items': y[1:].nonzero(as_tuple=False).size(0), 66 | } 67 | -------------------------------------------------------------------------------- /pysimt/models/snmt_rnn_waitk.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from . import SimultaneousNMT 4 | 5 | logger = logging.getLogger('pysimt') 6 | 7 | 8 | """This is the training-time wait-k model from: 9 | Ma et al. (2018), STACL: Simultaneous Translation with Implicit Anticipation 10 | and Controllable Latency using Prefix-to-Prefix Framework, arXiv:1810.08398 11 | 12 | The only required parameter is the `k` argument for training. When decoding, 13 | pass the `k` argument explicitly to `pysimt translate`. A large enough `k` 14 | should produce the same results as the `snmt.py` model. 
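In wait-k decoding, the model first reads `k` source tokens and then alternates between emitting one target token and reading one more source token: at target step `t`, only the first `k + t` cached encoder states are visible to the decoder (the `up_to=k + t` call in `forward()` below).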
15 | """ 16 | 17 | 18 | class SimultaneousWaitKNMT(SimultaneousNMT): 19 | 20 | def set_defaults(self): 21 | super().set_defaults() 22 | self.defaults.update({ 23 | # Decoding/training simultaneous NMT args 24 | 'translator_type': 'wk', # This model implements train-time wait-k 25 | 'translator_args': {'k': 1e4}, # k as in wait-k in training 26 | 'consecutive_warmup': 0, # consecutive training for this many epochs 27 | }) 28 | 29 | def __init__(self, opts): 30 | super().__init__(opts) 31 | assert self.opts.model['translator_type'] != 'bs', \ 32 | 'Beam search not compatible with simultaneous models' 33 | 34 | def forward(self, batch, **kwargs): 35 | """Training forward-pass with explicit timestep-based loop.""" 36 | loss = 0.0 37 | 38 | k = int(self.opts.model['translator_args']['k']) 39 | if self.training: 40 | epoch_count = kwargs['ectr'] 41 | if epoch_count <= self.opts.model['consecutive_warmup']: 42 | # warming up, use full contexts 43 | k = int(1e4) 44 | 45 | # Cache encoder states first 46 | self.cache_enc_states(batch) 47 | 48 | # Initial state is None i.e. 0. 49 | h = self.dec.f_init() 50 | 51 | # Convert target token indices to embeddings -> T*B*E 52 | y = batch[self.tl] 53 | y_emb = self.dec.emb(y) 54 | 55 | # -1: So that we skip the timestep where input is 56 | for t in range(y_emb.size(0) - 1): 57 | ########################################### 58 | # waitk: pass partial context incrementally 59 | ########################################### 60 | state_dict = self.get_enc_state_dict(up_to=k + t) 61 | log_p, h = self.dec.f_next(state_dict, y_emb[t], h) 62 | loss += self.dec.nll_loss(log_p, y[t + 1]) 63 | 64 | return { 65 | 'loss': loss, 66 | 'n_items': y[1:].nonzero(as_tuple=False).size(0), 67 | } 68 | -------------------------------------------------------------------------------- /pysimt/models/snmt_tf_waitk.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from . import SimultaneousTFNMT 4 | 5 | logger = logging.getLogger('pysimt') 6 | 7 | """This is the training-time wait-k model from: 8 | Ma et al. (2018), STACL: Simultaneous Translation with Implicit Anticipation 9 | and Controllable Latency using Prefix-to-Prefix Framework, arXiv:1810.08398 10 | 11 | The only required parameter is the `k` argument for training. When decoding, 12 | pass the `k` argument explicitly to `pysimt translate`. A large enough `k` 13 | should produce the same results as the `snmt.py` model. 14 | """ 15 | 16 | 17 | class SimultaneousTFWaitKNMT(SimultaneousTFNMT): 18 | 19 | def set_defaults(self): 20 | super().set_defaults() 21 | self.defaults.update({ 22 | # Decoding/training simultaneous NMT args 23 | 'translator_type': 'wk', # This model implements train-time wait-k 24 | 'translator_args': {'k': 1e4}, # k as in wait-k in training 25 | 'consecutive_warmup': 0, # consecutive training for this many epochs 26 | }) 27 | 28 | def __init__(self, opts): 29 | super().__init__(opts) 30 | assert not self.opts.model['enc_bidirectional'], \ 31 | 'Bidirectional TF encoder is not currently supported for simultaneous MT.' 32 | assert self.opts.model['translator_type'] != 'bs', \ 33 | 'Beam search not compatible with simultaneous models' 34 | 35 | def forward(self, batch, **kwargs): 36 | """ 37 | Performs a forward pass. 38 | :param batch: The batch. 39 | :param kwargs: Any extra arguments. 40 | :return: The output from the forward pass. 
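        `k` is simply forwarded to the parent class' `forward()` so that the wait-k
        context limiting is handled there. During the first `consecutive_warmup` epochs,
        `k` is raised to a large value and training effectively sees the full source context.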
41 | """ 42 | k = int(self.opts.model['translator_args']['k']) 43 | if self.training: 44 | epoch_count = kwargs['ectr'] 45 | if epoch_count <= self.opts.model['consecutive_warmup']: 46 | # warming up, use full contexts 47 | k = int(1e4) 48 | 49 | # Pass 'k' to the model. 50 | return super().forward(batch, k=k) 51 | -------------------------------------------------------------------------------- /pysimt/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .bucket import BucketBatchSampler 2 | from .approx import ApproximateBucketBatchSampler 3 | 4 | 5 | def get_sampler(type_): 6 | return { 7 | 'bucket': BucketBatchSampler, 8 | 'approximate': ApproximateBucketBatchSampler, 9 | }[type_.lower()] 10 | -------------------------------------------------------------------------------- /pysimt/translators/__init__.py: -------------------------------------------------------------------------------- 1 | from .greedy import GreedySearch 2 | from .sim_greedy import SimultaneousGreedySearch 3 | from .waitk_greedy import SimultaneousWaitKGreedySearch 4 | from .beam import BeamSearch 5 | 6 | def get_translator(_type): 7 | return { 8 | 'gs': GreedySearch, 9 | 'sgs': SimultaneousGreedySearch, 10 | 'wk': SimultaneousWaitKGreedySearch, 11 | 'bs': BeamSearch, 12 | }[_type] 13 | -------------------------------------------------------------------------------- /pysimt/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /pysimt/utils/data.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | 3 | 4 | def sort_predictions(data_loader, results): 5 | """Recovers the dataset order when bucketing samplers are used.""" 6 | if getattr(data_loader.batch_sampler, 'store_indices', False): 7 | results = [results[i] for i, j in sorted( 8 | enumerate(data_loader.batch_sampler.orig_idxs), key=lambda k: k[1])] 9 | return results 10 | 11 | 12 | def make_dataloader(dataset, pin_memory=False, num_workers=0): 13 | return DataLoader( 14 | dataset, batch_sampler=dataset.sampler, 15 | collate_fn=dataset.collate_fn, 16 | pin_memory=pin_memory, num_workers=num_workers) 17 | -------------------------------------------------------------------------------- /pysimt/utils/io.py: -------------------------------------------------------------------------------- 1 | import bz2 2 | import gzip 3 | import lzma 4 | import pathlib 5 | 6 | from collections import deque 7 | from typing import List, Iterable, Any 8 | 9 | import numpy as np 10 | from tqdm import tqdm 11 | 12 | 13 | class FileRotator: 14 | """A fixed queue with Path() elements where pushing a new element pops 15 | the oldest one and removes it from disk. 16 | 17 | Arguments: 18 | maxlen(int): The capacity of the queue. 
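    Example (illustrative checkpoint paths):
        >>> rotator = FileRotator(2)
        >>> rotator.push(pathlib.Path('ckpt-1.pt'))
        >>> rotator.push(pathlib.Path('ckpt-2.pt'))
        >>> rotator.push(pathlib.Path('ckpt-3.pt'))  # evicts 'ckpt-1.pt' and deletes it from disk if it exists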
19 | """ 20 | 21 | def __init__(self, maxlen): 22 | self.maxlen = maxlen 23 | self.elems = deque(maxlen=self.maxlen) 24 | 25 | def push(self, elem): 26 | if len(self.elems) == self.maxlen: 27 | # Remove oldest item 28 | popped = self.elems.pop() 29 | if popped.exists(): 30 | popped.unlink() 31 | 32 | # Add new item 33 | self.elems.appendleft(elem) 34 | 35 | def __repr__(self): 36 | return self.elems.__repr__() 37 | 38 | 39 | def fopen(filename: str, key: str = None): 40 | """gzip,bzip2,xz,numpy aware file opening function.""" 41 | assert '*' not in str(filename), "Glob patterns not supported in fopen()" 42 | 43 | filename = str(pathlib.Path(filename).expanduser()) 44 | if filename.endswith('.gz'): 45 | return gzip.open(filename, 'rt') 46 | elif filename.endswith('.bz2'): 47 | return bz2.open(filename, 'rt') 48 | elif filename.endswith(('.xz', '.lzma')): 49 | return lzma.open(filename, 'rt') 50 | elif filename.endswith(('.npy', '.npz')): 51 | if filename.endswith('.npz'): 52 | assert key is not None, "No key= given for .npz file." 53 | return np.load(filename)[key] 54 | else: 55 | return np.load(filename) 56 | else: 57 | # Plain text 58 | return open(filename, 'r') 59 | 60 | 61 | def read_hypothesis_file(fname: str) -> List[str]: 62 | """Reads lines from a text file and returns it as a list of strings.""" 63 | lines = [] 64 | with open(fname) as f: 65 | for line in f: 66 | lines.append(line.strip()) 67 | return lines 68 | 69 | 70 | def read_reference_files(*args) -> List[List[str]]: 71 | """Read every file given in `args` and produce a list of lists that 72 | supports multiple references.""" 73 | all_lines = [] 74 | 75 | for fname in args: 76 | lines = [] 77 | with open(fname) as f: 78 | for line in f: 79 | lines.append(line.strip()) 80 | all_lines.append(lines) 81 | 82 | ref_lens = [len(lns) for lns in all_lines] 83 | assert len(set(ref_lens)) == 1, \ 84 | "Reference streams do not have the same lengths." 
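    # The returned structure is indexed as all_lines[stream][sentence]: one inner list
    # per reference file, each holding one stripped line per sentence.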
85 | 86 | return all_lines 87 | 88 | 89 | def progress_bar(iterator: Iterable[Any], unit: str = 'it'): 90 | """Wraps the given iterator into tqdm for progress bar rendering.""" 91 | return tqdm(iterator, unit=unit, ncols=70, smoothing=0) 92 | -------------------------------------------------------------------------------- /pysimt/utils/ml_metrics.py: -------------------------------------------------------------------------------- 1 | class Loss: 2 | """Accumulates and computes correctly training and validation losses.""" 3 | def __init__(self): 4 | self.reset() 5 | 6 | def reset(self): 7 | self._loss = 0 8 | self._denom = 0 9 | self.batch_loss = 0 10 | 11 | def update(self, loss, n_items): 12 | # Store last batch loss 13 | self.batch_loss = loss.item() 14 | # Add it to cumulative loss 15 | self._loss += self.batch_loss 16 | # Normalize batch loss w.r.t n_items 17 | self.batch_loss /= n_items 18 | # Accumulate n_items inside the denominator 19 | self._denom += n_items 20 | 21 | def get(self): 22 | if self._denom == 0: 23 | return 0 24 | return self._loss / self._denom 25 | 26 | @property 27 | def denom(self): 28 | return self._denom 29 | -------------------------------------------------------------------------------- /pysimt/utils/tensorboard.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | 4 | from torch.utils.tensorboard import SummaryWriter 5 | 6 | 7 | class TensorBoard: 8 | def __init__(self, model, log_dir, exp_id, subfolder): 9 | self.model = model 10 | self.log_dir = log_dir 11 | self.exp_id = exp_id 12 | self.subfolder = subfolder 13 | self.writer = None 14 | self.available = bool(self.log_dir) 15 | 16 | # Call setup 17 | self.setup() 18 | 19 | def _nop(self, *args, **kwargs): 20 | return 21 | 22 | def setup(self): 23 | """Setups TensorBoard logger.""" 24 | if not self.available: 25 | self.replace_loggers() 26 | return 27 | 28 | # Construct full folder path 29 | self.log_dir = pathlib.Path(self.log_dir).expanduser() 30 | self.log_dir = self.log_dir / self.subfolder / self.exp_id 31 | self.log_dir.mkdir(parents=True, exist_ok=True) 32 | 33 | # Set up summary writer 34 | self.writer = SummaryWriter(self.log_dir) 35 | 36 | def replace_loggers(self): 37 | """Replace all log_* methods with dummy _nop.""" 38 | self.log_metrics = self._nop 39 | self.log_scalar = self._nop 40 | self.log_activations = self._nop 41 | self.log_gradients = self._nop 42 | 43 | def log_metrics(self, metrics, step, suffix=''): 44 | """Logs evaluation metrics as scalars.""" 45 | for metric in metrics: 46 | self.writer.add_scalar(suffix + metric.name, metric.score, 47 | global_step=step) 48 | 49 | def log_scalar(self, name, value, step): 50 | """Logs single scalar value.""" 51 | self.writer.add_scalar(name, value, global_step=step) 52 | 53 | def log_activations(self, step): 54 | """Logs activations by layer.""" 55 | pass 56 | 57 | def log_gradients(self, step): 58 | """Logs gradients by layer.""" 59 | pass 60 | 61 | def close(self): 62 | """Closes TensorBoard handle.""" 63 | if self.available: 64 | self.writer.close() 65 | 66 | def __repr__(self): 67 | if not self.log_dir: 68 | return "No 'tensorboard_dir' given in config" 69 | return "TensorBoard is active" 70 | -------------------------------------------------------------------------------- /scripts/decode_greedy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Only decode for snmt models and not waitk. 
It does not make sense for the latter 4 | 5 | # Set GPU0 if not set 6 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 7 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 8 | 9 | 10 | # Greedy decode everything (batched) 11 | for ckpt in `find -L -name '*simultaneousnmt-*.best.loss.ckpt'`; do 12 | fname=`basename $ckpt` 13 | prefix=${ckpt/.best.loss.ckpt/} 14 | log=${ckpt/.best.loss.ckpt/.log} 15 | grep -q 'Training finished' ${log} 16 | if [ "$?" == "0" ]; then 17 | # check for the availability of one test set 18 | if [ ! -f "${prefix}.test_2017_flickr.gs" ]; then 19 | pysimt translate -m 60 -s ${test_set} -f gs -o ${prefix} $ckpt 20 | fi 21 | fi 22 | done 23 | -------------------------------------------------------------------------------- /scripts/decode_test_waitk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set GPU0 if not set 4 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 5 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 6 | 7 | 8 | # Test-time wait-k for SNMT (not re-trained) models 9 | for ckpt in `find -L -name '*simultaneousnmt-*.best.loss.ckpt'`; do 10 | fname=`basename $ckpt` 11 | prefix=${ckpt/.best.loss.ckpt/} 12 | log=${ckpt/.best.loss.ckpt/.log} 13 | grep -q 'Training finished' ${log} 14 | if [ "$?" == "0" ]; then 15 | # check for the availabilty of one test set 16 | if [ ! -f "${prefix}.test_2017_flickr.wait1.gs" ]; then 17 | pysimt translate -m 60 -s ${test_set} -b 1 -f wk --n-init-tokens "1,2,3,4,5,6,7" \ 18 | -o ${prefix} $ckpt 19 | fi 20 | fi 21 | done 22 | -------------------------------------------------------------------------------- /scripts/decode_train_waitk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set GPU0 if not set 4 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 5 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 6 | 7 | 8 | # Train-time wait-k 9 | for ckpt in `find -L -name '*simultaneouswaitk*.best.loss.ckpt'`; do 10 | fname=`basename $ckpt` 11 | model=`dirname $ckpt` 12 | k=`echo $model | sed -r 's#\./wait([0-9])-rnn.*#\1#'` 13 | prefix=${ckpt/.best.loss.ckpt/} 14 | log=${ckpt/.best.loss.ckpt/.log} 15 | grep -q 'Training finished' ${log} 16 | if [ "$?" == "0" ]; then 17 | # check for the availability of one test set 18 | if [ ! -f "${prefix}.test_2017_flickr.wait${k}.gs" ]; then 19 | pysimt translate -m 60 -s ${test_set} -b 1 -f wk --n-init-tokens "$k" \ 20 | -o ${prefix} $ckpt 21 | fi 22 | fi 23 | done 24 | -------------------------------------------------------------------------------- /scripts/decode_wait_if_diff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set GPU0 if not set 4 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 5 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 6 | 7 | # Simultaneous greedy decode (Cho and Esipova, 2016) 8 | # only decode SNMT models and not waitk 9 | for ckpt in `find -L -name '*simultaneousnmt-*.best.loss.ckpt'`; do 10 | fname=`basename $ckpt` 11 | prefix=${ckpt/.best.loss.ckpt/} 12 | log=${ckpt/.best.loss.ckpt/.log} 13 | grep -q 'Training finished' ${log} 14 | if [ "$?" == "0" ]; then 15 | # check for the availabilty of one test set 16 | if [ ! 
-f "${prefix}.test_2017_flickr.s1_d1_wait_if_diff.gs" ]; then 17 | pysimt translate -m 60 -s ${test_set} -b 1 -f sgs --n-init-tokens "1,2" \ 18 | --delta "1" --criteria "wait_if_diff" -o ${prefix} $ckpt 19 | fi 20 | fi 21 | done 22 | -------------------------------------------------------------------------------- /scripts/decode_wait_if_worse.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set GPU0 if not set 4 | export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} 5 | test_set="test_2016_flickr,test_2017_flickr,test_2017_mscoco" 6 | 7 | # Simultaneous greedy decode (Cho and Esipova, 2016) 8 | # only decode SNMT models and not waitk 9 | for ckpt in `find -L -name '*simultaneousnmt-*.best.loss.ckpt'`; do 10 | fname=`basename $ckpt` 11 | prefix=${ckpt/.best.loss.ckpt/} 12 | log=${ckpt/.best.loss.ckpt/.log} 13 | grep -q 'Training finished' ${log} 14 | if [ "$?" == "0" ]; then 15 | # check for the availabilty of one test set 16 | if [ ! -f "${prefix}.test_2017_flickr.s1_d1_wait_if_worse.gs" ]; then 17 | pysimt translate -m 60 -s ${test_set} -b 1 -f sgs --n-init-tokens "1,2" \ 18 | --delta "1" --criteria "wait_if_worse" -o ${prefix} $ckpt 19 | fi 20 | fi 21 | done 22 | -------------------------------------------------------------------------------- /scripts/delay_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | from pathlib import Path 6 | 7 | import tabulate 8 | import sacrebleu 9 | 10 | from pysimt.metrics.simnmt import AVPScorer, AVLScorer 11 | 12 | 13 | """This script should be run from within the parent folder where each pysimt 14 | experiment resides.""" 15 | 16 | 17 | def read_lines_from_file(fname): 18 | lines = [] 19 | with open(fname) as f: 20 | for line in f: 21 | lines.append(line.strip()) 22 | return lines 23 | 24 | 25 | if __name__ == '__main__': 26 | results = {} 27 | trglang = sys.argv[1] 28 | if trglang not in ('en', 'de', 'fr', 'cs'): 29 | print(f'Usage: {sys.argv[0]} [action files]') 30 | sys.exit(1) 31 | 32 | scorers = [ 33 | AVPScorer(add_trg_eos=False), 34 | AVLScorer(add_trg_eos=False), 35 | ] 36 | 37 | act_files = sys.argv[2:] 38 | 39 | # get test set 40 | test_sets = set([a.split('.')[1] for a in act_files]) 41 | assert len(test_sets) == 1, "Different test set files given" 42 | test_set = list(test_sets)[0] 43 | print(f'Test set is {test_set}, target language is {trglang}\n\n') 44 | 45 | ref_root = Path(__file__).parent / f'../data/multi30k/en-{trglang}' 46 | ref_file = ref_root / f'{test_set}.lc.norm.tok.{trglang}.dehyph' 47 | if ref_file.exists(): 48 | refs = read_lines_from_file(ref_file) 49 | else: 50 | raise RuntimeError(f'{ref_file} does not exist') 51 | 52 | for act_file in act_files: 53 | # Compute delay metrics 54 | scores = [s.compute_from_file(act_file) for s in scorers] 55 | results[act_file] = {s.name: s.score for s in scores} 56 | 57 | # try to reach hypothesis file 58 | hyp_file = act_file.replace('.acts', '.gs') 59 | if os.path.exists(hyp_file): 60 | hyps = read_lines_from_file(hyp_file) 61 | bleu = sacrebleu.corpus_bleu( 62 | hyps, [refs], tokenize='none', lowercase=False).score 63 | else: 64 | bleu = -1.0 65 | 66 | results[act_file]['BLEU'] = bleu 67 | results[act_file]['Q/AVP'] = bleu / scores[0].score 68 | 69 | if results: 70 | headers = ['Name'] + list(next(iter(results.values())).keys()) 71 | results = [[name, *[scores[key] for key in headers[1:]]] for name, scores in 
results.items()] 72 | results = sorted(results, key=lambda x: x[headers.index('BLEU')]) 73 | print(tabulate.tabulate(results, headers=headers, floatfmt='.2f')) 74 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import setuptools 3 | 4 | 5 | def get_pysimt_version(): 6 | with open('pysimt/__init__.py') as f: 7 | s = f.read().split('\n')[0] 8 | if '__version__' not in s: 9 | raise RuntimeError('Can not detect version from pysimt/__init__.py') 10 | return eval(s.split(' ')[-1]) 11 | 12 | 13 | setuptools.setup( 14 | name='pysimt', 15 | version=get_pysimt_version(), 16 | description='A PyTorch framework for Simultaneous Neural Machine Translation', 17 | url='https://github.com/ImperialNLP/pysimt', 18 | author='Ozan Caglayan, Veneta Haralampieva, Julia Ive, Andy Li', 19 | author_email='o.caglayan@ic.ac.uk', 20 | license='MIT', 21 | classifiers=[ 22 | 'Intended Audience :: Science/Research', 23 | 'Topic :: Scientific/Engineering', 24 | 'License :: OSI Approved :: MIT License', 25 | 'Programming Language :: Python :: 3 :: Only', 26 | 'Programming Language :: Python :: 3.7', 27 | 'Operating System :: POSIX', 28 | ], 29 | keywords='nmt neural-mt simultaneous translation sequence-to-sequence deep-learning pytorch', 30 | python_requires='~=3.7', 31 | install_requires=[ 32 | 'numpy', 'tqdm', 'pillow', 33 | 'torch', 'torchvision', 'sacrebleu>1.4.10', 34 | ], 35 | packages=setuptools.find_packages(), 36 | scripts=[str(p) for p in pathlib.Path('bin').glob('*')], 37 | zip_safe=False) 38 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 90 3 | ignore = E116,E241,E265,W504,E501 4 | exclude = docs,examples,build 5 | --------------------------------------------------------------------------------