├── .gitignore ├── LICENSE ├── README.md ├── baseline-1M-enfr ├── local │ └── parse_options.sh ├── results └── run.sh ├── baseline-small ├── local │ └── parse_options.sh └── run.sh └── romance-multi-way ├── local └── parse_options.sh └── run.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Lua sources 2 | luac.out 3 | 4 | # luarocks build files 5 | *.src.rock 6 | *.zip 7 | *.tar.gz 8 | 9 | # Object files 10 | *.o 11 | *.os 12 | *.ko 13 | *.obj 14 | *.elf 15 | 16 | # Precompiled Headers 17 | *.gch 18 | *.pch 19 | 20 | # Libraries 21 | *.lib 22 | *.a 23 | *.la 24 | *.lo 25 | *.def 26 | *.exp 27 | 28 | # Shared objects (inc. Windows DLLs) 29 | *.dll 30 | *.so 31 | *.so.* 32 | *.dylib 33 | 34 | # Executables 35 | *.exe 36 | *.out 37 | *.app 38 | *.i*86 39 | *.x86_64 40 | *.hex 41 | 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 OpenNMT 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Recipes 2 | Recipes for training OpenNMT systems 3 | 4 | 5 | You will find here some "recipes" which basically script the end-to-end data preparation, preprocessing, training and evaluation. 6 | 7 | ## Requirements 8 | 9 | * You do need OpenNMT - see [here](http://opennmt.net/OpenNMT/installation/). If you clone Recipes.git repo at the same level as OpenNMT.git on your local computer, you don't need to update the PATH 10 | in the scripts. Otherwise update the line `OPENNMT_PATH=../../OpenNMT` 11 | * for evaluation scripts, you do need the perl `XML::Twig` module (`perl -MCPAN -e 'install XML::Twig'`) 12 | 13 | ## The recipes 14 | 15 | ### Baseline-1M-enfr 16 | Train a baseline English-French model, use case feature and onmt reversible tokenization. GPU highly recommended. Training takes 75 minutes per epoch on a single GTX 1080. Parameters: 2x500 layers, 13 epochs. See script for the details. Data: set of 1 million parallel sentences (extract of Europarl, Newscommentaries, ..) See the results file for the evaluation. 21 | 22 | ### Romance Multi-way 23 | See http://forum.opennmt.net/t/training-romance-multi-way-model/86 GPU highly recommended. Training takes 4 1/2 hours per epoch on a single GTX 1080. Parameters: 2x500 layers, 13 epochs. See script for the details.
25 | -------------------------------------------------------------------------------- /baseline-1M-enfr/local/parse_options.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey); 4 | # Arnab Ghoshal, Karel Vesely 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | # Parse command-line options. 21 | # To be sourced by another script (as in ". parse_options.sh"). 22 | # Option format is: --option-name arg 23 | # and shell variable "option_name" gets set to value "arg." 24 | # The exception is --help, which takes no arguments, but prints the 25 | # $help_message variable (if defined). 26 | 27 | 28 | ### 29 | ### The --config file options have lower priority to command line 30 | ### options, so we need to import them first... 31 | ### 32 | 33 | # Now import all the configs specified by command-line, in left-to-right order 34 | for ((argpos=1; argpos<$#; argpos++)); do 35 | if [ "${!argpos}" == "--config" ]; then 36 | argpos_plus1=$((argpos+1)) 37 | config=${!argpos_plus1} 38 | [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 39 | . $config # source the config file. 
40 | fi 41 | done 42 | 43 | 44 | ### 45 | ### No we process the command line options 46 | ### 47 | while true; do 48 | [ -z "${1:-}" ] && break; # break if there are no arguments 49 | case "$1" in 50 | # If the enclosing script is called with --help option, print the help 51 | # message and exit. Scripts should put help messages in $help_message 52 | --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; 53 | else printf "$help_message\n" 1>&2 ; fi; 54 | exit 0 ;; 55 | --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" 56 | exit 1 ;; 57 | # If the first command-line argument begins with "--" (e.g. --foo-bar), 58 | # then work out the variable name as $name, which will equal "foo_bar". 59 | --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; 60 | # Next we test whether the variable in question is undefned-- if so it's 61 | # an invalid option and we die. Note: $0 evaluates to the name of the 62 | # enclosing script. 63 | # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar 64 | # is undefined. We then have to wrap this test inside "eval" because 65 | # foo_bar is itself inside a variable ($name). 66 | eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; 67 | 68 | oldval="`eval echo \\$$name`"; 69 | # Work out whether we seem to be expecting a Boolean argument. 70 | if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then 71 | was_bool=true; 72 | else 73 | was_bool=false; 74 | fi 75 | 76 | # Set the variable to the right value-- the escaped quotes make it work if 77 | # the option had spaces, like --cmd "queue.pl -sync y" 78 | eval $name=\"$2\"; 79 | 80 | # Check that Boolean-valued arguments are really Boolean. 
81 | if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then 82 | echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 83 | exit 1; 84 | fi 85 | shift 2; 86 | ;; 87 | *) break; 88 | esac 89 | done 90 | 91 | 92 | # Check for an empty argument to the --cmd option, which can easily occur as a 93 | # result of scripting errors. 94 | [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; 95 | 96 | 97 | true; # so this script returns exit code 0. 98 | -------------------------------------------------------------------------------- /baseline-1M-enfr/results: -------------------------------------------------------------------------------- 1 | 2 | MT evaluation scorer began on 2017 Jan 12 at 15:05:37 3 | command line: tools/mteval-v13a.pl -r data/testsets-enfr/News/newstest2014-fren-ref.fr.sgm -s data/testsets-enfr/News/newstest2014-fren-src.en.sgm -t exp/newstest2014-fren-tgt.trans.fr.sgm -c 4 | Evaluation of any-to-fr translation using: 5 | src set "newstest2014" (176 docs, 3003 segs) 6 | ref set "newstest2014" (1 refs) 7 | tst set "newstest2014" (1 systems) 8 | 9 | length ratio: 0.946537138126407 (73173/77306), penalty (log): -0.0564825823732797 10 | NIST score = 7.2576 BLEU score = 0.2614 for system "tst" 11 | 12 | # ------------------------------------------------------------------------ 13 | 14 | Individual N-gram scoring 15 | 1-gram 2-gram 3-gram 4-gram 5-gram 6-gram 7-gram 8-gram 9-gram 16 | ------ ------ ------ ------ ------ ------ ------ ------ ------ 17 | NIST: 5.3558 1.5459 0.3097 0.0399 0.0062 0.0009 0.0004 0.0001 0.0001 "tst" 18 | 19 | BLEU: 0.5852 0.3372 0.2146 0.1382 0.0913 0.0614 0.0416 0.0282 0.0191 "tst" 20 | 21 | # ------------------------------------------------------------------------ 22 | Cumulative N-gram scoring 23 | 1-gram 2-gram 3-gram 4-gram 5-gram 6-gram 7-gram 8-gram 9-gram 24 | ------ ------ ------ ------ ------ ------ ------ ------ ------ 25 | NIST: 5.3558 6.9018 7.2115 7.2514 7.2576 7.2585 
7.2589 7.2590 7.2591 "tst" 26 | 27 | BLEU: 0.5530 0.4198 0.3294 0.2614 0.2094 0.1691 0.1372 0.1118 0.0913 "tst" 28 | MT evaluation scorer ended on 2017 Jan 12 at 15:05:46 29 | 30 | 31 | 32 | For information, Multi Bleu score on generic test set: 33 | BLEU = 31.01, 58.4/37.6/26.1/18.5 (BP=0.967, ratio=0.967, hyp_len=42238, ref_len=43658) 34 | -------------------------------------------------------------------------------- /baseline-1M-enfr/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2017 Ubiqus (Author: Vincent Nguyen) 4 | # Systran (Author: Jean Senellart) 5 | # License MIT 6 | # 7 | # This recipe shows how to build an openNMT translation model from English to French 8 | # based on a limited resource (1 Mio segments) 9 | # 10 | # Based on the tuto from the OpenNMT forum 11 | 12 | 13 | # TODO test is GPU is present or not 14 | CUDA_VISIBLE_DEVICES=0 15 | decode_cpu=false 16 | 17 | # Make symlinks to access OpenNMT scripts - change this line if needed 18 | OPENNMT_PATH=../../OpenNMT 19 | [ ! -h tools ] && ln -s $OPENNMT_PATH/tools tools 20 | [ ! -h preprocess.lua ] && ln -s $OPENNMT_PATH/preprocess.lua preprocess.lua 21 | [ ! -h train.lua ] && ln -s $OPENNMT_PATH/train.lua train.lua 22 | [ ! -h translate.lua ] && ln -s $OPENNMT_PATH/translate.lua translate.lua 23 | [ ! -h onmt ] && ln -s $OPENNMT_PATH/onmt onmt 24 | 25 | # this is usefull to skip some stages during step by step execution 26 | stage=0 27 | 28 | # if you want to run without training and use an existing model in the "exp" folder set notrain to true 29 | notrain=false 30 | 31 | # making these variables to make replication easier for other languages 32 | sl=en 33 | tl=fr 34 | 35 | # training corpus - baseline-1M/baseline-2M available 36 | corpus=baseline-1M 37 | 38 | # At the moment only "stage" option is available anyway 39 | . 
local/parse_options.sh 40 | 41 | # Data download and preparation 42 | 43 | if [ $stage -le 0 ]; then 44 | # TODO put this part in a local/download_data.sh script ? 45 | mkdir -p data 46 | cd data 47 | if [ ! -f $corpus-$sl$tl.tgz ]; then 48 | echo "$0: downloading the baseline corpus from amazon s3" 49 | wget https://s3.amazonaws.com/opennmt-trainingdata/$corpus-$sl$tl.tgz 50 | tar xzfv $corpus-$sl$tl.tgz 51 | fi 52 | if [ ! -f testsets-$sl$tl.tgz ]; then 53 | echo "$0: downloading the baseline corpus from amazon s3" 54 | wget https://s3.amazonaws.com/opennmt-tests/testsets-$sl$tl.tgz 55 | tar xzfv testsets-$sl$tl.tgz 56 | fi 57 | cd ../local 58 | if [ ! -f mteval-v13a.pl ]; then 59 | wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/mteval-v13a.pl 60 | fi 61 | if [ ! -f input-from-sgm.perl ]; then 62 | wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/ems/support/input-from-sgm.perl 63 | fi 64 | if [ ! -f wrap-xml.perl ]; then 65 | wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/ems/support/wrap-xml.perl 66 | fi 67 | if [ ! -f multi-bleu.perl ]; then 68 | wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl 69 | fi 70 | cd .. 71 | fi 72 | 73 | # Tokenize the Corpus 74 | if [ $stage -le 1 ]; then 75 | echo "$0: tokenizing corpus and test sets" 76 | for f in data/$corpus-$sl$tl/*.?? 
; do th tools/tokenize.lua -case_feature -joiner_annotate < $f > $f.tok ; done 77 | fi 78 | 79 | # Preprocess the data - decide here the vocabulary size 50000 default value 80 | if [ $stage -le 2 ]; then 81 | mkdir -p exp 82 | echo "$0: preprocessing corpus" 83 | th preprocess.lua -src_vocab_size 50000 -tgt_vocab_size 50000 \ 84 | -train_src data/$corpus-$sl$tl/*_train.$sl.tok \ 85 | -train_tgt data/$corpus-$sl$tl/*_train.$tl.tok \ 86 | -valid_src data/$corpus-$sl$tl/*_valid.$sl.tok \ 87 | -valid_tgt data/$corpus-$sl$tl/*_valid.$tl.tok -save_data exp/data-$corpus-$sl$tl 88 | fi 89 | 90 | # Train the model !!!! even if OS cuda device ID is 0 you need -gpuid=1 91 | # Decide here the number of epochs, learning rate, which epoch to start decay, decay rate 92 | # if you change number of epochs do not forget to change the model name too 93 | if [ $stage -le 3 ]; then 94 | if [ $notrain = false ]; then 95 | echo "$0: training starting, will take a while." 96 | th train.lua -data exp/data-$corpus-$sl$tl-train.t7 \ 97 | -save_model exp/model-$corpus-$sl$tl \ 98 | -end_epoch 13 -start_decay_at 5 -learning_rate_decay 0.65 -gpuid 1 99 | cp -f exp/model-$corpus-$sl$tl"_epoch13_"*".t7" exp/model-$corpus-$sl$tl"_final.t7" 100 | else 101 | echo "$0: using an existing model" 102 | if [ ! 
-f exp/model-$corpus-$sl$tl"_final.t7" ]; then 103 | echo "$0: mode file does not exist" 104 | exit 1 105 | fi 106 | fi 107 | fi 108 | 109 | # Deploy model for CPU usage 110 | if [ $stage -le 4 ]; then 111 | if [ $decode_cpu = true ]; then 112 | th tools/release_model.lua -force -model exp/model-$corpus-$sl$tl"_final.t7" -output_model exp/model-$corpus-$sl$tl"_cpu.t7" -gpuid 1 113 | fi 114 | fi 115 | 116 | # Translate using gpu 117 | # you can change this by changing the model name from _final to _cpu and remove -gpuid 1 118 | if [ $stage -le 5 ]; then 119 | [ $decode_cpu = true ] && dec_opt="" || dec_opt="-gpuid 1" 120 | th translate.lua -replace_unk -model exp/model-$corpus-$sl$tl"_final.t7" \ 121 | -src data/$corpus-$sl$tl/*_test.$sl.tok -output exp/${corpus}_test.hyp.$tl.tok $dec_opt 122 | fi 123 | 124 | # Evaluate the generic test set with multi-bleu 125 | if [ $stage -le 6 ]; then 126 | th tools/detokenize.lua -case_feature < exp/${corpus}_test.hyp.$tl.tok > exp/${corpus}_test.hyp.$tl.detok 127 | perl local/multi-bleu.perl data/$corpus-$sl$tl/*_test.$tl \ 128 | < exp/${corpus}_test.hyp.$tl.detok > exp/${corpus}_test_multibleu.txt 129 | fi 130 | 131 | ############################### 132 | #### Newstest Evaluation 133 | #### 134 | 135 | if [ $stage -le 7 ]; then 136 | 137 | testset=newstest2014-$sl$tl 138 | 139 | perl local/input-from-sgm.perl < data/testsets-$sl$tl/News/$testset-src.$sl.sgm \ 140 | > data/testsets-$sl$tl/News/$testset-src.$sl 141 | 142 | th tools/tokenize.lua -case_feature -joiner_annotate < data/testsets-$sl$tl/News/$testset-src.$sl \ 143 | > data/testsets-$sl$tl/News/$testset-src.$sl.tok 144 | 145 | [ $decode_cpu = true ] && dec_opt="" || dec_opt="-gpuid 1" 146 | 147 | th translate.lua -replace_unk -model exp/model-$corpus-$sl$tl"_final"*.t7 \ 148 | -src data/testsets-$sl$tl/News/$testset-src.$sl.tok \ 149 | -output exp/$testset-tgt.trans.$tl.tok $dec_opt 150 | 151 | th tools/detokenize.lua -case_feature < exp/$testset-tgt.trans.$tl.tok \ 
152 | > exp/$testset-tgt.trans.$tl 153 | 154 | # Wrap-xml to convert to sgm 155 | perl local/wrap-xml.perl $tl data/testsets-$sl$tl/News/$testset-src.$sl.sgm tst \ 156 | < exp/$testset-tgt.trans.$tl \ 157 | > exp/$testset-tgt.trans.$tl.sgm 158 | 159 | perl local/mteval-v13a.pl -r data/testsets-$sl$tl/News/$testset-ref.$tl.sgm \ 160 | -s data/testsets-$sl$tl/News/$testset-src.$sl.sgm -t exp/$testset-tgt.trans.$tl.sgm \ 161 | -c > exp/nist-bleu-$testset 162 | fi 163 | 164 | -------------------------------------------------------------------------------- /baseline-small/local/parse_options.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey); 4 | # Arnab Ghoshal, Karel Vesely 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | # Parse command-line options. 21 | # To be sourced by another script (as in ". parse_options.sh"). 22 | # Option format is: --option-name arg 23 | # and shell variable "option_name" gets set to value "arg." 24 | # The exception is --help, which takes no arguments, but prints the 25 | # $help_message variable (if defined). 26 | 27 | 28 | ### 29 | ### The --config file options have lower priority to command line 30 | ### options, so we need to import them first... 
31 | ### 32 | 33 | # Now import all the configs specified by command-line, in left-to-right order 34 | for ((argpos=1; argpos<$#; argpos++)); do 35 | if [ "${!argpos}" == "--config" ]; then 36 | argpos_plus1=$((argpos+1)) 37 | config=${!argpos_plus1} 38 | [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 39 | . $config # source the config file. 40 | fi 41 | done 42 | 43 | 44 | ### 45 | ### No we process the command line options 46 | ### 47 | while true; do 48 | [ -z "${1:-}" ] && break; # break if there are no arguments 49 | case "$1" in 50 | # If the enclosing script is called with --help option, print the help 51 | # message and exit. Scripts should put help messages in $help_message 52 | --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; 53 | else printf "$help_message\n" 1>&2 ; fi; 54 | exit 0 ;; 55 | --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" 56 | exit 1 ;; 57 | # If the first command-line argument begins with "--" (e.g. --foo-bar), 58 | # then work out the variable name as $name, which will equal "foo_bar". 59 | --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; 60 | # Next we test whether the variable in question is undefned-- if so it's 61 | # an invalid option and we die. Note: $0 evaluates to the name of the 62 | # enclosing script. 63 | # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar 64 | # is undefined. We then have to wrap this test inside "eval" because 65 | # foo_bar is itself inside a variable ($name). 66 | eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; 67 | 68 | oldval="`eval echo \\$$name`"; 69 | # Work out whether we seem to be expecting a Boolean argument. 
70 | if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then 71 | was_bool=true; 72 | else 73 | was_bool=false; 74 | fi 75 | 76 | # Set the variable to the right value-- the escaped quotes make it work if 77 | # the option had spaces, like --cmd "queue.pl -sync y" 78 | eval $name=\"$2\"; 79 | 80 | # Check that Boolean-valued arguments are really Boolean. 81 | if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then 82 | echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 83 | exit 1; 84 | fi 85 | shift 2; 86 | ;; 87 | *) break; 88 | esac 89 | done 90 | 91 | 92 | # Check for an empty argument to the --cmd option, which can easily occur as a 93 | # result of scripting errors. 94 | [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; 95 | 96 | 97 | true; # so this script returns exit code 0. 98 | -------------------------------------------------------------------------------- /baseline-small/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2017 Ubiqus (Author: Vincent Nguyen) 4 | # 5 | # License MIT 6 | # 7 | # This recipe shows how to build an openNMT translation model from French to English 8 | # based on 9 | # Global Voices 10 | # News Commentary v11 11 | # This script does not download the datasets, you need to drop the files in data/public 12 | # same for the test set newstest2014 13 | # making these variables to make replication easier for other languages 14 | 15 | sl=fr 16 | tl=en 17 | corpus[1]=data/public/News-Commentary11.en-fr.clean 18 | corpus[2]=data/public/GlobalVoices.en-fr.clean 19 | 20 | vocab_size=50000 21 | seq_len=50 22 | 23 | testset=newstest2014-fren 24 | 25 | use_bpe=false 26 | bpe_size=32000 27 | [ $use_bpe = false ] && bpe_model="" || bpe_model="-bpe_model data/train-$sl$tl.bpe32000" 28 | use_case=false 29 | [ $use_case = false ] && case_feat="" || case_feat="-case_feature" 30 | 31 | # TODO test is GPU is 
present or not 32 | CUDA_VISIBLE_DEVICES=0 33 | decode_cpu=false 34 | 35 | # Make symlinks to access OpenNMT scripts - change this line if needed 36 | OPENNMT_PATH=../../OpenNMT 37 | [ ! -h tools ] && ln -s $OPENNMT_PATH/tools tools 38 | [ ! -h preprocess.lua ] && ln -s $OPENNMT_PATH/preprocess.lua preprocess.lua 39 | [ ! -h train.lua ] && ln -s $OPENNMT_PATH/train.lua train.lua 40 | [ ! -h translate.lua ] && ln -s $OPENNMT_PATH/translate.lua translate.lua 41 | [ ! -h onmt ] && ln -s $OPENNMT_PATH/onmt onmt 42 | 43 | # this is usefull to skip some stages during step by step execution 44 | stage=0 45 | 46 | # if you want to run without training and use an existing model in the "exp" folder set notrain to true 47 | notrain=false 48 | 49 | # At the moment only "stage" option is available anyway 50 | . local/parse_options.sh 51 | 52 | function score_epoch { 53 | # convert sgm input into text file 54 | local/input-from-sgm.perl < data/public/test/$testset-src.$sl.sgm > data/$testset-src.$sl 55 | # tokenize the text file 56 | th tools/tokenize.lua $case_feat -mode aggressive -joiner_annotate $bpe_model < data/$testset-src.$sl > data/$testset-src.$sl.tok 57 | # translate the test set 58 | [ $decode_cpu = true ] && dec_opt="" || dec_opt="-gpuid 1" 59 | th translate.lua -replace_unk -disable_logs -model exp/model-$sl$tl"_epoch"$1"_"*.t7 \ 60 | -src data/$testset-src.$sl.tok \ 61 | -output exp/$testset-tgt.trans.$tl.tok $dec_opt 62 | # detokenize 63 | th tools/detokenize.lua $case_feat < exp/$testset-tgt.trans.$tl.tok \ 64 | > exp/$testset-tgt.trans.$tl 65 | # Wrap-xml to convert to sgm the translated text 66 | local/wrap-xml.perl $tl data/public/test/$testset-src.$sl.sgm tst \ 67 | < exp/$testset-tgt.trans.$tl > exp/$testset-tgt.trans.$tl.sgm 68 | # compute the bleu score 69 | local/mteval-v13a.pl -r data/public/test/$testset-ref.$tl.sgm \ 70 | -s data/public/test/$testset-src.$sl.sgm -t exp/$testset-tgt.trans.$tl.sgm \ 71 | -c > exp/nist-bleu-$testset-epoch-$1 72 | 73 | [ 
$decode_cpu = true ] && dec_opt="" || dec_opt="-gpuid 1" 74 | th translate.lua -replace_unk -disable_logs -model exp/model-$sl$tl"_epoch"$1"_"*".t7" \ 75 | -src data/valid.$sl.tok -output exp/valid.hyp.$tl.tok $dec_opt 76 | 77 | th tools/detokenize.lua $case_feat < exp/valid.hyp.$tl.tok > exp/valid.hyp.$tl.detok 78 | th tools/detokenize.lua $case_feat < data/valid.$tl.tok > exp/valid.$tl.detok 79 | local/multi-bleu.perl exp/valid.$tl.detok \ 80 | < exp/valid.hyp.$tl.detok > exp/generic_test_multibleu-detok-epoch$1.txt 81 | local/multi-bleu.perl data/valid.$tl.tok \ 82 | < exp/valid.hyp.$tl.tok > exp/generic_test_multibleu-tok-epoch$1.txt 83 | } 84 | 85 | 86 | if [ $stage -le 0 ]; then 87 | cd local 88 | if [ ! -f mteval-v13a.pl ]; then 89 | wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/mteval-v13a.pl 90 | fi 91 | if [ ! -f input-from-sgm.perl ]; then 92 | wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/ems/support/input-from-sgm.perl 93 | fi 94 | if [ ! -f wrap-xml.perl ]; then 95 | wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/ems/support/wrap-xml.perl 96 | fi 97 | if [ ! -f multi-bleu.perl ]; then 98 | wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl 99 | fi 100 | if [ ! -f learn_bpe.py ]; then 101 | wget https://raw.githubusercontent.com/rsennrich/subword-nmt/master/learn_bpe.py 102 | fi 103 | cd .. 
104 | fi 105 | 106 | 107 | # Prepare Corpus, build BPE model, build dictionary 108 | if [ $stage -le 1 ]; then 109 | 110 | if $use_bpe; then 111 | echo "$0: tokenizing corpus for BPE modelling" 112 | for ((i=1; i<= ${#corpus[@]}; i++)) 113 | do 114 | for f in ${corpus[$i]}.$sl ${corpus[$i]}.$tl 115 | do 116 | file=$(basename $f) 117 | th tools/tokenize.lua -mode aggressive -nparallel 6 < $f > data/$file.rawtok 118 | done 119 | done 120 | cat data/*.rawtok | python local/learn_bpe.py -s $bpe_size > data/train-$sl$tl.bpe$bpe_size 121 | rm data/*.rawtok 122 | fi 123 | 124 | echo "$0: tokenizing corpus" 125 | for ((i=1; i<= ${#corpus[@]}; i++)) 126 | do 127 | for f in ${corpus[$i]}.$sl ${corpus[$i]}.$tl 128 | do 129 | file=$(basename $f) 130 | th tools/tokenize.lua -mode aggressive $case_feat -joiner_annotate -nparallel 6 \ 131 | $bpe_model < $f > data/$file.tok 132 | done 133 | done 134 | 135 | echo "$0: building dictionaries based on public and private data" 136 | cat data/*.$sl.tok > data/tempo.$sl.tok 137 | cat data/*.$tl.tok > data/tempo.$tl.tok 138 | th tools/build_vocab.lua -data data/tempo.$sl.tok -save_vocab data/dict.$sl -vocab_size $vocab_size 139 | th tools/build_vocab.lua -data data/tempo.$tl.tok -save_vocab data/dict.$tl -vocab_size $vocab_size 140 | rm data/tempo.??.tok 141 | 142 | echo "$0: preparing public and private training sets" 143 | for ((i=1; i<= ${#corpus[@]}; i++)) 144 | do 145 | file=$(basename ${corpus[$i]}.$sl) 146 | cat data/$file.tok >> data/train-full.$sl.tok 147 | file=$(basename ${corpus[$i]}.$tl) 148 | cat data/$file.tok >> data/train-full.$tl.tok 149 | done 150 | 151 | local/testset.pl -n 2000 -o data/valid.$sl.tok -h data/train.$sl.tok < data/train-full.$sl.tok > lines-tmp.txt 152 | local/lineextract.pl lines-tmp.txt < data/train-full.$tl.tok > data/valid.$tl.tok 153 | local/heldextract.pl lines-tmp.txt < data/train-full.$tl.tok > data/train.$tl.tok 154 | rm data/train-full.*.tok 155 | rm lines-tmp.txt 156 | 157 | fi 158 | 159 | 160 
| # Preprocess the data - decide here the vocabulary size 50000 default value 161 | if [ $stage -le 2 ]; then 162 | mkdir -p exp 163 | echo "$0: preprocessing corpus" 164 | th preprocess.lua -src_vocab_size $vocab_size -tgt_vocab_size $vocab_size \ 165 | -src_seq_length $seq_len -tgt_seq_length $seq_len \ 166 | -train_src data/train.$sl.tok -train_tgt data/train.$tl.tok \ 167 | -valid_src data/valid.$sl.tok -valid_tgt data/valid.$tl.tok \ 168 | -src_vocab data/dict.$sl.dict -tgt_vocab data/dict.$tl.dict \ 169 | -save_data exp/data-$sl$tl 170 | fi 171 | 172 | # Train the model !!!! even if OS cuda device ID is 0 you need -gpuid=1 173 | # Decide here the number of epochs, learning rate, which epoch to start decay, decay rate 174 | # if you change number of epochs do not forget to change the model name too 175 | 176 | # Train on corpus 177 | 178 | if [ $stage -le 3 ]; then 179 | learning_rate=1 180 | start_decay_at=6 181 | learning_rate_decay=0.5 182 | echo "$0: training public corpus starting, will take a while." 
183 | # train first epoch 184 | th train.lua -layers 2 -rnn_size 512 -data exp/data-$sl$tl-train.t7 \ 185 | -save_model exp/model-$sl$tl -dropout 0.3 -report_every 500 -word_vec_size 512 \ 186 | -start_epoch 1 -end_epoch 1 -max_batch_size 32 \ 187 | -learning_rate $learning_rate -start_decay_at $start_decay_at \ 188 | -learning_rate_decay $learning_rate_decay -gpuid 1 189 | # score it -sample 50000 -sample_tgt_vocab -sample_type partition 190 | score_epoch 1 191 | # th tools/release_model.lua -force -model exp/model-$sl$tl"_epoch1_"*".t7" \ 192 | # -output_model exp/modelcpu-$sl$tl"_epoch1.t7" -gpuid 1 193 | 194 | for epoch in 2 3 4 5 6 7 8 9 10 195 | do 196 | prev_epoch=$(expr $epoch - 1) 197 | [ $epoch -ge $start_decay_at ] && \ 198 | learning_rate=`awk 'BEGIN{printf("%0.4f", '$learning_rate' * '$learning_rate_decay')}'` 199 | th train.lua -rnn_size 512 -train_from exp/model-$sl$tl"_epoch"$prev_epoch"_"*".t7" \ 200 | -data exp/data-$sl$tl-train.t7 \ 201 | -save_model exp/model-$sl$tl -report_every 500 -word_vec_size 512 \ 202 | -start_epoch $epoch -end_epoch $epoch -max_batch_size 32 \ 203 | -learning_rate $learning_rate -start_decay_at $start_decay_at \ 204 | -learning_rate_decay $learning_rate_decay -gpuid 1 205 | # score it 206 | score_epoch $epoch 207 | # th tools/release_model.lua -force -model exp/model-$sl$tl"_epoch"$epoch"_"*".t7" \ 208 | # -output_model exp/modelcpu-$sl$tl"_epoch"$epoch".t7" -gpuid 1 209 | done 210 | fi 211 | -------------------------------------------------------------------------------- /romance-multi-way/local/parse_options.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey); 4 | # Arnab Ghoshal, Karel Vesely 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | # Parse command-line options. 21 | # To be sourced by another script (as in ". parse_options.sh"). 22 | # Option format is: --option-name arg 23 | # and shell variable "option_name" gets set to value "arg." 24 | # The exception is --help, which takes no arguments, but prints the 25 | # $help_message variable (if defined). 26 | 27 | 28 | ### 29 | ### The --config file options have lower priority to command line 30 | ### options, so we need to import them first... 31 | ### 32 | 33 | # Now import all the configs specified by command-line, in left-to-right order 34 | for ((argpos=1; argpos<$#; argpos++)); do 35 | if [ "${!argpos}" == "--config" ]; then 36 | argpos_plus1=$((argpos+1)) 37 | config=${!argpos_plus1} 38 | [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 39 | . $config # source the config file. 40 | fi 41 | done 42 | 43 | 44 | ### 45 | ### No we process the command line options 46 | ### 47 | while true; do 48 | [ -z "${1:-}" ] && break; # break if there are no arguments 49 | case "$1" in 50 | # If the enclosing script is called with --help option, print the help 51 | # message and exit. Scripts should put help messages in $help_message 52 | --help|-h) if [ -z "$help_message" ]; then echo "No help found." 
1>&2; 53 | else printf "$help_message\n" 1>&2 ; fi; 54 | exit 0 ;; 55 | --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" 56 | exit 1 ;; 57 | # If the first command-line argument begins with "--" (e.g. --foo-bar), 58 | # then work out the variable name as $name, which will equal "foo_bar". 59 | --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; 60 | # Next we test whether the variable in question is undefned-- if so it's 61 | # an invalid option and we die. Note: $0 evaluates to the name of the 62 | # enclosing script. 63 | # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar 64 | # is undefined. We then have to wrap this test inside "eval" because 65 | # foo_bar is itself inside a variable ($name). 66 | eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; 67 | 68 | oldval="`eval echo \\$$name`"; 69 | # Work out whether we seem to be expecting a Boolean argument. 70 | if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then 71 | was_bool=true; 72 | else 73 | was_bool=false; 74 | fi 75 | 76 | # Set the variable to the right value-- the escaped quotes make it work if 77 | # the option had spaces, like --cmd "queue.pl -sync y" 78 | eval $name=\"$2\"; 79 | 80 | # Check that Boolean-valued arguments are really Boolean. 81 | if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then 82 | echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 83 | exit 1; 84 | fi 85 | shift 2; 86 | ;; 87 | *) break; 88 | esac 89 | done 90 | 91 | 92 | # Check for an empty argument to the --cmd option, which can easily occur as a 93 | # result of scripting errors. 94 | [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; 95 | 96 | 97 | true; # so this script returns exit code 0. 
-------------------------------------------------------------------------------- /romance-multi-way/run.sh: --------------------------------------------------------------------------------
#!/bin/bash
#
# Copyright 2017 Ubiqus (Author: Vincent Nguyen)
#                Systran (Author: Jean Senellart)
# License MIT
#
# This recipe shows how to build an OpenNMT translation model for Romance
# multi-way languages, based on 200 000 parallel sentences for each pair.
#
# Based on the tuto from the OpenNMT forum.


# TODO: test whether a GPU is present or not
CUDA_VISIBLE_DEVICES=0
decode_cpu=false

# Make symlinks to access the OpenNMT scripts - change this line if needed
OPENNMT_PATH=../../OpenNMT
[ ! -h tools ] && ln -s "$OPENNMT_PATH/tools" tools
[ ! -h preprocess.lua ] && ln -s "$OPENNMT_PATH/preprocess.lua" preprocess.lua
[ ! -h train.lua ] && ln -s "$OPENNMT_PATH/train.lua" train.lua
[ ! -h translate.lua ] && ln -s "$OPENNMT_PATH/translate.lua" translate.lua
[ ! -h onmt ] && ln -s "$OPENNMT_PATH/onmt" onmt

# This is useful to skip some stages during step-by-step execution.
stage=0

# If you want to run without training and use an existing model in the "exp"
# folder, set notrain to true.
notrain=false

# At the moment only the "stage" option is available anyway.
. local/parse_options.sh

# Data download and preparation

if [ "$stage" -le 0 ]; then
  # TODO: put this part in a local/download_data.sh script?
  mkdir -p data
  cd data
  if [ ! -f multi-esfritptro-parallel.tgz ]; then
    echo "$0: downloading the baseline corpus from amazon s3"
    # Abort early if the download fails, rather than letting tar fail on a
    # missing archive.
    wget https://s3.amazonaws.com/opennmt-trainingdata/multi-esfritptro-parallel.tgz \
      || { echo "$0: corpus download failed" 1>&2; exit 1; }
    tar xzfv multi-esfritptro-parallel.tgz
  fi
  cd ../local
  # Fetch the Moses evaluation/wrapping helpers and the subword-nmt BPE
  # trainer, only if not already present.
  if [ ! -f mteval-v13a.pl ]; then
    wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/mteval-v13a.pl
  fi
  if [ ! -f input-from-sgm.perl ]; then
    wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/ems/support/input-from-sgm.perl
  fi
  if [ ! -f wrap-xml.perl ]; then
    wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/ems/support/wrap-xml.perl
  fi
  if [ ! -f multi-bleu.perl ]; then
    wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl
  fi
  if [ ! -f learn_bpe.py ]; then
    wget https://raw.githubusercontent.com/rsennrich/subword-nmt/master/learn_bpe.py
  fi
  cd ..
fi

# Tokenize and prepare the corpus
if [ "$stage" -le 1 ]; then
  echo "$0: tokenizing corpus"
  for f in data/train*.?? ; do th tools/tokenize.lua < "$f" > "$f.rawtok" ; done
  cat data/train*.rawtok | python local/learn_bpe.py -s 32000 > data/esfritptro.bpe32000
  # Re-tokenize everything with case features, joiner annotation and the BPE
  # model learned above.  NOTE: the original flag was misspelled
  # "-nparrallel"; tokenize.lua's option is "-nparallel".
  for f in data/*-????.?? ; do
    th tools/tokenize.lua -case_feature -joiner_annotate -nparallel 4 \
      -bpe_model data/esfritptro.bpe32000 < "$f" > "$f.tok"
  done
  # -f: the multi-way files do not exist yet on a fresh run.
  for set in train valid test ; do rm -f data/$set-multi.???.tok ; done
  for src in es fr it pt ro ; do
    for tgt in es fr it pt ro ; do
      # Prefix every source sentence with a target-language token so one
      # model can translate into any of the five languages.
      [ "$src" != "$tgt" ] && perl -i.bak -pe "s//__opt_tgt_$tgt\xEF\xBF\xA8N /" data/*-$src$tgt.$src.tok
      for set in train valid test ; do
        [ "$src" != "$tgt" ] && cat data/$set-$src$tgt.$src.tok >> data/$set-multi.src.tok
        [ "$src" != "$tgt" ] && cat data/$set-$src$tgt.$tgt.tok >> data/$set-multi.tgt.tok
      done
    done
  done
  # Shuffle source/target jointly and keep 2000 validation pairs.
  paste data/valid-multi.src.tok data/valid-multi.tgt.tok | shuf > data/valid-multi.srctgt.tok
  head -2000 data/valid-multi.srctgt.tok | cut -f1 > data/valid-multi2000.src.tok
  head -2000 data/valid-multi.srctgt.tok | cut -f2 > data/valid-multi2000.tgt.tok
fi

# Preprocess the data - decide here the vocabulary size, 50000 default value
if [ "$stage" -le 2 ]; then
  mkdir -p exp
  echo "$0: preprocessing corpus"
  th preprocess.lua -src_vocab_size 50000 -tgt_vocab_size 50000 \
    -train_src data/train-multi.src.tok -train_tgt data/train-multi.tgt.tok \
    -valid_src data/valid-multi2000.src.tok -valid_tgt data/valid-multi2000.tgt.tok \
    -save_data exp/model-multi
fi

# Train the model !!!! even if the OS CUDA device ID is 0 you need -gpuid 1
# Decide here the number of epochs, learning rate, which epoch to start decay,
# decay rate.  If you change the number of epochs, do not forget to change the
# model name too.
# This example has a smaller topology compared to the tuto for faster training
# (worse results).
if [ "$stage" -le 3 ]; then
  if [ "$notrain" = false ]; then
    echo "$0: training starting, will take a while."
    th train.lua -layers 2 -rnn_size 500 -brnn -word_vec_size 600 \
      -end_epoch 13 -learning_rate 1 -start_decay_at 5 -learning_rate_decay 0.65 \
      -data exp/model-multi-train.t7 -save_model exp/model-multi-2-500-600 -gpuid 1
    cp -f exp/model-multi-2-500-600"_epoch13_"*".t7" exp/model-multi-2-500-600"_final.t7"
  else
    echo "$0: using an existing model"
    if [ ! -f exp/model-multi-2-500-600"_final.t7" ]; then
      # Fixed message: was "mode file does not exist".
      echo "$0: model file does not exist" 1>&2
      exit 1
    fi
  fi
fi

# Deploy model for CPU usage
if [ "$stage" -le 4 ]; then
  if [ "$decode_cpu" = true ]; then
    th tools/release_model.lua -force -model exp/model-multi-2-500-600"_final.t7" \
      -output_model exp/model-multi-2-500-600"_cpu.t7" -gpuid 1
  fi
fi

# Translate using gpu
# You can change this by changing the model name from _final to _cpu and
# removing "-gpuid 1".
if [ "$stage" -le 5 ]; then
  [ "$decode_cpu" = true ] && dec_opt="" || dec_opt="-gpuid 1"
  for src in es fr it pt ro ; do
    for tgt in es fr it pt ro ; do
      [ "$src" != "$tgt" ] && th translate.lua -replace_unk -model exp/model-multi-2-500-600"_final"*".t7" \
        -src data/test-$src$tgt.$src.tok -output exp/test-$src$tgt.hyp.$tgt.tok $dec_opt
    done
  done
fi

# Evaluate the generic test set with multi-bleu
if [ "$stage" -le 6 ]; then
  for src in es fr it pt ro ; do
    for tgt in es fr it pt ro ; do
      [ "$src" != "$tgt" ] && local/multi-bleu.perl data/test-$src$tgt.$tgt.tok \
        < exp/test-$src$tgt.hyp.$tgt.tok > exp/test-$src$tgt"_multibleu".txt
    done
  done
  grep BLEU exp/*multibleu.txt
fi


--------------------------------------------------------------------------------