├── LICENSE ├── Makefile ├── README.md ├── data ├── README.txt ├── char_vocab.txt ├── dev_infl.txt ├── morph_vocab.txt ├── test_infl.txt └── train_infl.txt └── src ├── enc-dec-attn.cc ├── enc-dec-attn.h ├── enc-dec.cc ├── enc-dec.h ├── eval-ensemble-enc-dec-attn.cc ├── eval-ensemble-enc-dec.cc ├── eval-ensemble-joint-enc-beam.cc ├── eval-ensemble-joint-enc-dec-morph.cc ├── eval-ensemble-joint-enc-morph.cc ├── eval-ensemble-lm-joint-enc.cc ├── eval-ensemble-lm-sep-morph.cc ├── eval-ensemble-no-enc.cc ├── eval-ensemble-sep-morph-beam.cc ├── eval-ensemble-sep-morph.cc ├── joint-enc-dec-morph.cc ├── joint-enc-dec-morph.h ├── joint-enc-morph.cc ├── joint-enc-morph.h ├── lm-joint-enc.cc ├── lm-joint-enc.h ├── lm-sep-morph.cc ├── lm-sep-morph.h ├── lm.cc ├── lm.h ├── no-enc.cc ├── no-enc.h ├── sep-morph.cc ├── sep-morph.h ├── train-enc-dec-attn.cc ├── train-enc-dec.cc ├── train-joint-enc-dec-morph.cc ├── train-joint-enc-morph.cc ├── train-lm-joint-enc.cc ├── train-lm-sep-morph.cc ├── train-no-enc.cc ├── train-sep-morph.cc ├── utils.cc └── utils.h /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Manaal Faruqui 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CNN_DIR=$(CNN) 3 | EIGEN_DIR=$(EIGEN) 4 | BOOST_DIR=$(BOOST) 5 | 6 | CNN_BUILD_DIR=$(CNN_DIR)/build 7 | INCS=-I$(CNN_DIR) -I$(CNN_BUILD_DIR) -I$(EIGEN_DIR) 8 | LIBS=-L$(CNN_BUILD_DIR)/cnn/ -L$(BOOST_DIR)/lib 9 | FINAL=-lcnn -lboost_regex -lboost_serialization -lboost_program_options -lrt -lpthread 10 | CFLAGS=-std=c++11 -Ofast -g -march=native -pipe 11 | BINDIR=bin 12 | OBJDIR=obj 13 | SRCDIR=src 14 | 15 | .PHONY: clean 16 | all: make_dirs $(BINDIR)/train-sep-morph $(BINDIR)/eval-ensemble-sep-morph $(BINDIR)/train-joint-enc-morph $(BINDIR)/eval-ensemble-joint-enc-morph $(BINDIR)/train-lm-sep-morph $(BINDIR)/eval-ensemble-lm-sep-morph $(BINDIR)/train-joint-enc-dec-morph $(BINDIR)/eval-ensemble-joint-enc-dec-morph $(BINDIR)/eval-ensemble-sep-morph-beam $(BINDIR)/train-lm-joint-enc $(BINDIR)/eval-ensemble-lm-joint-enc $(BINDIR)/eval-ensemble-joint-enc-beam $(BINDIR)/train-no-enc $(BINDIR)/eval-ensemble-no-enc $(BINDIR)/train-enc-dec $(BINDIR)/eval-ensemble-enc-dec $(BINDIR)/train-enc-dec-attn $(BINDIR)/eval-ensemble-enc-dec-attn 17 | 18 | make_dirs: 19 | mkdir -p $(OBJDIR) 20 | mkdir -p $(BINDIR) 21 | 22 | include $(wildcard $(OBJDIR)/*.d) 23 | 24 | $(OBJDIR)/%.o: $(SRCDIR)/%.cc 25 | $(CC) $(CFLAGS) $(INCS) -c $< -o $@ 26 | $(CC) -MM -MP -MT "$@" $(CFLAGS) $(INCS) $< > $(OBJDIR)/$*.d 27 
| 28 | $(BINDIR)/train-sep-morph: $(addprefix $(OBJDIR)/, train-sep-morph.o sep-morph.o utils.o) 29 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 30 | 31 | $(BINDIR)/train-no-enc: $(addprefix $(OBJDIR)/, train-no-enc.o no-enc.o utils.o) 32 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 33 | 34 | $(BINDIR)/train-enc-dec: $(addprefix $(OBJDIR)/, train-enc-dec.o enc-dec.o utils.o) 35 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 36 | 37 | $(BINDIR)/train-enc-dec-attn: $(addprefix $(OBJDIR)/, train-enc-dec-attn.o enc-dec-attn.o utils.o) 38 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 39 | 40 | $(BINDIR)/train-joint-enc-morph: $(addprefix $(OBJDIR)/, train-joint-enc-morph.o joint-enc-morph.o utils.o) 41 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 42 | 43 | $(BINDIR)/train-joint-enc-dec-morph: $(addprefix $(OBJDIR)/, train-joint-enc-dec-morph.o joint-enc-dec-morph.o utils.o) 44 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 45 | 46 | $(BINDIR)/train-lm-sep-morph: $(addprefix $(OBJDIR)/, train-lm-sep-morph.o lm-sep-morph.o utils.o lm.o) 47 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 48 | 49 | $(BINDIR)/train-lm-joint-enc: $(addprefix $(OBJDIR)/, train-lm-joint-enc.o lm-joint-enc.o utils.o lm.o) 50 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 51 | 52 | $(BINDIR)/eval-ensemble-lm-joint-enc: $(addprefix $(OBJDIR)/, eval-ensemble-lm-joint-enc.o utils.o lm-joint-enc.o lm.o) 53 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 54 | 55 | $(BINDIR)/eval-ensemble-sep-morph: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph.o utils.o sep-morph.o) 56 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 57 | 58 | $(BINDIR)/eval-ensemble-sep-morph-spanish-gen: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph-spanish-gen.o utils.o sep-morph.o) 59 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 60 | 61 | $(BINDIR)/eval-ensemble-no-enc: $(addprefix $(OBJDIR)/, eval-ensemble-no-enc.o utils.o no-enc.o) 62 | $(CC) $(CFLAGS) $(LIBS) 
$(INCS) $^ -o $@ $(FINAL) 63 | 64 | $(BINDIR)/eval-ensemble-enc-dec: $(addprefix $(OBJDIR)/, eval-ensemble-enc-dec.o utils.o enc-dec.o) 65 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 66 | 67 | $(BINDIR)/eval-ensemble-enc-dec-attn: $(addprefix $(OBJDIR)/, eval-ensemble-enc-dec-attn.o utils.o enc-dec-attn.o) 68 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 69 | 70 | $(BINDIR)/eval-ensemble-lm-sep-morph: $(addprefix $(OBJDIR)/, eval-ensemble-lm-sep-morph.o utils.o lm-sep-morph.o lm.o) 71 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 72 | 73 | $(BINDIR)/eval-ensemble-joint-enc-morph: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-morph.o utils.o joint-enc-morph.o) 74 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 75 | 76 | $(BINDIR)/eval-ensemble-joint-enc-dec-morph: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-dec-morph.o utils.o joint-enc-dec-morph.o) 77 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 78 | 79 | $(BINDIR)/eval-ensemble-sep-morph-beam: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph-beam.o utils.o sep-morph.o) 80 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 81 | 82 | $(BINDIR)/eval-ensemble-joint-enc-beam: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-beam.o utils.o joint-enc-morph.o) 83 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 84 | 85 | 86 | clean: 87 | rm -rf $(BINDIR)/* 88 | rm -rf $(OBJDIR)/* 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # morph-trans 2 | Manaal Faruqui, manaalfar@gmail.com 3 | 4 | This tool is used to generate morphologically inflected forms of a given word according to a specified morphological attribute. The input word is treated as a sequence of characters and the output word is generated as the transformed sequence of characters using a variant of neural encoder-decoder architecture. This is described in Faruqui et al (NAACL, 2016). 
5 | 6 | ### Requirements 7 | 8 | 1. CNN neural network library: https://github.com/clab/cnn 9 | 2. C++ BOOST library: http://www.boost.org/ 10 | 3. C++ Eigen library: http://eigen.tuxfamily.org/ 11 | 12 | Please download and compile these libraries. 13 | 14 | ### Data 15 | 16 | You need three files to train the inflection generation system. Two files that contain vocabulary of the characters, and the vocabulary of the morphological attributes. The third file contains the training/dev/test data. Sample files can be found in data/ . 17 | 18 | * Character vocabulary file: char_vocab.txt 19 | 20 | ``` a b c g f ä ...``` 21 | 22 | * Morphological attribute file: morph_vocab.txt 23 | 24 | ```case=nominative:number=singular case=dative:number=plural ...``` 25 | 26 | * Inflection train/dev/test file: infl.txt 27 | 28 | ``` a a l | a a l e s |case=genitive:number=singular``` 29 | 30 | * Optional language models: [here](https://drive.google.com/folderview?id=0B93-ltInuGUyeGhDUVZSeWkyUVE&usp=sharing) 31 | 32 | ### Compile 33 | 34 | After you have installed CNN, Boost and Eigen, you can simply install the software by typing the following command:- 35 | 36 | ```make CNN=cnn-dir BOOST=boost-dir EIGEN=eigen-dir``` 37 | 38 | ### Run 39 | 40 | To train the inflection generation system, simply run the following:- 41 | 42 | ```./bin/train-sep-morph char_vocab.txt morph_vocab.txt train_infl.txt dev_infl.txt 100 30 1e-5 1 model.txt``` 43 | 44 | Here, 100 is the hidden layer size of the LSTM, 30 is the number of iterations for training, 1e-5 is the l2 regularization strength and 1 is the number of layers in the LSTM. 45 | 46 | To test the system, run:- 47 | 48 | ```./bin/eval-ensemble-sep-morph char_vocab.txt morph_vocab.txt test_infl.txt model1.txt model2.txt model3.txt ... > output.txt``` 49 | 50 | This can use an ensemble of models for evaluation. If you want to use only one model, just provide one model. The sep-morph model is the model that provided us best supervised results. 
Other models can be used in the same way. Baseline encoder-decoder models can be trained using ```train-enc-dec``` and ```train-enc-dec-attn``` models. 51 | 52 | ### Reference 53 | ``` 54 | @inproceedings{faruqui:2016:infl, 55 | author = {Faruqui, Manaal and Tsvetkov, Yulia and Neubig, Graham and Dyer, Chris}, 56 | title = {Morphological Inflection Generation Using Character Sequence to Sequence Learning}, 57 | booktitle = {Proc. of NAACL}, 58 | year = {2016}, 59 | } 60 | ``` 61 | -------------------------------------------------------------------------------- /data/README.txt: -------------------------------------------------------------------------------- 1 | This is the German Nouns inflection dataset from Durrett and DeNero (2013). 2 | -------------------------------------------------------------------------------- /data/char_vocab.txt: -------------------------------------------------------------------------------- 1 | ß é è æ ä ü ö a c b e d g f i h k j m l o n q p s r u t w v y x z 2 | -------------------------------------------------------------------------------- /data/morph_vocab.txt: -------------------------------------------------------------------------------- 1 | case=nominative:number=singular case=dative:number=plural case=nominative:number=plural case=genitive:number=singular case=dative:number=singular case=accusative:number=singular case=accusative:number=plural case=genitive:number=plural 2 | -------------------------------------------------------------------------------- /src/enc-dec-attn.h: -------------------------------------------------------------------------------- 1 | #ifndef ENC_DEC_ATTN_H_ 2 | #define ENC_DEC_ATTN_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 
| 21 | using namespace std; 22 | using namespace cnn; 23 | using namespace cnn::expr; 24 | 25 | class EncDecAttn { 26 | public: 27 | vector input_forward, input_backward, output_forward; 28 | vector char_vecs; 29 | 30 | Expression hidden_to_output, hidden_to_output_bias; 31 | vector phidden_to_output, phidden_to_output_bias; 32 | 33 | Expression transform_encoded, transform_encoded_bias; 34 | vector ptransform_encoded, ptransform_encoded_bias; 35 | 36 | Expression compress_hidden, compress_hidden_bias; 37 | vector pcompress_hidden, pcompress_hidden_bias; 38 | 39 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 40 | vector eps_vecs; 41 | 42 | EncDecAttn() {} 43 | 44 | EncDecAttn(const unsigned& char_length, const unsigned& hidden_length, 45 | const unsigned& vocab_length, const unsigned& layers, 46 | const unsigned& num_morph, vector* m, 47 | vector* optimizer); 48 | 49 | void InitParams(vector* m); 50 | 51 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 52 | 53 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 54 | Expression* hidden, vector* all_hidden, 55 | ComputationGraph *cg); 56 | 57 | Expression GetAvgAttnLayer(const Expression& hidden, 58 | const vector& all_input_hidden) const ; 59 | 60 | void TransformEncodedInput(Expression* encoded_input) const; 61 | 62 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 63 | 64 | void ProjectToOutput(const Expression& hidden, 65 | const vector& all_input_hidden, 66 | Expression* out) const; 67 | 68 | Expression ComputeLoss(const vector& hidden_units, 69 | const vector& targets, 70 | const vector& all_input_hidden) const; 71 | 72 | float Train(const unsigned& morph_id, const vector& inputs, 73 | const vector& outputs, AdadeltaTrainer* ada_gd); 74 | 75 | friend class boost::serialization::access; 76 | template void serialize(Archive& ar, const unsigned int) { 77 | ar & char_len; 78 | ar & hidden_len; 79 | ar & vocab_len; 80 | ar & 
layers; 81 | ar & morph_len; 82 | ar & max_eps; 83 | } 84 | }; 85 | 86 | void 87 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 88 | const vector& input_ids, 89 | vector* pred_target_ids, vector* ensmb_model); 90 | 91 | void 92 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 93 | unordered_map& char_to_id, 94 | const vector& input_ids, 95 | vector >* sequences, vector* tm_scores, 96 | vector* ensmb_model); 97 | 98 | void Serialize(string& filename, EncDecAttn& model, vector* cnn_model); 99 | 100 | void Read(string& filename, EncDecAttn* model, vector* cnn_model); 101 | 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /src/enc-dec.cc: -------------------------------------------------------------------------------- 1 | #include "enc-dec.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | 7 | string BOW = "", EOW = ""; 8 | int MAX_PRED_LEN = 100; 9 | float NEG_INF = numeric_limits::min(); 10 | 11 | EncDec::EncDec(const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void EncDec::InitParams(vector* m) { 24 | for (unsigned i = 0; i < morph_len; ++i) { 25 | input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 26 | input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 27 | output_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 28 | 29 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len})); 30 | phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1})); 31 | 32 | 
char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len})); 33 | 34 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 35 | 2 * hidden_len})); 36 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 37 | 38 | //eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 39 | } 40 | } 41 | 42 | void EncDec::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 43 | input_forward[morph_id].new_graph(*cg); 44 | input_backward[morph_id].new_graph(*cg); 45 | output_forward[morph_id].new_graph(*cg); 46 | 47 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]); 48 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]); 49 | 50 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 51 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 52 | } 53 | 54 | void EncDec::RunFwdBwd(const unsigned& morph_id, 55 | const vector& inputs, 56 | Expression* hidden, ComputationGraph *cg) { 57 | vector input_vecs; 58 | for (const unsigned& input_id : inputs) { 59 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id)); 60 | } 61 | 62 | // Run forward LSTM 63 | Expression forward_unit; 64 | input_forward[morph_id].start_new_sequence(); 65 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 66 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]); 67 | } 68 | 69 | // Run backward LSTM 70 | Expression backward_unit; 71 | input_backward[morph_id].start_new_sequence(); 72 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 73 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]); 74 | } 75 | 76 | // Concatenate the forward and back hidden layers 77 | *hidden = concatenate({forward_unit, backward_unit}); 78 | } 79 | 80 | void EncDec::TransformEncodedInput(Expression* encoded_input) const { 81 | *encoded_input = affine_transform({transform_encoded_bias, 82 | transform_encoded, *encoded_input}); 83 | } 84 | 
85 | void EncDec::ProjectToOutput(const Expression& hidden, Expression* out) const { 86 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 87 | } 88 | 89 | Expression EncDec::ComputeLoss(const vector& hidden_units, 90 | const vector& targets) const { 91 | assert(hidden_units.size() == targets.size()); 92 | vector losses; 93 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 94 | Expression out; 95 | ProjectToOutput(hidden_units[i], &out); 96 | losses.push_back(pickneglogsoftmax(out, targets[i])); 97 | } 98 | return sum(losses); 99 | } 100 | 101 | float EncDec::Train(const unsigned& morph_id, const vector& inputs, 102 | const vector& outputs, AdadeltaTrainer* ada_gd) { 103 | ComputationGraph cg; 104 | AddParamsToCG(morph_id, &cg); 105 | 106 | // Encode and Transform to feed into decoder 107 | Expression encoded_input_vec; 108 | RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg); 109 | TransformEncodedInput(&encoded_input_vec); 110 | 111 | // Use this encoded word vector to predict the transformed word 112 | vector input_vecs_for_dec; 113 | vector output_ids_for_pred; 114 | for (unsigned i = 0; i < outputs.size(); ++i) { 115 | if (i < outputs.size() - 1) { 116 | // '' will not be fed as input -- it needs to be predicted. 117 | input_vecs_for_dec.push_back(lookup(cg, char_vecs[morph_id], outputs[i])); 118 | } 119 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 
120 | output_ids_for_pred.push_back(outputs[i]); 121 | } 122 | } 123 | 124 | vector decoder_hidden_units; 125 | vector init; 126 | for (unsigned i = 0; i < layers; ++i) { 127 | init.push_back(encoded_input_vec); // init cell of decoder 128 | } 129 | for (unsigned i = 0; i < layers; ++i) { 130 | init.push_back(tanh(encoded_input_vec)); // init hidden layer of decoder 131 | } 132 | output_forward[morph_id].start_new_sequence(init); 133 | for (const auto& vec : input_vecs_for_dec) { 134 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 135 | } 136 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 137 | 138 | float return_loss = as_scalar(cg.forward()); 139 | cg.backward(); 140 | ada_gd->update(1.0f); 141 | return return_loss; 142 | } 143 | 144 | void 145 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 146 | const vector& input_ids, 147 | vector* pred_target_ids, vector* ensmb_model) { 148 | ComputationGraph cg; 149 | 150 | unsigned ensmb = ensmb_model->size(); 151 | //vector encoded_word_vecs; 152 | for (unsigned i = 0; i < ensmb; ++i) { 153 | Expression encoded_word_vec; 154 | auto model = (*ensmb_model)[i]; 155 | model->AddParamsToCG(morph_id, &cg); 156 | model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 157 | model->TransformEncodedInput(&encoded_word_vec); 158 | //encoded_word_vecs.push_back(encoded_word_vec); 159 | vector init; 160 | for (unsigned i = 0; i < model->layers; ++i) { 161 | init.push_back(encoded_word_vec); // init cell of decoder 162 | } 163 | for (unsigned i = 0; i < model->layers; ++i) { 164 | init.push_back(tanh(encoded_word_vec)); // init hidden layer of decoder 165 | } 166 | model->output_forward[morph_id].start_new_sequence(init); 167 | } 168 | 169 | unsigned out_index = 1; 170 | unsigned pred_index = char_to_id[BOW]; 171 | while (pred_target_ids->size() < MAX_PRED_LEN) { 172 | vector ensmb_out; 173 | pred_target_ids->push_back(pred_index); 174 | if 
(pred_index == char_to_id[EOW]) { 175 | return; // If the end is found, break from the loop and return 176 | } 177 | 178 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 179 | auto model = (*ensmb_model)[ensmb_id]; 180 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index); 181 | Expression input, input_char_vec; 182 | /*if (out_index < input_ids.size()) { 183 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]); 184 | } else { 185 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 186 | min(unsigned(out_index - input_ids.size()), 187 | model->max_eps - 1)); 188 | } 189 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 190 | input_char_vec});*/ 191 | input = prev_output_vec; 192 | 193 | Expression hidden = model->output_forward[morph_id].add_input(input); 194 | Expression out; 195 | model->ProjectToOutput(hidden, &out); 196 | ensmb_out.push_back(log_softmax(out)); 197 | } 198 | 199 | Expression out = sum(ensmb_out) / ensmb_out.size(); 200 | vector dist = as_vector(cg.incremental_forward()); 201 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 202 | out_index++; 203 | } 204 | } 205 | 206 | void 207 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 208 | unordered_map& char_to_id, 209 | const vector& input_ids, 210 | vector >* sequences, vector* tm_scores, 211 | vector* ensmb_model) { 212 | unsigned out_index = 1; 213 | unsigned ensmb = ensmb_model->size(); 214 | ComputationGraph cg; 215 | 216 | // Compute stuff for every model in the ensemble. 
217 | vector encoded_word_vecs; 218 | vector ensmb_out; 219 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 220 | auto& model = *(*ensmb_model)[ensmb_id]; 221 | model.AddParamsToCG(morph_id, &cg); 222 | 223 | Expression encoded_word_vec; 224 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 225 | model.TransformEncodedInput(&encoded_word_vec); 226 | encoded_word_vecs.push_back(encoded_word_vec); 227 | 228 | vector init; 229 | for (unsigned i = 0; i < model.layers; ++i) { 230 | init.push_back(encoded_word_vec); // init cell of decoder 231 | } 232 | for (unsigned i = 0; i < model.layers; ++i) { 233 | init.push_back(tanh(encoded_word_vec)); // init hidden layer of decoder 234 | } 235 | model.output_forward[morph_id].start_new_sequence(init); 236 | 237 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id], 238 | char_to_id[BOW]); 239 | /*Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 240 | lookup(cg, model.char_vecs[morph_id], 241 | input_ids[out_index])});*/ 242 | Expression input = prev_output_vec; 243 | Expression hidden = model.output_forward[morph_id].add_input(input); 244 | Expression out; 245 | model.ProjectToOutput(hidden, &out); 246 | out = log_softmax(out); 247 | ensmb_out.push_back(out); 248 | } 249 | 250 | // Compute the average of the ensemble output. 251 | Expression out_dist = average(ensmb_out); 252 | vector log_dist = as_vector(cg.incremental_forward()); 253 | priority_queue > init_queue; 254 | for (unsigned i = 0; i < log_dist.size(); ++i) { 255 | init_queue.push(make_pair(log_dist[i], i)); 256 | } 257 | unsigned vocab_size = log_dist.size(); 258 | 259 | // Initialise the beam_size sequences, scores, hidden states. 
260 | vector log_scores; 261 | vector > prev_states; 262 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 263 | vector seq; 264 | seq.push_back(char_to_id[BOW]); 265 | seq.push_back(init_queue.top().second); 266 | sequences->push_back(seq); 267 | log_scores.push_back(init_queue.top().first); 268 | 269 | vector ensmb_states; 270 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 271 | auto& model = *(*ensmb_model)[ensmb_id]; 272 | ensmb_states.push_back(model.output_forward[morph_id].state()); 273 | } 274 | prev_states.push_back(ensmb_states); 275 | init_queue.pop(); 276 | } 277 | 278 | vector neg_inf(vocab_size, NEG_INF); 279 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf); 280 | 281 | vector active_beams(beam_size, true); 282 | while (true) { 283 | out_index++; 284 | priority_queue > > probs_queue; 285 | vector > curr_states; 286 | vector out_dist; 287 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 288 | if (active_beams[beam_id]) { 289 | unsigned prev_out_char = (*sequences)[beam_id].back(); 290 | vector ensmb_out; 291 | vector ensmb_states; 292 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) { 293 | auto& model = *(*ensmb_model)[ensmb_id]; 294 | /*Expression input_char_vec; 295 | /if (out_index < input_ids.size()) { 296 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]); 297 | } else { 298 | input_char_vec = lookup(cg, model.eps_vecs[morph_id], 299 | min(unsigned(out_index - input_ids.size()), 300 | model.max_eps - 1)); 301 | }*/ 302 | 303 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char); 304 | //Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_out_vec, 305 | // input_char_vec}); 306 | 307 | Expression input = prev_out_vec; 308 | Expression hidden = model.output_forward[morph_id].add_input( 309 | prev_states[beam_id][ensmb_id], input); 310 | ensmb_states.push_back(model.output_forward[morph_id].state()); 311 | 
312 | Expression out; 313 | model.ProjectToOutput(hidden, &out); 314 | out = log_softmax(out); 315 | ensmb_out.push_back(out); 316 | } 317 | curr_states.push_back(ensmb_states); 318 | out_dist.push_back(average(ensmb_out)); 319 | } else { 320 | vector dummy(ensmb, RNNPointer(0)); 321 | curr_states.push_back(dummy); 322 | out_dist.push_back(neg_inf_vec); 323 | } 324 | } 325 | 326 | Expression all_scores = concatenate(out_dist); 327 | vector log_dist = as_vector(cg.incremental_forward()); 328 | 329 | for (unsigned index = 0; index < log_dist.size(); ++index) { 330 | unsigned beam_id = index / vocab_size; 331 | unsigned char_id = index % vocab_size; 332 | if (active_beams[beam_id]) { 333 | pair location = make_pair(beam_id, char_id); 334 | probs_queue.push(pair >( 335 | log_scores[beam_id] + log_dist[index], location)); 336 | } 337 | } 338 | 339 | // Find the beam_size best now and update the variables. 340 | unordered_map > new_seq; 341 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 342 | if (active_beams[beam_id]) { 343 | float log_prob = probs_queue.top().first; 344 | pair location = probs_queue.top().second; 345 | unsigned old_beam_id = location.first, char_id = location.second; 346 | 347 | vector seq = (*sequences)[old_beam_id]; 348 | seq.push_back(char_id); 349 | new_seq[beam_id] = seq; 350 | log_scores[beam_id] = log_prob; // Update the score 351 | 352 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state 353 | probs_queue.pop(); 354 | } 355 | } 356 | 357 | // Update the sequences now. 358 | for (auto& it : new_seq) { 359 | (*sequences)[it.first] = it.second; 360 | } 361 | 362 | // Check if a sequence should be made inactive. 
363 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 364 | if (active_beams[beam_id] && 365 | ((*sequences)[beam_id].back() == char_to_id[EOW] || 366 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) { 367 | active_beams[beam_id] = false; 368 | } 369 | } 370 | 371 | // Check if all sequences are inactive. 372 | bool all_inactive = true; 373 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 374 | if (active_beams[beam_id]) { 375 | all_inactive = false; 376 | break; 377 | } 378 | } 379 | 380 | if (all_inactive) { 381 | *tm_scores = log_scores; 382 | return; 383 | } 384 | } 385 | } 386 | 387 | void Serialize(string& filename, EncDec& model, vector* cnn_models) { 388 | ofstream outfile(filename); 389 | if (!outfile.is_open()) { 390 | cerr << "File opening failed" << endl; 391 | } 392 | 393 | boost::archive::text_oarchive oa(outfile); 394 | oa & model; 395 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 396 | oa & *(*cnn_models)[i]; 397 | } 398 | 399 | cerr << "Saved model to: " << filename << endl; 400 | outfile.close(); 401 | } 402 | 403 | void Read(string& filename, EncDec* model, vector* cnn_models) { 404 | ifstream infile(filename); 405 | if (!infile.is_open()) { 406 | cerr << "File opening failed" << endl; 407 | } 408 | 409 | boost::archive::text_iarchive ia(infile); 410 | ia & *model; 411 | for (unsigned i = 0; i < model->morph_len; ++i) { 412 | Model *cnn_model = new Model(); 413 | cnn_models->push_back(cnn_model); 414 | } 415 | 416 | model->InitParams(cnn_models); 417 | for (unsigned i = 0; i < model->morph_len; ++i) { 418 | ia & *(*cnn_models)[i]; 419 | } 420 | 421 | cerr << "Loaded model from: " << filename << endl; 422 | infile.close(); 423 | } 424 | 425 | -------------------------------------------------------------------------------- /src/enc-dec.h: -------------------------------------------------------------------------------- 1 | #ifndef ENC_DEC_H_ 2 | #define ENC_DEC_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include 
"cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | using namespace cnn; 23 | using namespace cnn::expr; 24 | 25 | class EncDec { 26 | public: 27 | vector input_forward, input_backward, output_forward; 28 | vector char_vecs; 29 | 30 | Expression hidden_to_output, hidden_to_output_bias; 31 | vector phidden_to_output, phidden_to_output_bias; 32 | 33 | Expression transform_encoded, transform_encoded_bias; 34 | vector ptransform_encoded, ptransform_encoded_bias; 35 | 36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 37 | vector eps_vecs; 38 | 39 | EncDec() {} 40 | 41 | EncDec(const unsigned& char_length, const unsigned& hidden_length, 42 | const unsigned& vocab_length, const unsigned& layers, 43 | const unsigned& num_morph, vector* m, 44 | vector* optimizer); 45 | 46 | void InitParams(vector* m); 47 | 48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 49 | 50 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 51 | Expression* hidden, ComputationGraph *cg); 52 | 53 | void TransformEncodedInput(Expression* encoded_input) const; 54 | 55 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 56 | 57 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 58 | 59 | Expression ComputeLoss(const vector& hidden_units, 60 | const vector& targets) const; 61 | 62 | float Train(const unsigned& morph_id, const vector& inputs, 63 | const vector& outputs, AdadeltaTrainer* ada_gd); 64 | 65 | friend class boost::serialization::access; 66 | template void serialize(Archive& ar, const unsigned int) { 67 | ar & char_len; 68 | ar & hidden_len; 69 | ar & vocab_len; 70 | ar & layers; 71 | ar & morph_len; 72 | ar & max_eps; 73 | } 74 
| }; 75 | 76 | void 77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 78 | const vector& input_ids, 79 | vector* pred_target_ids, vector* ensmb_model); 80 | 81 | void 82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 83 | unordered_map& char_to_id, 84 | const vector& input_ids, 85 | vector >* sequences, vector* tm_scores, 86 | vector* ensmb_model); 87 | 88 | void Serialize(string& filename, EncDec& model, vector* cnn_model); 89 | 90 | void Read(string& filename, EncDec* model, vector* cnn_model); 91 | 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /src/eval-ensemble-enc-dec-attn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "enc-dec-attn.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | unsigned beam_size = 2; 27 | 28 | unordered_map char_to_id, morph_to_id; 29 | unordered_map id_to_char, id_to_morph; 30 | 31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 32 | unsigned vocab_size = char_to_id.size(); 33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 34 | unsigned morph_size = morph_to_id.size(); 35 | 36 | vector test_data; // Read the dev file in a vector 37 | ReadData(test_filename, &test_data); 38 | 39 | vector > ensmb_m; 40 | vector ensmb_nn; 41 | for (unsigned i = 0; i < argc - 4; ++i) { 42 | vector m; 43 | EncDecAttn nn; 44 | string f = argv[i + 
4]; 45 | Read(f, &nn, &m); 46 | ensmb_m.push_back(m); 47 | ensmb_nn.push_back(nn); 48 | } 49 | 50 | // Read the test file and output predictions for the words. 51 | string line; 52 | double correct = 0, total = 0; 53 | vector object_pointers; 54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 55 | object_pointers.push_back(&ensmb_nn[i]); 56 | } 57 | for (string& line : test_data) { 58 | vector items = split_line(line, '|'); 59 | vector input_ids, target_ids, pred_target_ids; 60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 61 | for (const string& ch : split_line(items[0], ' ')) { 62 | input_ids.push_back(char_to_id[ch]); 63 | } 64 | for (const string& ch : split_line(items[1], ' ')) { 65 | target_ids.push_back(char_to_id[ch]); 66 | } 67 | unsigned morph_id = morph_to_id[items[2]]; 68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 69 | &object_pointers); 70 | 71 | string prediction = ""; 72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 73 | prediction += id_to_char[pred_target_ids[i]]; 74 | if (i != pred_target_ids.size() - 1) { 75 | prediction += " "; 76 | } 77 | } 78 | if (prediction == items[1]) { 79 | correct += 1; 80 | } else { 81 | //cout << "GOLD: " << line << endl; 82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 83 | } 84 | total += 1; 85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl; 86 | } 87 | cerr << "Prediction Accuracy: " << correct / total << endl; 88 | return 1; 89 | } 90 | -------------------------------------------------------------------------------- /src/eval-ensemble-enc-dec.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "enc-dec.h" 12 | 13 | 
#include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | unsigned beam_size = 2; 27 | 28 | unordered_map char_to_id, morph_to_id; 29 | unordered_map id_to_char, id_to_morph; 30 | 31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 32 | unsigned vocab_size = char_to_id.size(); 33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 34 | unsigned morph_size = morph_to_id.size(); 35 | 36 | vector test_data; // Read the dev file in a vector 37 | ReadData(test_filename, &test_data); 38 | 39 | vector > ensmb_m; 40 | vector ensmb_nn; 41 | for (unsigned i = 0; i < argc - 4; ++i) { 42 | vector m; 43 | EncDec nn; 44 | string f = argv[i + 4]; 45 | Read(f, &nn, &m); 46 | ensmb_m.push_back(m); 47 | ensmb_nn.push_back(nn); 48 | } 49 | 50 | // Read the test file and output predictions for the words. 
51 | string line; 52 | double correct = 0, total = 0; 53 | vector object_pointers; 54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 55 | object_pointers.push_back(&ensmb_nn[i]); 56 | } 57 | for (string& line : test_data) { 58 | vector items = split_line(line, '|'); 59 | vector input_ids, target_ids, pred_target_ids; 60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 61 | for (const string& ch : split_line(items[0], ' ')) { 62 | input_ids.push_back(char_to_id[ch]); 63 | } 64 | for (const string& ch : split_line(items[1], ' ')) { 65 | target_ids.push_back(char_to_id[ch]); 66 | } 67 | unsigned morph_id = morph_to_id[items[2]]; 68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 69 | &object_pointers); 70 | 71 | string prediction = ""; 72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 73 | prediction += id_to_char[pred_target_ids[i]]; 74 | if (i != pred_target_ids.size() - 1) { 75 | prediction += " "; 76 | } 77 | } 78 | if (prediction == items[1]) { 79 | correct += 1; 80 | } else { 81 | //cout << "GOLD: " << line << endl; 82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 83 | } 84 | total += 1; 85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl; 86 | } 87 | cerr << "Prediction Accuracy: " << correct / total << endl; 88 | return 1; 89 | } 90 | -------------------------------------------------------------------------------- /src/eval-ensemble-joint-enc-beam.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file outputs all the strings in the beam. 
3 | */ 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | #include "joint-enc-morph.h" 15 | 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | using namespace cnn; 21 | using namespace cnn::expr; 22 | 23 | int main(int argc, char** argv) { 24 | cnn::Initialize(argc, argv); 25 | 26 | string vocab_filename = argv[1]; // vocabulary of words/characters 27 | string morph_filename = argv[2]; 28 | string test_filename = argv[3]; 29 | unsigned beam_size = atoi(argv[4]); 30 | 31 | unordered_map char_to_id, morph_to_id; 32 | unordered_map id_to_char, id_to_morph; 33 | 34 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 35 | unsigned vocab_size = char_to_id.size(); 36 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 37 | unsigned morph_size = morph_to_id.size(); 38 | 39 | vector test_data; // Read the dev file in a vector 40 | ReadData(test_filename, &test_data); 41 | 42 | vector > ensmb_m; 43 | vector ensmb_nn; 44 | for (unsigned i = 0; i < argc - 5; ++i) { 45 | vector m; 46 | JointEncMorph nn; 47 | string f = argv[i + 5]; 48 | Read(f, &nn, &m); 49 | ensmb_m.push_back(m); 50 | ensmb_nn.push_back(nn); 51 | } 52 | 53 | // Read the test file and output predictions for the words. 
54 | string line; 55 | double correct = 0, total = 0; 56 | vector object_pointers; 57 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 58 | object_pointers.push_back(&ensmb_nn[i]); 59 | } 60 | 61 | for (string& line : test_data) { 62 | vector items = split_line(line, '|'); 63 | vector input_ids, target_ids, pred_target_ids; 64 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 65 | for (const string& ch : split_line(items[0], ' ')) { 66 | input_ids.push_back(char_to_id[ch]); 67 | } 68 | for (const string& ch : split_line(items[1], ' ')) { 69 | target_ids.push_back(char_to_id[ch]); 70 | } 71 | 72 | vector > pred_beams; 73 | vector beam_score; 74 | unsigned morph_id = morph_to_id[items[2]]; 75 | EnsembleBeamDecode(morph_id, beam_size, char_to_id, input_ids, &pred_beams, 76 | &beam_score, &object_pointers); 77 | 78 | cout << "GOLD: " << line << endl; 79 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 80 | pred_target_ids = pred_beams[beam_id]; 81 | string prediction = ""; 82 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 83 | prediction += id_to_char[pred_target_ids[i]]; 84 | if (i != pred_target_ids.size() - 1) { 85 | prediction += " "; 86 | } 87 | } 88 | cout << "PRED: " << prediction << " " << beam_score[beam_id] << endl; 89 | } 90 | } 91 | return 1; 92 | } 93 | -------------------------------------------------------------------------------- /src/eval-ensemble-joint-enc-dec-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "joint-enc-dec-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | 
cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | 27 | unordered_map char_to_id, morph_to_id; 28 | unordered_map id_to_char, id_to_morph; 29 | 30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 31 | unsigned vocab_size = char_to_id.size(); 32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 33 | unsigned morph_size = morph_to_id.size(); 34 | 35 | vector test_data; // Read the dev file in a vector 36 | ReadData(test_filename, &test_data); 37 | 38 | vector > ensmb_m; 39 | vector ensmb_nn; 40 | for (unsigned i = 0; i < argc - 4; ++i) { 41 | vector m; 42 | JointEncDecMorph nn; 43 | string f = argv[i + 4]; 44 | Read(f, &nn, &m); 45 | ensmb_m.push_back(m); 46 | ensmb_nn.push_back(nn); 47 | } 48 | 49 | // Read the test file and output predictions for the words. 50 | string line; 51 | vector object_pointers; 52 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 53 | object_pointers.push_back(&ensmb_nn[i]); 54 | } 55 | double correct = 0, total = 0; 56 | for (string& line : test_data) { 57 | vector items = split_line(line, '|'); 58 | vector input_ids, target_ids, pred_target_ids; 59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 60 | for (const string& ch : split_line(items[0], ' ')) { 61 | input_ids.push_back(char_to_id[ch]); 62 | } 63 | for (const string& ch : split_line(items[1], ' ')) { 64 | target_ids.push_back(char_to_id[ch]); 65 | } 66 | unsigned morph_id = morph_to_id[items[2]]; 67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 68 | &object_pointers); 69 | 70 | string prediction = ""; 71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 72 | prediction += id_to_char[pred_target_ids[i]]; 73 | if (i != pred_target_ids.size() - 1) { 74 | prediction += " "; 75 | } 76 | } 77 | if (prediction == items[1]) { 78 | correct += 1; 79 | } else { 80 | cout << "GOLD: " << line << 
endl; 81 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 82 | } 83 | total += 1; 84 | } 85 | cerr << "Prediction Accuracy: " << correct / total << endl; 86 | return 1; 87 | } 88 | -------------------------------------------------------------------------------- /src/eval-ensemble-joint-enc-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "joint-enc-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | 27 | unordered_map char_to_id, morph_to_id; 28 | unordered_map id_to_char, id_to_morph; 29 | 30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 31 | unsigned vocab_size = char_to_id.size(); 32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 33 | unsigned morph_size = morph_to_id.size(); 34 | 35 | vector test_data; // Read the dev file in a vector 36 | ReadData(test_filename, &test_data); 37 | 38 | vector > ensmb_m; 39 | vector ensmb_nn; 40 | for (unsigned i = 0; i < argc - 4; ++i) { 41 | vector m; 42 | JointEncMorph nn; 43 | string f = argv[i + 4]; 44 | Read(f, &nn, &m); 45 | ensmb_m.push_back(m); 46 | ensmb_nn.push_back(nn); 47 | } 48 | 49 | // Read the test file and output predictions for the words. 
50 | string line; 51 | vector object_pointers; 52 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 53 | object_pointers.push_back(&ensmb_nn[i]); 54 | } 55 | double correct = 0, total = 0; 56 | for (string& line : test_data) { 57 | vector items = split_line(line, '|'); 58 | vector input_ids, target_ids, pred_target_ids; 59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 60 | for (const string& ch : split_line(items[0], ' ')) { 61 | input_ids.push_back(char_to_id[ch]); 62 | } 63 | for (const string& ch : split_line(items[1], ' ')) { 64 | target_ids.push_back(char_to_id[ch]); 65 | } 66 | unsigned morph_id = morph_to_id[items[2]]; 67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 68 | &object_pointers); 69 | 70 | string prediction = ""; 71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 72 | prediction += id_to_char[pred_target_ids[i]]; 73 | if (i != pred_target_ids.size() - 1) { 74 | prediction += " "; 75 | } 76 | } 77 | if (prediction == items[1]) { 78 | correct += 1; 79 | } else { 80 | cout << "GOLD: " << line << endl; 81 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 82 | } 83 | total += 1; 84 | } 85 | cerr << "Prediction Accuracy: " << correct / total << endl; 86 | return 1; 87 | } 88 | -------------------------------------------------------------------------------- /src/eval-ensemble-lm-joint-enc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "lm.h" 11 | #include "utils.h" 12 | #include "lm-joint-enc.h" 13 | 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | using namespace cnn; 19 | using namespace cnn::expr; 20 | 21 | int main(int argc, char** argv) { 22 | cnn::Initialize(argc, argv); 23 | 24 | string 
vocab_filename = argv[1]; // vocabulary of words/characters 25 | string morph_filename = argv[2]; 26 | string test_filename = argv[3]; 27 | string lm_model_filename = argv[4]; 28 | 29 | unordered_map char_to_id, morph_to_id; 30 | unordered_map id_to_char, id_to_morph; 31 | 32 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 33 | unsigned vocab_size = char_to_id.size(); 34 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 35 | unsigned morph_size = morph_to_id.size(); 36 | 37 | vector test_data; // Read the dev file in a vector 38 | ReadData(test_filename, &test_data); 39 | 40 | LM lm(lm_model_filename, char_to_id, id_to_char); 41 | 42 | vector > ensmb_m; 43 | vector ensmb_nn; 44 | for (unsigned i = 0; i < argc - 5; ++i) { 45 | vector m; 46 | LMJointEnc nn; 47 | string f = argv[i + 5]; 48 | Read(f, &nn, &m); 49 | ensmb_m.push_back(m); 50 | ensmb_nn.push_back(nn); 51 | } 52 | 53 | // Read the test file and output predictions for the words. 54 | string line; 55 | vector object_pointers; 56 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 57 | object_pointers.push_back(&ensmb_nn[i]); 58 | } 59 | double correct = 0, total = 0; 60 | for (string& line : test_data) { 61 | vector items = split_line(line, '|'); 62 | vector input_ids, target_ids, pred_target_ids; 63 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 64 | for (const string& ch : split_line(items[0], ' ')) { 65 | input_ids.push_back(char_to_id[ch]); 66 | } 67 | for (const string& ch : split_line(items[1], ' ')) { 68 | target_ids.push_back(char_to_id[ch]); 69 | } 70 | unsigned morph_id = morph_to_id[items[2]]; 71 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm, 72 | &object_pointers); 73 | 74 | string prediction = ""; 75 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 76 | prediction += id_to_char[pred_target_ids[i]]; 77 | if (i != pred_target_ids.size() - 1) { 78 | prediction += " "; 79 | } 80 | } 81 | if (prediction == items[1]) { 82 | correct += 
1; 83 | } else { 84 | cout << "GOLD: " << line << endl; 85 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 86 | } 87 | total += 1; 88 | } 89 | cerr << "Prediction Accuracy: " << correct / total << endl; 90 | return 1; 91 | } 92 | -------------------------------------------------------------------------------- /src/eval-ensemble-lm-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "lm.h" 11 | #include "utils.h" 12 | #include "lm-sep-morph.h" 13 | 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | using namespace cnn; 19 | using namespace cnn::expr; 20 | 21 | int main(int argc, char** argv) { 22 | cnn::Initialize(argc, argv); 23 | 24 | string vocab_filename = argv[1]; // vocabulary of words/characters 25 | string morph_filename = argv[2]; 26 | string test_filename = argv[3]; 27 | string lm_model_filename = argv[4]; 28 | 29 | unordered_map char_to_id, morph_to_id; 30 | unordered_map id_to_char, id_to_morph; 31 | 32 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 33 | unsigned vocab_size = char_to_id.size(); 34 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 35 | unsigned morph_size = morph_to_id.size(); 36 | 37 | vector test_data; // Read the dev file in a vector 38 | ReadData(test_filename, &test_data); 39 | 40 | LM lm(lm_model_filename, char_to_id, id_to_char); 41 | 42 | vector > ensmb_m; 43 | vector ensmb_nn; 44 | for (unsigned i = 0; i < argc - 5; ++i) { 45 | vector m; 46 | LMSepMorph nn; 47 | string f = argv[i + 5]; 48 | Read(f, &nn, &m); 49 | ensmb_m.push_back(m); 50 | ensmb_nn.push_back(nn); 51 | } 52 | 53 | // Read the test file and output predictions for the words. 
54 | vector object_pointers; 55 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 56 | object_pointers.push_back(&ensmb_nn[i]); 57 | } 58 | 59 | string line; 60 | double correct = 0, total = 0; 61 | for (string& line : test_data) { 62 | vector items = split_line(line, '|'); 63 | vector input_ids, target_ids, pred_target_ids; 64 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 65 | 66 | string input = items[0], output = items[1]; 67 | for (const string& ch : split_line(input, ' ')) { 68 | input_ids.push_back(char_to_id[ch]); 69 | } 70 | for (const string& ch : split_line(output, ' ')) { 71 | target_ids.push_back(char_to_id[ch]); 72 | } 73 | unsigned morph_id = morph_to_id[items[2]]; 74 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm, 75 | &object_pointers); 76 | 77 | string prediction = ""; 78 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 79 | prediction += id_to_char[pred_target_ids[i]]; 80 | if (i != pred_target_ids.size() - 1) { 81 | prediction += " "; 82 | } 83 | } 84 | if (prediction == output) { 85 | correct += 1; 86 | } else { 87 | cout << "GOLD: " << line << endl; 88 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 89 | } 90 | total += 1; 91 | } 92 | cerr << "Prediction Accuracy: " << correct / total << endl; 93 | return 1; 94 | } 95 | -------------------------------------------------------------------------------- /src/eval-ensemble-no-enc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "no-enc.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, 
argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | unsigned beam_size = 2; 27 | 28 | unordered_map char_to_id, morph_to_id; 29 | unordered_map id_to_char, id_to_morph; 30 | 31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 32 | unsigned vocab_size = char_to_id.size(); 33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 34 | unsigned morph_size = morph_to_id.size(); 35 | 36 | vector test_data; // Read the dev file in a vector 37 | ReadData(test_filename, &test_data); 38 | 39 | vector > ensmb_m; 40 | vector ensmb_nn; 41 | for (unsigned i = 0; i < argc - 4; ++i) { 42 | vector m; 43 | NoEnc nn; 44 | string f = argv[i + 4]; 45 | Read(f, &nn, &m); 46 | ensmb_m.push_back(m); 47 | ensmb_nn.push_back(nn); 48 | } 49 | 50 | // Read the test file and output predictions for the words. 51 | string line; 52 | double correct = 0, total = 0; 53 | vector object_pointers; 54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 55 | object_pointers.push_back(&ensmb_nn[i]); 56 | } 57 | for (string& line : test_data) { 58 | vector items = split_line(line, '|'); 59 | vector input_ids, target_ids, pred_target_ids; 60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 61 | for (const string& ch : split_line(items[0], ' ')) { 62 | input_ids.push_back(char_to_id[ch]); 63 | } 64 | for (const string& ch : split_line(items[1], ' ')) { 65 | target_ids.push_back(char_to_id[ch]); 66 | } 67 | unsigned morph_id = morph_to_id[items[2]]; 68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 69 | &object_pointers); 70 | 71 | string prediction = ""; 72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 73 | prediction += id_to_char[pred_target_ids[i]]; 74 | if (i != pred_target_ids.size() - 1) { 75 | prediction += " "; 76 | } 77 | } 78 | if (prediction == items[1]) { 79 | correct += 1; 80 | } else { 81 | //cout << "GOLD: " << line << endl; 
82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 83 | } 84 | total += 1; 85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl; 86 | } 87 | cerr << "Prediction Accuracy: " << correct / total << endl; 88 | return 1; 89 | } 90 | -------------------------------------------------------------------------------- /src/eval-ensemble-sep-morph-beam.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file outputs all the strings in the beam. 3 | */ 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | #include "sep-morph.h" 15 | 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | using namespace cnn; 21 | using namespace cnn::expr; 22 | 23 | int main(int argc, char** argv) { 24 | cnn::Initialize(argc, argv); 25 | 26 | string vocab_filename = argv[1]; // vocabulary of words/characters 27 | string morph_filename = argv[2]; 28 | string test_filename = argv[3]; 29 | string lm_filename = argv[4]; 30 | unsigned beam_size = atoi(argv[5]); 31 | 32 | unordered_map char_to_id, morph_to_id; 33 | unordered_map id_to_char, id_to_morph; 34 | 35 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 36 | unsigned vocab_size = char_to_id.size(); 37 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 38 | unsigned morph_size = morph_to_id.size(); 39 | 40 | vector test_data; // Read the dev file in a vector 41 | ReadData(test_filename, &test_data); 42 | 43 | vector > ensmb_m; 44 | vector ensmb_nn; 45 | for (unsigned i = 0; i < argc - 6; ++i) { 46 | vector m; 47 | SepMorph nn; 48 | string f = argv[i + 6]; 49 | Read(f, &nn, &m); 50 | ensmb_m.push_back(m); 51 | ensmb_nn.push_back(nn); 52 | } 53 | 54 | // Read the test file and output predictions for the words. 
55 | string line; 56 | double correct = 0, total = 0; 57 | vector object_pointers; 58 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 59 | object_pointers.push_back(&ensmb_nn[i]); 60 | } 61 | 62 | for (string& line : test_data) { 63 | vector items = split_line(line, '|'); 64 | vector input_ids, target_ids, pred_target_ids; 65 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 66 | for (const string& ch : split_line(items[0], ' ')) { 67 | input_ids.push_back(char_to_id[ch]); 68 | } 69 | for (const string& ch : split_line(items[1], ' ')) { 70 | target_ids.push_back(char_to_id[ch]); 71 | } 72 | 73 | vector > pred_beams; 74 | vector beam_score; 75 | unsigned morph_id = morph_to_id[items[2]]; 76 | EnsembleBeamDecode(morph_id, beam_size, char_to_id, input_ids, &pred_beams, 77 | &beam_score, &object_pointers); 78 | 79 | cout << "GOLD: " << line << endl; 80 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 81 | pred_target_ids = pred_beams[beam_id]; 82 | string prediction = ""; 83 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 84 | prediction += id_to_char[pred_target_ids[i]]; 85 | if (i != pred_target_ids.size() - 1) { 86 | prediction += " "; 87 | } 88 | } 89 | cout << "PRED: " << prediction << " " << beam_score[beam_id] << endl; 90 | } 91 | } 92 | return 1; 93 | } 94 | -------------------------------------------------------------------------------- /src/eval-ensemble-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "sep-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | 
string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | 27 | unordered_map char_to_id, morph_to_id; 28 | unordered_map id_to_char, id_to_morph; 29 | 30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 31 | unsigned vocab_size = char_to_id.size(); 32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 33 | unsigned morph_size = morph_to_id.size(); 34 | 35 | vector test_data; // Read the dev file in a vector 36 | ReadData(test_filename, &test_data); 37 | 38 | vector > ensmb_m; 39 | vector ensmb_nn; 40 | for (unsigned i = 0; i < argc - 4; ++i) { 41 | vector m; 42 | SepMorph nn; 43 | string f = argv[i + 4]; 44 | Read(f, &nn, &m); 45 | ensmb_m.push_back(m); 46 | ensmb_nn.push_back(nn); 47 | } 48 | 49 | // Read the test file and output predictions for the words. 50 | string line; 51 | double correct = 0, total = 0; 52 | vector object_pointers; 53 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 54 | object_pointers.push_back(&ensmb_nn[i]); 55 | } 56 | for (string& line : test_data) { 57 | vector items = split_line(line, '|'); 58 | vector input_ids, target_ids, pred_target_ids; 59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 60 | for (const string& ch : split_line(items[0], ' ')) { 61 | input_ids.push_back(char_to_id[ch]); 62 | } 63 | for (const string& ch : split_line(items[1], ' ')) { 64 | target_ids.push_back(char_to_id[ch]); 65 | } 66 | unsigned morph_id = morph_to_id[items[2]]; 67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 68 | &object_pointers); 69 | 70 | string prediction = ""; 71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 72 | prediction += id_to_char[pred_target_ids[i]]; 73 | if (i != pred_target_ids.size() - 1) { 74 | prediction += " "; 75 | } 76 | } 77 | if (prediction == items[1]) { 78 | correct += 1; 79 | } else { 80 | cout << "GOLD: " << line << endl; 81 | cout << "PRED: " << items[0] << "|" << 
prediction << "|" << items[2] << endl; 82 | } 83 | total += 1; 84 | } 85 | cerr << "Prediction Accuracy: " << correct / total << endl; 86 | return 1; 87 | } 88 | -------------------------------------------------------------------------------- /src/joint-enc-dec-morph.cc: -------------------------------------------------------------------------------- 1 | #include "joint-enc-dec-morph.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | 7 | string BOW = "", EOW = ""; 8 | unsigned MAX_PRED_LEN = 100; 9 | 10 | JointEncDecMorph::JointEncDecMorph( 11 | const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void JointEncDecMorph::InitParams(vector* m) { 24 | // Have all the shared parameters in one model 25 | input_forward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]); 26 | input_backward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]); 27 | output_forward = LSTMBuilder(layers, 2 * char_len + hidden_len, 28 | hidden_len, (*m)[morph_len]); 29 | 30 | char_vecs = (*m)[morph_len]->add_lookup_parameters(vocab_len, {char_len}); 31 | 32 | phidden_to_output = (*m)[morph_len]->add_parameters({vocab_len, hidden_len}); 33 | phidden_to_output_bias = (*m)[morph_len]->add_parameters({vocab_len, 1}); 34 | 35 | eps_vecs = (*m)[morph_len]->add_lookup_parameters(max_eps, {char_len}); 36 | 37 | for (unsigned i = 0; i < morph_len; ++i) { 38 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 39 | 2 * hidden_len})); 40 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 41 | } 42 | } 43 | 44 | void JointEncDecMorph::AddParamsToCG(const unsigned& morph_id, 
ComputationGraph* cg) { 45 | input_forward.new_graph(*cg); 46 | input_backward.new_graph(*cg); 47 | output_forward.new_graph(*cg); 48 | 49 | hidden_to_output = parameter(*cg, phidden_to_output); 50 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias); 51 | 52 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 53 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 54 | } 55 | 56 | void JointEncDecMorph::RunFwdBwd(const vector& inputs, 57 | Expression* hidden, ComputationGraph *cg) { 58 | vector input_vecs; 59 | for (const unsigned& input_id : inputs) { 60 | input_vecs.push_back(lookup(*cg, char_vecs, input_id)); 61 | } 62 | 63 | // Run forward LSTM 64 | Expression forward_unit; 65 | input_forward.start_new_sequence(); 66 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 67 | forward_unit = input_forward.add_input(input_vecs[i]); 68 | } 69 | 70 | // Run backward LSTM 71 | Expression backward_unit; 72 | input_backward.start_new_sequence(); 73 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 74 | backward_unit = input_backward.add_input(input_vecs[i]); 75 | } 76 | 77 | // Concatenate the forward and back hidden layers 78 | *hidden = concatenate({forward_unit, backward_unit}); 79 | } 80 | 81 | void JointEncDecMorph::TransformEncodedInput(Expression* encoded_input) const { 82 | *encoded_input = affine_transform({transform_encoded_bias, 83 | transform_encoded, *encoded_input}); 84 | } 85 | 86 | void JointEncDecMorph::ProjectToOutput(const Expression& hidden, Expression* out) 87 | const { 88 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 89 | } 90 | 91 | 92 | Expression JointEncDecMorph::ComputeLoss(const vector& hidden_units, 93 | const vector& targets) const { 94 | assert(hidden_units.size() == targets.size()); 95 | vector losses; 96 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 97 | Expression out; 98 | ProjectToOutput(hidden_units[i], &out); 99 | 
losses.push_back(pickneglogsoftmax(out, targets[i])); 100 | } 101 | return sum(losses); 102 | } 103 | 104 | float JointEncDecMorph::Train(const unsigned& morph_id, const vector& inputs, 105 | const vector& outputs, AdadeltaTrainer* opt, 106 | AdadeltaTrainer* shared_opt) { 107 | ComputationGraph cg; 108 | AddParamsToCG(morph_id, &cg); 109 | 110 | // Encode and Transform to feed into decoder 111 | Expression encoded_input_vec; 112 | RunFwdBwd(inputs, &encoded_input_vec, &cg); 113 | TransformEncodedInput(&encoded_input_vec); 114 | 115 | // Use this encoded word vector to predict the transformed word 116 | vector input_vecs_for_dec; 117 | vector output_ids_for_pred; 118 | for (unsigned i = 0; i < outputs.size(); ++i) { 119 | if (i < outputs.size() - 1) { 120 | // '' will not be fed as input -- it needs to be predicted. 121 | if (i < inputs.size() - 1) { 122 | input_vecs_for_dec.push_back(concatenate( 123 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 124 | lookup(cg, char_vecs, inputs[i + 1])})); 125 | } else { 126 | input_vecs_for_dec.push_back(concatenate( 127 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 128 | lookup(cg, eps_vecs, min(unsigned(i - inputs.size()), max_eps - 1))})); 129 | } 130 | } 131 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 
132 | output_ids_for_pred.push_back(outputs[i]); 133 | } 134 | } 135 | 136 | vector decoder_hidden_units; 137 | output_forward.start_new_sequence(); 138 | for (const auto& vec : input_vecs_for_dec) { 139 | decoder_hidden_units.push_back(output_forward.add_input(vec)); 140 | } 141 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 142 | 143 | float return_loss = as_scalar(cg.forward()); 144 | cg.backward(); 145 | opt->update(1.0f); // Update the morph specific parameters 146 | shared_opt->update(1.0f); // Update the shared parameters 147 | return return_loss; 148 | } 149 | 150 | void Serialize(string& filename, JointEncDecMorph& model, 151 | vector* cnn_models) { 152 | ofstream outfile(filename); 153 | if (!outfile.is_open()) { 154 | cerr << "File opening failed" << endl; 155 | } 156 | 157 | boost::archive::text_oarchive oa(outfile); 158 | oa & model; 159 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 160 | oa & *(*cnn_models)[i]; 161 | } 162 | 163 | cerr << "Saved model to: " << filename << endl; 164 | outfile.close(); 165 | } 166 | 167 | void Read(string& filename, JointEncDecMorph* model, vector* cnn_models) { 168 | ifstream infile(filename); 169 | if (!infile.is_open()) { 170 | cerr << "File opening failed" << endl; 171 | } 172 | 173 | boost::archive::text_iarchive ia(infile); 174 | ia & *model; 175 | for (unsigned i = 0; i < model->morph_len + 1; ++i) { 176 | Model *cnn_model = new Model(); 177 | cnn_models->push_back(cnn_model); 178 | } 179 | 180 | model->InitParams(cnn_models); 181 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 182 | ia & *(*cnn_models)[i]; 183 | } 184 | 185 | cerr << "Loaded model from: " << filename << endl; 186 | infile.close(); 187 | } 188 | 189 | void 190 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 191 | const vector& input_ids, vector* pred_target_ids, 192 | vector* ensmb_model) { 193 | ComputationGraph cg; 194 | 195 | unsigned ensmb = ensmb_model->size(); 196 | vector 
encoded_word_vecs; 197 | for (unsigned i = 0; i < ensmb; ++i) { 198 | Expression encoded_word_vec; 199 | auto model = (*ensmb_model)[i]; 200 | model->AddParamsToCG(morph_id, &cg); 201 | model->RunFwdBwd(input_ids, &encoded_word_vec, &cg); 202 | model->TransformEncodedInput(&encoded_word_vec); 203 | encoded_word_vecs.push_back(encoded_word_vec); 204 | model->output_forward.start_new_sequence(); 205 | } 206 | 207 | unsigned out_index = 1; 208 | unsigned pred_index = char_to_id[BOW]; 209 | while (pred_target_ids->size() < MAX_PRED_LEN) { 210 | vector ensmb_out; 211 | pred_target_ids->push_back(pred_index); 212 | if (pred_index == char_to_id[EOW]) { 213 | return; // If the end is found, break from the loop and return 214 | } 215 | 216 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 217 | auto model = (*ensmb_model)[ensmb_id]; 218 | Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index); 219 | Expression input, input_char_vec; 220 | if (out_index < input_ids.size()) { 221 | input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]); 222 | } else { 223 | input_char_vec = lookup(cg, model->eps_vecs, 224 | min(unsigned(out_index - input_ids.size()), 225 | model->max_eps - 1)); 226 | } 227 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 228 | input_char_vec}); 229 | 230 | Expression hidden = model->output_forward.add_input(input); 231 | Expression out; 232 | model->ProjectToOutput(hidden, &out); 233 | ensmb_out.push_back(log_softmax(out)); 234 | } 235 | 236 | Expression out = sum(ensmb_out) / ensmb_out.size(); 237 | vector dist = as_vector(cg.incremental_forward()); 238 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 239 | out_index++; 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /src/joint-enc-dec-morph.h: -------------------------------------------------------------------------------- 1 | #ifndef JOINT_ENC_DEC_MORPH_H_ 2 | 
#define JOINT_ENC_DEC_MORPH_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | using namespace cnn; 21 | using namespace cnn::expr; 22 | 23 | // The class has a shared encoder acorss all morphological types. 24 | // Other auxiliary units are also shared, except for the decoder and 25 | // input transofrmation parameters. 26 | class JointEncDecMorph { 27 | public: 28 | LSTMBuilder input_forward, input_backward, output_forward; // Shared encoder 29 | LookupParameters* char_vecs; // Shared char vectors 30 | 31 | Expression hidden_to_output, hidden_to_output_bias; 32 | Parameters *phidden_to_output, *phidden_to_output_bias; // Shared 33 | 34 | // The only non-shared component is tranformation. 35 | Expression transform_encoded, transform_encoded_bias; 36 | vector ptransform_encoded, ptransform_encoded_bias; 37 | 38 | LookupParameters* eps_vecs; 39 | 40 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 41 | 42 | JointEncDecMorph() {} 43 | 44 | JointEncDecMorph(const unsigned& char_length, const unsigned& hidden_length, 45 | const unsigned& vocab_length, const unsigned& layers, 46 | const unsigned& num_morph, vector* m, 47 | vector* optimizer); 48 | 49 | void InitParams(vector* m); 50 | 51 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 52 | 53 | void RunFwdBwd(const vector& inputs, 54 | Expression* hidden, ComputationGraph *cg); 55 | 56 | void TransformEncodedInput(Expression* encoded_input) const; 57 | 58 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 59 | 60 | Expression ComputeLoss(const vector& hidden_units, 61 | const vector& targets) const; 62 | 63 | float Train(const unsigned& morph_id, const vector& inputs, 64 | 
const vector& outputs, AdadeltaTrainer* opt, 65 | AdadeltaTrainer* shared_opt); 66 | 67 | friend class boost::serialization::access; 68 | template void serialize(Archive& ar, const unsigned int) { 69 | ar & char_len; 70 | ar & hidden_len; 71 | ar & vocab_len; 72 | ar & layers; 73 | ar & morph_len; 74 | ar & max_eps; 75 | } 76 | }; 77 | 78 | void Serialize(string& filename, JointEncDecMorph& model, vector* cnn_model); 79 | 80 | void Read(string& filename, JointEncDecMorph* model, vector* cnn_model); 81 | 82 | void 83 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 84 | const vector& input_ids, vector* pred_target_ids, 85 | vector* ensmb_model); 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /src/joint-enc-morph.cc: -------------------------------------------------------------------------------- 1 | #include "joint-enc-morph.h" 2 | 3 | #include 4 | 5 | using namespace std; 6 | using namespace cnn; 7 | using namespace cnn::expr; 8 | 9 | string BOW = "", EOW = ""; 10 | unsigned MAX_PRED_LEN = 100; 11 | float NEG_INF = numeric_limits::min(); 12 | 13 | JointEncMorph::JointEncMorph( 14 | const unsigned& char_length, const unsigned& hidden_length, 15 | const unsigned& vocab_length, const unsigned& num_layers, 16 | const unsigned& num_morph, vector* m, 17 | vector* optimizer) { 18 | char_len = char_length; 19 | hidden_len = hidden_length; 20 | vocab_len = vocab_length; 21 | layers = num_layers; 22 | morph_len = num_morph; 23 | InitParams(m); 24 | } 25 | 26 | void JointEncMorph::InitParams(vector* m) { 27 | 28 | // Have all the shared parameters in one model 29 | input_forward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]); 30 | input_backward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]); 31 | 32 | char_vecs = (*m)[morph_len]->add_lookup_parameters(vocab_len, {char_len}); 33 | 34 | phidden_to_output = (*m)[morph_len]->add_parameters({vocab_len, hidden_len}); 35 | 
phidden_to_output_bias = (*m)[morph_len]->add_parameters({vocab_len, 1}); 36 | 37 | for (unsigned i = 0; i < morph_len; ++i) { 38 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len, 39 | hidden_len, (*m)[i])); 40 | 41 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 42 | 2 * hidden_len})); 43 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 44 | 45 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 46 | } 47 | } 48 | 49 | void JointEncMorph::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 50 | input_forward.new_graph(*cg); 51 | input_backward.new_graph(*cg); 52 | output_forward[morph_id].new_graph(*cg); 53 | 54 | hidden_to_output = parameter(*cg, phidden_to_output); 55 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias); 56 | 57 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 58 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 59 | } 60 | 61 | void JointEncMorph::RunFwdBwd(const vector& inputs, 62 | Expression* hidden, ComputationGraph *cg) { 63 | vector input_vecs; 64 | for (const unsigned& input_id : inputs) { 65 | input_vecs.push_back(lookup(*cg, char_vecs, input_id)); 66 | } 67 | 68 | // Run forward LSTM 69 | Expression forward_unit; 70 | input_forward.start_new_sequence(); 71 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 72 | forward_unit = input_forward.add_input(input_vecs[i]); 73 | } 74 | 75 | // Run backward LSTM 76 | Expression backward_unit; 77 | input_backward.start_new_sequence(); 78 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 79 | backward_unit = input_backward.add_input(input_vecs[i]); 80 | } 81 | 82 | // Concatenate the forward and back hidden layers 83 | *hidden = concatenate({forward_unit, backward_unit}); 84 | } 85 | 86 | void JointEncMorph::TransformEncodedInput(Expression* encoded_input) const { 87 | *encoded_input = affine_transform({transform_encoded_bias, 
88 | transform_encoded, *encoded_input}); 89 | } 90 | 91 | void JointEncMorph::ProjectToOutput(const Expression& hidden, Expression* out) 92 | const { 93 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 94 | } 95 | 96 | 97 | Expression JointEncMorph::ComputeLoss(const vector& hidden_units, 98 | const vector& targets) const { 99 | assert(hidden_units.size() == targets.size()); 100 | vector losses; 101 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 102 | Expression out; 103 | ProjectToOutput(hidden_units[i], &out); 104 | losses.push_back(pickneglogsoftmax(out, targets[i])); 105 | } 106 | return sum(losses); 107 | } 108 | 109 | float JointEncMorph::Train(const unsigned& morph_id, const vector& inputs, 110 | const vector& outputs, AdadeltaTrainer* opt, 111 | AdadeltaTrainer* shared_opt) { 112 | ComputationGraph cg; 113 | AddParamsToCG(morph_id, &cg); 114 | 115 | // Encode and Transform to feed into decoder 116 | Expression encoded_input_vec; 117 | RunFwdBwd(inputs, &encoded_input_vec, &cg); 118 | TransformEncodedInput(&encoded_input_vec); 119 | 120 | // Use this encoded word vector to predict the transformed word 121 | vector input_vecs_for_dec; 122 | vector output_ids_for_pred; 123 | for (unsigned i = 0; i < outputs.size(); ++i) { 124 | if (i < outputs.size() - 1) { 125 | // '' will not be fed as input -- it needs to be predicted. 126 | if (i < inputs.size() - 1) { 127 | input_vecs_for_dec.push_back(concatenate( 128 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 129 | lookup(cg, char_vecs, inputs[i + 1])})); 130 | } else { 131 | input_vecs_for_dec.push_back(concatenate( 132 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 133 | lookup(cg, eps_vecs[morph_id], 134 | min(unsigned(i - inputs.size()), max_eps - 1))})); 135 | } 136 | } 137 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 
138 | output_ids_for_pred.push_back(outputs[i]); 139 | } 140 | } 141 | 142 | vector decoder_hidden_units; 143 | output_forward[morph_id].start_new_sequence(); 144 | for (const auto& vec : input_vecs_for_dec) { 145 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 146 | } 147 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 148 | 149 | float return_loss = as_scalar(cg.forward()); 150 | cg.backward(); 151 | opt->update(1.0f); // Update the morph specific parameters 152 | shared_opt->update(1.0f); // Update the shared parameters 153 | return return_loss; 154 | } 155 | 156 | void Serialize(string& filename, JointEncMorph& model, 157 | vector* cnn_models) { 158 | ofstream outfile(filename); 159 | if (!outfile.is_open()) { 160 | cerr << "File opening failed: " << filename << endl; 161 | } 162 | 163 | boost::archive::text_oarchive oa(outfile); 164 | oa & model; 165 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 166 | oa & *(*cnn_models)[i]; 167 | } 168 | 169 | cerr << "Saved model to: " << filename << endl; 170 | outfile.close(); 171 | } 172 | 173 | void Read(string& filename, JointEncMorph* model, vector* cnn_models) { 174 | ifstream infile(filename); 175 | if (!infile.is_open()) { 176 | cerr << "File opening failed: " << filename << endl; 177 | } 178 | 179 | boost::archive::text_iarchive ia(infile); 180 | ia & *model; 181 | for (unsigned i = 0; i < model->morph_len + 1; ++i) { 182 | Model *cnn_model = new Model(); 183 | cnn_models->push_back(cnn_model); 184 | } 185 | 186 | model->InitParams(cnn_models); 187 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 188 | ia & *(*cnn_models)[i]; 189 | } 190 | 191 | cerr << "Loaded model from: " << filename << endl; 192 | infile.close(); 193 | } 194 | 195 | void 196 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 197 | const vector& input_ids, vector* pred_target_ids, 198 | vector* ensmb_model) { 199 | ComputationGraph cg; 200 | 201 | unsigned 
// Greedy ensemble decoding for JointEncMorph: every model encodes the input,
// then at each step the models' log-softmax outputs are averaged and the
// argmax character is appended, until EOW or MAX_PRED_LEN characters.
void
EnsembleDecode(const unsigned& morph_id,
               unordered_map<string, unsigned>& char_to_id,
               const vector<unsigned>& input_ids,
               vector<unsigned>* pred_target_ids,
               vector<JointEncMorph*>* ensmb_model) {
  ComputationGraph cg;

  unsigned ensmb = ensmb_model->size();
  vector<Expression> encoded_word_vecs;
  // Encode the input once per ensemble member and reset its decoder.
  for (unsigned i = 0; i < ensmb; ++i) {
    Expression encoded_word_vec;
    auto model = (*ensmb_model)[i];
    model->AddParamsToCG(morph_id, &cg);
    model->RunFwdBwd(input_ids, &encoded_word_vec, &cg);
    model->TransformEncodedInput(&encoded_word_vec);
    encoded_word_vecs.push_back(encoded_word_vec);
    model->output_forward[morph_id].start_new_sequence();
  }

  unsigned out_index = 1;               // Position of the aligned input char.
  unsigned pred_index = char_to_id[BOW];  // Last emitted character.
  while (pred_target_ids->size() < MAX_PRED_LEN) {
    vector<Expression> ensmb_out;
    pred_target_ids->push_back(pred_index);
    if (pred_index == char_to_id[EOW]) {
      return;  // If the end is found, break from the loop and return.
    }

    for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
      auto model = (*ensmb_model)[ensmb_id];
      Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index);
      Expression input, input_char_vec;
      if (out_index < input_ids.size()) {
        // Feed the input character aligned with this output position.
        input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]);
      } else {
        // Past the end of the input: feed an epsilon vector instead.
        input_char_vec = lookup(cg, model->eps_vecs[morph_id],
                                min(unsigned(out_index - input_ids.size()),
                                    model->max_eps - 1));
      }
      input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
                           input_char_vec});

      Expression hidden = model->output_forward[morph_id].add_input(input);
      Expression out;
      model->ProjectToOutput(hidden, &out);
      ensmb_out.push_back(log_softmax(out));
    }

    // Average the per-model log distributions, then pick the argmax.
    Expression out = sum(ensmb_out) / ensmb_out.size();
    vector<float> dist = as_vector(cg.incremental_forward());
    pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));
    out_index++;
  }
}

// Beam-search ensemble decoding.  Maintains beam_size hypotheses; at every
// step each active beam is extended by every vocabulary character, the
// beam_size best (by cumulative averaged log-prob) are kept, and a beam is
// deactivated when it emits EOW or exceeds MAX_PRED_LEN.  Final sequences
// land in *sequences and their cumulative log-probs in *tm_scores.
void
EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
                   unordered_map<string, unsigned>& char_to_id,
                   const vector<unsigned>& input_ids,
                   vector<vector<unsigned> >* sequences,
                   vector<float>* tm_scores,
                   vector<JointEncMorph*>* ensmb_model) {
  unsigned out_index = 1;
  unsigned ensmb = ensmb_model->size();
  ComputationGraph cg;

  // Compute stuff for every model in the ensemble: encode the input and
  // take the first decoder step (conditioned on BOW).
  vector<Expression> encoded_word_vecs;
  vector<Expression> ensmb_out;
  for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
    auto& model = *(*ensmb_model)[ensmb_id];
    model.AddParamsToCG(morph_id, &cg);

    Expression encoded_word_vec;
    model.RunFwdBwd(input_ids, &encoded_word_vec, &cg);
    model.TransformEncodedInput(&encoded_word_vec);
    encoded_word_vecs.push_back(encoded_word_vec);
    model.output_forward[morph_id].start_new_sequence();

    Expression prev_output_vec = lookup(cg, model.char_vecs,
                                        char_to_id[BOW]);
    Expression input = concatenate({encoded_word_vecs[ensmb_id],
                                    prev_output_vec,
                                    lookup(cg, model.char_vecs,
                                           input_ids[out_index])});
    Expression hidden = model.output_forward[morph_id].add_input(input);
    Expression out;
    model.ProjectToOutput(hidden, &out);
    out = log_softmax(out);
    ensmb_out.push_back(out);
  }

  // Compute the average of the ensemble output and rank all first
  // characters by log-probability.
  Expression out_dist = average(ensmb_out);
  vector<float> log_dist = as_vector(cg.incremental_forward());
  priority_queue<pair<float, unsigned> > init_queue;
  for (unsigned i = 0; i < log_dist.size(); ++i) {
    init_queue.push(make_pair(log_dist[i], i));
  }
  unsigned vocab_size = log_dist.size();

  // Initialise the beam_size sequences, scores, hidden states.
  vector<float> log_scores;
  // prev_states[beam][member]: decoder state to resume from at each step.
  vector<vector<RNNPointer> > prev_states;
  for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
    vector<unsigned> seq;
    seq.push_back(char_to_id[BOW]);
    seq.push_back(init_queue.top().second);
    sequences->push_back(seq);
    log_scores.push_back(init_queue.top().first);

    // All beams start from the same first-step decoder state.
    vector<RNNPointer> ensmb_states;
    for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
      auto& model = *(*ensmb_model)[ensmb_id];
      ensmb_states.push_back(model.output_forward[morph_id].state());
    }
    prev_states.push_back(ensmb_states);
    init_queue.pop();
  }

  // Constant distribution used as the output of inactive beams.
  // (NEG_INF is the file-scope mask constant.)
  vector<float> neg_inf(vocab_size, NEG_INF);
  Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf);

  vector<bool> active_beams(beam_size, true);
  while (true) {
    out_index++;
    // (cumulative log-prob, (source beam, character)) candidates.
    priority_queue<pair<float, pair<unsigned, unsigned> > > probs_queue;
    vector<vector<RNNPointer> > curr_states;
    vector<Expression> out_dist;
    for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
      if (active_beams[beam_id]) {
        unsigned prev_out_char = (*sequences)[beam_id].back();
        vector<Expression> ensmb_out;
        vector<RNNPointer> ensmb_states;
        for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) {
          auto& model = *(*ensmb_model)[ensmb_id];
          Expression input_char_vec;
          if (out_index < input_ids.size()) {
            input_char_vec = lookup(cg, model.char_vecs,
                                    input_ids[out_index]);
          } else {
            // Past the end of the input: epsilon vector.
            input_char_vec = lookup(cg, model.eps_vecs[morph_id],
                                    min(unsigned(out_index - input_ids.size()),
                                        model.max_eps - 1));
          }

          Expression prev_out_vec = lookup(cg, model.char_vecs,
                                           prev_out_char);
          Expression input = concatenate({encoded_word_vecs[ensmb_id],
                                          prev_out_vec, input_char_vec});
          // Resume this beam's decoder history via its saved RNNPointer.
          Expression hidden = model.output_forward[morph_id].add_input(
              prev_states[beam_id][ensmb_id], input);
          ensmb_states.push_back(model.output_forward[morph_id].state());

          Expression out;
          model.ProjectToOutput(hidden, &out);
          out = log_softmax(out);
          ensmb_out.push_back(out);
        }
        curr_states.push_back(ensmb_states);
        out_dist.push_back(average(ensmb_out));
      } else {
        // Inactive beam: dummy state and a masked distribution so it
        // cannot win any candidate slot.
        vector<RNNPointer> dummy(ensmb, RNNPointer(0));
        curr_states.push_back(dummy);
        out_dist.push_back(neg_inf_vec);
      }
    }

    // Evaluate all beams' distributions in one forward pass.
    Expression all_scores = concatenate(out_dist);
    vector<float> log_dist = as_vector(cg.incremental_forward());

    for (unsigned index = 0; index < log_dist.size(); ++index) {
      unsigned beam_id = index / vocab_size;
      unsigned char_id = index % vocab_size;
      if (active_beams[beam_id]) {
        pair<unsigned, unsigned> location = make_pair(beam_id, char_id);
        probs_queue.push(pair<float, pair<unsigned, unsigned> >(
            log_scores[beam_id] + log_dist[index], location));
      }
    }

    // Find the beam_size best now and update the variables.
    unordered_map<unsigned, vector<unsigned> > new_seq;
    for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
      if (active_beams[beam_id]) {
        float log_prob = probs_queue.top().first;
        pair<unsigned, unsigned> location = probs_queue.top().second;
        unsigned old_beam_id = location.first, char_id = location.second;

        vector<unsigned> seq = (*sequences)[old_beam_id];
        seq.push_back(char_id);
        new_seq[beam_id] = seq;
        log_scores[beam_id] = log_prob;  // Update the score.

        prev_states[beam_id] = curr_states[old_beam_id];  // Update hidden state.
        probs_queue.pop();
      }
    }

    // Update the sequences now (deferred so reads above see old values).
    for (auto& it : new_seq) {
      (*sequences)[it.first] = it.second;
    }

    // Check if a sequence should be made inactive.
    for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
      if (active_beams[beam_id] &&
          ((*sequences)[beam_id].back() == char_to_id[EOW] ||
           (*sequences)[beam_id].size() > MAX_PRED_LEN)) {
        active_beams[beam_id] = false;
      }
    }

    // Check if all sequences are inactive; if so, report scores and stop.
    bool all_inactive = true;
    for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
      if (active_beams[beam_id]) {
        all_inactive = false;
        break;
      }
    }

    if (all_inactive) {
      *tm_scores = log_scores;
      return;
    }
  }
}
// The class has a shared encoder across all morphological types.
// Other auxiliary units are also shared, except for the decoder and
// input transformation parameters.
class JointEncMorph {
 public:
  LSTMBuilder input_forward, input_backward;  // Shared encoder.
  vector<LSTMBuilder> output_forward;         // One decoder per morphology.
  LookupParameters* char_vecs;                // Shared char vectors.

  Expression hidden_to_output, hidden_to_output_bias;
  Parameters *phidden_to_output, *phidden_to_output_bias;  // Shared.

  // Morphology-specific transformation of the encoded input.
  Expression transform_encoded, transform_encoded_bias;
  vector<Parameters*> ptransform_encoded, ptransform_encoded_bias;

  vector<LookupParameters*> eps_vecs;  // Not sharing the epsilon vectors.

  unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5;

  JointEncMorph() {}

  JointEncMorph(const unsigned& char_length, const unsigned& hidden_length,
                const unsigned& vocab_length, const unsigned& layers,
                const unsigned& num_morph, vector<Model*>* m,
                vector<AdadeltaTrainer>* optimizer);

  void InitParams(vector<Model*>* m);

  void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);

  void RunFwdBwd(const vector<unsigned>& inputs,
                 Expression* hidden, ComputationGraph* cg);

  void TransformEncodedInput(Expression* encoded_input) const;

  void ProjectToOutput(const Expression& hidden, Expression* out) const;

  Expression ComputeLoss(const vector<Expression>& hidden_units,
                         const vector<unsigned>& targets) const;

  float Train(const unsigned& morph_id, const vector<unsigned>& inputs,
              const vector<unsigned>& outputs, AdadeltaTrainer* opt,
              AdadeltaTrainer* shared_opt);

  // Only hyper-parameters are boost-serialized; weights are handled by
  // Serialize()/Read() streaming the raw cnn Models.
  friend class boost::serialization::access;
  template <class Archive>
  void serialize(Archive& ar, const unsigned int) {
    ar & char_len;
    ar & hidden_len;
    ar & vocab_len;
    ar & layers;
    ar & morph_len;
    ar & max_eps;
  }
};

void Serialize(string& filename, JointEncMorph& model,
               vector<Model*>* cnn_model);

void Read(string& filename, JointEncMorph* model,
          vector<Model*>* cnn_model);

// Greedy ensemble decoding (see joint-enc-morph.cc).
void
EnsembleDecode(const unsigned& morph_id,
               unordered_map<string, unsigned>& char_to_id,
               const vector<unsigned>& input_ids,
               vector<unsigned>* pred_target_ids,
               vector<JointEncMorph*>* ensmb_model);

// Beam-search ensemble decoding (see joint-enc-morph.cc).
void
EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
                   unordered_map<string, unsigned>& char_to_id,
                   const vector<unsigned>& input_ids,
                   vector<vector<unsigned> >* sequences,
                   vector<float>* tm_scores,
                   vector<JointEncMorph*>* ensmb_model);
ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 43 | 44 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 45 | } 46 | } 47 | 48 | void LMJointEnc::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 49 | input_forward.new_graph(*cg); 50 | input_backward.new_graph(*cg); 51 | output_forward[morph_id].new_graph(*cg); 52 | 53 | hidden_to_output = parameter(*cg, phidden_to_output); 54 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias); 55 | 56 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 57 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 58 | } 59 | 60 | void LMJointEnc::RunFwdBwd(const vector& inputs, 61 | Expression* hidden, ComputationGraph *cg) { 62 | vector input_vecs; 63 | for (const unsigned& input_id : inputs) { 64 | input_vecs.push_back(lookup(*cg, char_vecs, input_id)); 65 | } 66 | 67 | // Run forward LSTM 68 | Expression forward_unit; 69 | input_forward.start_new_sequence(); 70 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 71 | forward_unit = input_forward.add_input(input_vecs[i]); 72 | } 73 | 74 | // Run backward LSTM 75 | Expression backward_unit; 76 | input_backward.start_new_sequence(); 77 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 78 | backward_unit = input_backward.add_input(input_vecs[i]); 79 | } 80 | 81 | // Concatenate the forward and back hidden layers 82 | *hidden = concatenate({forward_unit, backward_unit}); 83 | } 84 | 85 | void LMJointEnc::TransformEncodedInput(Expression* encoded_input) const { 86 | *encoded_input = affine_transform({transform_encoded_bias, 87 | transform_encoded, *encoded_input}); 88 | } 89 | 90 | void LMJointEnc::ProjectToOutput(const Expression& hidden, Expression* out) 91 | const { 92 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 93 | } 94 | 95 | Expression LMJointEnc::ComputeLoss(const vector& hidden_units, 96 | const vector& targets, 97 | LM 
*lm, ComputationGraph* cg) const { 98 | vector losses; 99 | vector incremental_targets; 100 | incremental_targets.push_back(lm->char_to_id[BOW]); 101 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 102 | Expression out; 103 | ProjectToOutput(hidden_units[i], &out); 104 | Expression trans_lp = log_softmax(out); 105 | 106 | // Calculate the LM probabilities of all possible outputs. 107 | Expression lm_lp = LogProbDist(incremental_targets, lm, cg); 108 | 109 | unsigned lm_index = min(i + 1, max_lm_pos_weights - 1); 110 | Expression lm_weight = lookup(*cg, lm_pos_weights, lm_index); 111 | 112 | //Expression total_lp = trans_lp + cwise_multiply(lm_lp, Softplus(lm_weight)); 113 | Expression total_lp = trans_lp + lm_lp * Softplus(lm_weight); 114 | losses.push_back(pickneglogsoftmax(total_lp, targets[i])); 115 | incremental_targets.push_back(targets[i]); 116 | } 117 | return sum(losses); 118 | } 119 | 120 | float LMJointEnc::Train(const unsigned& morph_id, const vector& inputs, 121 | const vector& outputs, 122 | LM* lm, AdadeltaTrainer* opt, 123 | AdadeltaTrainer* shared_opt) { 124 | ComputationGraph cg; 125 | AddParamsToCG(morph_id, &cg); 126 | 127 | // Encode and Transform to feed into decoder 128 | Expression encoded_input_vec; 129 | RunFwdBwd(inputs, &encoded_input_vec, &cg); 130 | TransformEncodedInput(&encoded_input_vec); 131 | 132 | // Use this encoded word vector to predict the transformed word 133 | vector input_vecs_for_dec; 134 | vector output_ids_for_pred; 135 | for (unsigned i = 0; i < outputs.size(); ++i) { 136 | if (i < outputs.size() - 1) { 137 | // '' will not be fed as input -- it needs to be predicted. 
138 | if (i < inputs.size() - 1) { 139 | input_vecs_for_dec.push_back(concatenate( 140 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 141 | lookup(cg, char_vecs, inputs[i + 1])})); 142 | } else { 143 | input_vecs_for_dec.push_back(concatenate( 144 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 145 | lookup(cg, eps_vecs[morph_id], 146 | min(unsigned(i - inputs.size()), max_eps - 1))})); 147 | } 148 | } 149 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 150 | output_ids_for_pred.push_back(outputs[i]); 151 | } 152 | } 153 | 154 | vector decoder_hidden_units; 155 | output_forward[morph_id].start_new_sequence(); 156 | for (const auto& vec : input_vecs_for_dec) { 157 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 158 | } 159 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred, lm, 160 | &cg); 161 | 162 | float return_loss = as_scalar(cg.forward()); 163 | cg.backward(); 164 | opt->update(1.0f); // Update the morph specific parameters 165 | shared_opt->update(1.0f); // Update the shared parameters 166 | return return_loss; 167 | } 168 | 169 | void Serialize(string& filename, LMJointEnc& model, 170 | vector* cnn_models) { 171 | ofstream outfile(filename); 172 | if (!outfile.is_open()) { 173 | cerr << "File opening failed" << endl; 174 | } 175 | 176 | boost::archive::text_oarchive oa(outfile); 177 | oa & model; 178 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 179 | oa & *(*cnn_models)[i]; 180 | } 181 | 182 | cerr << "Saved model to: " << filename << endl; 183 | outfile.close(); 184 | } 185 | 186 | void Read(string& filename, LMJointEnc* model, vector* cnn_models) { 187 | ifstream infile(filename); 188 | if (!infile.is_open()) { 189 | cerr << "File opening failed" << endl; 190 | } 191 | 192 | boost::archive::text_iarchive ia(infile); 193 | ia & *model; 194 | for (unsigned i = 0; i < model->morph_len + 1; ++i) { 195 | Model *cnn_model = new Model(); 196 | 
cnn_models->push_back(cnn_model); 197 | } 198 | 199 | model->InitParams(cnn_models); 200 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 201 | ia & *(*cnn_models)[i]; 202 | } 203 | 204 | cerr << "Loaded model from: " << filename << endl; 205 | infile.close(); 206 | } 207 | 208 | // Computes the probability of all possible next characters given 209 | // a sequence, but removes the first character (). 210 | Expression LogProbDist(const vector& seq, LM *lm, 211 | ComputationGraph *cg) { 212 | vector lm_dist(lm->char_to_id.size(), 0.f); 213 | 214 | // Remove the first () character. 215 | vector seq_without_start(seq.begin() + 1, seq.end()); 216 | for (const auto& it : lm->char_to_id) { 217 | vector possible_seq(seq_without_start); 218 | possible_seq.push_back(it.second); 219 | lm_dist[it.second] = lm->LogProbSeq(possible_seq); 220 | } 221 | return input(*cg, {(long) lm_dist.size()}, lm_dist); 222 | } 223 | 224 | float Softplus(float x) { 225 | return log(1 + exp(x)); 226 | } 227 | 228 | Expression Softplus(Expression x) { 229 | return log(1 + exp(x)); 230 | } 231 | 232 | void 233 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 234 | const vector& input_ids, vector* pred_target_ids, 235 | LM* lm, vector* ensmb_model) { 236 | ComputationGraph cg; 237 | 238 | unsigned ensmb = ensmb_model->size(); 239 | vector encoded_word_vecs; 240 | for (unsigned i = 0; i < ensmb; ++i) { 241 | Expression encoded_word_vec; 242 | auto model = (*ensmb_model)[i]; 243 | model->AddParamsToCG(morph_id, &cg); 244 | model->RunFwdBwd(input_ids, &encoded_word_vec, &cg); 245 | model->TransformEncodedInput(&encoded_word_vec); 246 | encoded_word_vecs.push_back(encoded_word_vec); 247 | model->output_forward[morph_id].start_new_sequence(); 248 | } 249 | 250 | unsigned out_index = 1; 251 | unsigned pred_index = char_to_id[BOW]; 252 | while (pred_target_ids->size() < MAX_PRED_LEN) { 253 | pred_target_ids->push_back(pred_index); 254 | if (pred_index == char_to_id[EOW]) { 255 | 
return; // If the end is found, break from the loop and return 256 | } 257 | 258 | vector ensmb_out; 259 | Expression lm_dist = LogProbDist(*pred_target_ids, lm, &cg); 260 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 261 | auto model = (*ensmb_model)[ensmb_id]; 262 | Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index); 263 | Expression input, input_char_vec; 264 | if (out_index < input_ids.size()) { 265 | input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]); 266 | } else { 267 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 268 | min(unsigned(out_index - input_ids.size()), 269 | model->max_eps - 1)); 270 | } 271 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 272 | input_char_vec}); 273 | 274 | Expression hidden = model->output_forward[morph_id].add_input(input); 275 | Expression tm_prob; 276 | model->ProjectToOutput(hidden, &tm_prob); 277 | tm_prob = log_softmax(tm_prob); 278 | 279 | unsigned lm_index = min(out_index, model->max_lm_pos_weights - 1); 280 | Expression lm_weight = lookup(cg, model->lm_pos_weights, lm_index); 281 | Expression total_lp = tm_prob + lm_dist * Softplus(lm_weight); 282 | 283 | ensmb_out.push_back(log_softmax(total_lp)); 284 | } 285 | 286 | Expression out = average(ensmb_out); 287 | vector dist = as_vector(cg.incremental_forward()); 288 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 289 | out_index++; 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /src/lm-joint-enc.h: -------------------------------------------------------------------------------- 1 | #ifndef LM_JOINT_ENC_H_ 2 | #define LM_JOINT_ENC_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "lm.h" 14 | #include "utils.h" 15 | 16 | 
#include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | // The class has a shared encoder acorss all morphological types. 25 | // Other auxiliary units are also shared, except for the decoder and 26 | // input transofrmation parameters. 27 | class LMJointEnc { 28 | public: 29 | LSTMBuilder input_forward, input_backward; // Shared encoder 30 | vector output_forward; 31 | LookupParameters* char_vecs; // Shared char vectors 32 | 33 | Expression hidden_to_output, hidden_to_output_bias; 34 | Parameters *phidden_to_output, *phidden_to_output_bias; // Shared 35 | 36 | Expression transform_encoded, transform_encoded_bias; 37 | vector ptransform_encoded, ptransform_encoded_bias; 38 | 39 | vector eps_vecs; // Not sharing the epsilon vectors 40 | 41 | LookupParameters* lm_pos_weights; 42 | 43 | unsigned char_len, hidden_len, vocab_len, layers, morph_len; 44 | unsigned max_lm_pos_weights = 20, max_eps = 5; 45 | 46 | LMJointEnc() {} 47 | 48 | LMJointEnc(const unsigned& char_length, const unsigned& hidden_length, 49 | const unsigned& vocab_length, const unsigned& layers, 50 | const unsigned& num_morph, vector* m, 51 | vector* optimizer); 52 | 53 | void InitParams(vector* m); 54 | 55 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 56 | 57 | void RunFwdBwd(const vector& inputs, 58 | Expression* hidden, ComputationGraph *cg); 59 | 60 | void TransformEncodedInput(Expression* encoded_input) const; 61 | 62 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 63 | 64 | Expression ComputeLoss(const vector& hidden_units, 65 | const vector& targets, LM *lm, 66 | ComputationGraph* cg) const; 67 | 68 | float Train(const unsigned& morph_id, const vector& inputs, 69 | const vector& outputs, LM *lm, AdadeltaTrainer* opt, 70 | AdadeltaTrainer* shared_opt); 71 | 72 | friend class boost::serialization::access; 73 | template void serialize(Archive& ar, const unsigned int) { 74 
| ar & char_len; 75 | ar & hidden_len; 76 | ar & vocab_len; 77 | ar & layers; 78 | ar & morph_len; 79 | ar & max_eps; 80 | ar & max_lm_pos_weights; 81 | } 82 | }; 83 | 84 | Expression LogProbDist(const vector& seq, 85 | LM *lm, ComputationGraph *cg); 86 | 87 | Expression Softplus(Expression x); 88 | 89 | float Softplus(float x); 90 | 91 | 92 | void Serialize(string& filename, LMJointEnc& model, vector* cnn_model); 93 | 94 | void Read(string& filename, LMJointEnc* model, vector* cnn_model); 95 | 96 | void 97 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 98 | const vector& input_ids, vector* pred_target_ids, 99 | LM *lm, vector* ensmb_model); 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /src/lm-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "lm-sep-morph.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | using cnn::expr::input; 7 | 8 | string BOW = "", EOW = ""; 9 | int MAX_PRED_LEN = 100; 10 | 11 | LMSepMorph::LMSepMorph(const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void LMSepMorph::InitParams(vector* m) { 24 | for (unsigned i = 0; i < morph_len; ++i) { 25 | input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 26 | input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 27 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len, 28 | hidden_len, (*m)[i])); 29 | 30 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len})); 31 | 
phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1})); 32 | 33 | char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len})); 34 | 35 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 36 | 2 * hidden_len})); 37 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 38 | 39 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 40 | lm_pos_weights.push_back((*m)[i]->add_lookup_parameters( 41 | max_lm_pos_weights, {1})); 42 | } 43 | } 44 | 45 | void LMSepMorph::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 46 | input_forward[morph_id].new_graph(*cg); 47 | input_backward[morph_id].new_graph(*cg); 48 | output_forward[morph_id].new_graph(*cg); 49 | 50 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]); 51 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]); 52 | 53 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 54 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 55 | } 56 | 57 | void LMSepMorph::RunFwdBwd(const unsigned& morph_id, 58 | const vector& inputs, 59 | Expression* hidden, ComputationGraph *cg) { 60 | vector input_vecs; 61 | for (const unsigned& input_id : inputs) { 62 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id)); 63 | } 64 | 65 | // Run forward LSTM 66 | Expression forward_unit; 67 | input_forward[morph_id].start_new_sequence(); 68 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 69 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]); 70 | } 71 | 72 | // Run backward LSTM 73 | Expression backward_unit; 74 | input_backward[morph_id].start_new_sequence(); 75 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 76 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]); 77 | } 78 | 79 | // Concatenate the forward and back hidden layers 80 | *hidden = concatenate({forward_unit, backward_unit}); 81 | } 82 | 83 | 
void LMSepMorph::TransformEncodedInput(Expression* encoded_input) const { 84 | *encoded_input = affine_transform({transform_encoded_bias, 85 | transform_encoded, *encoded_input}); 86 | } 87 | 88 | void LMSepMorph::ProjectToOutput(const Expression& hidden, Expression* out) const { 89 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 90 | } 91 | 92 | 93 | Expression LMSepMorph::ComputeLoss(const unsigned& morph_id, 94 | const vector& hidden_units, 95 | const vector& targets, 96 | LM *lm, ComputationGraph* cg) const { 97 | vector losses; 98 | vector incremental_targets; 99 | incremental_targets.push_back(lm->char_to_id[BOW]); 100 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 101 | Expression out; 102 | ProjectToOutput(hidden_units[i], &out); 103 | Expression trans_lp = log_softmax(out); 104 | 105 | // Calculate the LM probabilities of all possible outputs. 106 | Expression lm_lp = LogProbDist(incremental_targets, lm, cg); 107 | 108 | unsigned lm_index = min(i + 1, max_lm_pos_weights - 1); 109 | Expression lm_weight = lookup(*cg, lm_pos_weights[morph_id], lm_index); 110 | 111 | //Expression total_lp = trans_lp + cwise_multiply(lm_lp, Softplus(lm_weight)); 112 | Expression total_lp = trans_lp + lm_lp * Softplus(lm_weight); 113 | losses.push_back(pickneglogsoftmax(total_lp, targets[i])); 114 | incremental_targets.push_back(targets[i]); 115 | } 116 | return sum(losses); 117 | } 118 | 119 | float LMSepMorph::Train(const unsigned& morph_id, 120 | const vector& inputs, 121 | const vector& outputs, LM *lm, 122 | AdadeltaTrainer* ada_gd) { 123 | ComputationGraph cg; 124 | AddParamsToCG(morph_id, &cg); 125 | 126 | // Encode and Transform to feed into decoder 127 | Expression encoded_input_vec; 128 | RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg); 129 | TransformEncodedInput(&encoded_input_vec); 130 | 131 | // Use this encoded word vector to predict the transformed word 132 | vector input_vecs_for_dec; 133 | vector output_ids_for_pred; 
134 | for (unsigned i = 0; i < outputs.size(); ++i) { 135 | if (i < outputs.size() - 1) { 136 | // '' will not be fed as input -- it needs to be predicted. 137 | if (i < inputs.size() - 1) { 138 | input_vecs_for_dec.push_back(concatenate( 139 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]), 140 | lookup(cg, char_vecs[morph_id], inputs[i + 1])})); 141 | } else { 142 | input_vecs_for_dec.push_back(concatenate( 143 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]), 144 | lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))})); 145 | } 146 | } 147 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 148 | output_ids_for_pred.push_back(outputs[i]); 149 | } 150 | } 151 | 152 | vector decoder_hidden_units; 153 | output_forward[morph_id].start_new_sequence(); 154 | for (const auto& vec : input_vecs_for_dec) { 155 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 156 | } 157 | 158 | // If its the first iteration, do not use language model. 159 | Expression loss = ComputeLoss(morph_id, decoder_hidden_units, 160 | output_ids_for_pred, lm, &cg); 161 | 162 | float return_loss = as_scalar(cg.incremental_forward()); 163 | cg.backward(); 164 | ada_gd->update(1.0f); 165 | 166 | return return_loss; 167 | } 168 | 169 | // Computes the probability of all possible next characters given 170 | // a sequence, but removes the first character (). 171 | Expression LogProbDist(const vector& seq, LM *lm, 172 | ComputationGraph *cg) { 173 | vector lm_dist(lm->char_to_id.size(), 0.f); 174 | 175 | // Remove the first () character. 
176 | vector seq_without_start(seq.begin() + 1, seq.end()); 177 | for (const auto& it : lm->char_to_id) { 178 | vector possible_seq(seq_without_start); 179 | possible_seq.push_back(it.second); 180 | lm_dist[it.second] = lm->LogProbSeq(possible_seq); 181 | } 182 | return input(*cg, {(long) lm_dist.size()}, lm_dist); 183 | } 184 | 185 | void 186 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 187 | const vector& input_ids, vector* pred_target_ids, 188 | LM* lm, vector* ensmb_model) { 189 | ComputationGraph cg; 190 | 191 | unsigned ensmb = ensmb_model->size(); 192 | vector encoded_word_vecs; 193 | for (unsigned i = 0; i < ensmb; ++i) { 194 | Expression encoded_word_vec; 195 | auto model = (*ensmb_model)[i]; 196 | model->AddParamsToCG(morph_id, &cg); 197 | model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 198 | model->TransformEncodedInput(&encoded_word_vec); 199 | encoded_word_vecs.push_back(encoded_word_vec); 200 | model->output_forward[morph_id].start_new_sequence(); 201 | } 202 | 203 | unsigned out_index = 1; 204 | unsigned pred_index = char_to_id[BOW]; 205 | while (pred_target_ids->size() < MAX_PRED_LEN) { 206 | pred_target_ids->push_back(pred_index); 207 | if (pred_index == char_to_id[EOW]) { 208 | // Print the lm weights. 
209 | /*for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 210 | auto model = (*ensmb_model)[ensmb_id]; 211 | for (unsigned i = 1; i < model->max_lm_pos_weights; ++i) { 212 | float p = model->lm_pos_weights[morph_id]->values[i].v[0]; 213 | cerr << Softplus(p) << " "; 214 | } 215 | cerr << endl; 216 | }*/ 217 | 218 | return; // If the end is found, break from the loop and return 219 | } 220 | 221 | vector ensmb_out; 222 | Expression lm_dist = LogProbDist(*pred_target_ids, lm, &cg); 223 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 224 | auto model = (*ensmb_model)[ensmb_id]; 225 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index); 226 | Expression input, input_char_vec; 227 | if (out_index < input_ids.size()) { 228 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]); 229 | } else { 230 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 231 | min(unsigned(out_index - input_ids.size()), 232 | model->max_eps - 1)); 233 | } 234 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 235 | input_char_vec}); 236 | 237 | Expression hidden = model->output_forward[morph_id].add_input(input); 238 | Expression tm_prob; 239 | model->ProjectToOutput(hidden, &tm_prob); 240 | tm_prob = log_softmax(tm_prob); 241 | 242 | unsigned lm_index = min(out_index, model->max_lm_pos_weights - 1); 243 | Expression lm_weight = lookup(cg, model->lm_pos_weights[morph_id], lm_index); 244 | Expression total_lp = tm_prob + lm_dist * Softplus(lm_weight); 245 | 246 | ensmb_out.push_back(log_softmax(total_lp)); 247 | } 248 | Expression total_dist = average(ensmb_out); 249 | 250 | vector dist = as_vector(cg.incremental_forward()); 251 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 252 | out_index++; 253 | } 254 | } 255 | 256 | float Softplus(float x) { 257 | return log(1 + exp(x)); 258 | } 259 | 260 | Expression Softplus(Expression x) { 261 | return log(1 + exp(x)); 262 | 
} 263 | 264 | void Serialize(string& filename, LMSepMorph& model, vector* cnn_models) { 265 | ofstream outfile(filename); 266 | if (!outfile.is_open()) { 267 | cerr << "File opening failed" << endl; 268 | } 269 | 270 | boost::archive::text_oarchive oa(outfile); 271 | oa & model; 272 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 273 | oa & *(*cnn_models)[i]; 274 | } 275 | 276 | cerr << "Saved model to: " << filename << endl; 277 | outfile.close(); 278 | } 279 | 280 | void Read(string& filename, LMSepMorph* model, vector* cnn_models) { 281 | ifstream infile(filename); 282 | if (!infile.is_open()) { 283 | cerr << "File opening failed" << endl; 284 | } 285 | 286 | boost::archive::text_iarchive ia(infile); 287 | ia & *model; 288 | for (unsigned i = 0; i < model->morph_len; ++i) { 289 | Model *cnn_model = new Model(); 290 | cnn_models->push_back(cnn_model); 291 | } 292 | 293 | model->InitParams(cnn_models); 294 | for (unsigned i = 0; i < model->morph_len; ++i) { 295 | ia & *(*cnn_models)[i]; 296 | } 297 | 298 | cerr << "Loaded model from: " << filename << endl; 299 | infile.close(); 300 | } 301 | 302 | -------------------------------------------------------------------------------- /src/lm-sep-morph.h: -------------------------------------------------------------------------------- 1 | #ifndef SEP_MORPH_H_ 2 | #define SEP_MORPH_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "lm.h" 14 | #include "utils.h" 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | class LMSepMorph { 25 | public: 26 | vector input_forward, input_backward, output_forward; 27 | vector char_vecs; 28 | 29 | Expression hidden_to_output, hidden_to_output_bias; 30 | vector phidden_to_output, 
phidden_to_output_bias; 31 | 32 | Expression transform_encoded, transform_encoded_bias; 33 | vector ptransform_encoded, ptransform_encoded_bias; 34 | 35 | unsigned char_len, hidden_len, vocab_len, layers, morph_len; 36 | unsigned max_eps = 5, max_lm_pos_weights = 20; 37 | vector eps_vecs; 38 | vector lm_pos_weights; 39 | 40 | LMSepMorph() {} 41 | 42 | LMSepMorph(const unsigned& char_length, const unsigned& hidden_length, 43 | const unsigned& vocab_length, const unsigned& layers, 44 | const unsigned& num_morph, vector* m, 45 | vector* optimizer); 46 | 47 | void InitParams(vector* m); 48 | 49 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 50 | 51 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 52 | Expression* hidden, ComputationGraph *cg); 53 | 54 | void TransformEncodedInput(Expression* encoded_input) const; 55 | 56 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 57 | 58 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 59 | 60 | Expression ComputeLoss(const unsigned& morph_id, 61 | const vector& hidden_units, 62 | const vector& targets, 63 | LM *lm, ComputationGraph* cg) const; 64 | 65 | float Train(const unsigned& morph_id, const vector& inputs, 66 | const vector& outputs, LM* lm, AdadeltaTrainer* ada_gd); 67 | 68 | friend class boost::serialization::access; 69 | template void serialize(Archive& ar, const unsigned int) { 70 | ar & char_len; 71 | ar & hidden_len; 72 | ar & vocab_len; 73 | ar & layers; 74 | ar & morph_len; 75 | ar & max_eps; 76 | ar & max_lm_pos_weights; 77 | } 78 | }; 79 | 80 | Expression LogProbDist(const vector& seq, 81 | LM *lm, ComputationGraph *cg); 82 | 83 | Expression Softplus(Expression x); 84 | 85 | float Softplus(float x); 86 | 87 | void 88 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 89 | const vector& input_ids, 90 | vector* pred_target_ids, LM* lm, 91 | vector* ensmb_model); 92 | 93 | void Serialize(string& filename, 
LMSepMorph& model, vector* cnn_model); 94 | 95 | void Read(string& filename, LMSepMorph* model, vector* cnn_model); 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /src/lm.cc: -------------------------------------------------------------------------------- 1 | #include "lm.h" 2 | 3 | LM::LM(string& lm_model_file, unordered_map& char_id, 4 | unordered_map& id_char) { 5 | ifstream model_file(lm_model_file); 6 | if (model_file.is_open()) { 7 | string line; 8 | char_to_id = char_id; 9 | id_to_char = id_char; 10 | while(getline(model_file, line)) { 11 | vector items = split_line(line, '\t'); 12 | if (items.size() == 2) { 13 | vector seq; 14 | for (string& ch : split_line(items[1], ' ')) { 15 | seq.push_back(char_to_id[ch]); 16 | } 17 | lp[HashSeq(seq)] = atof(items[0].c_str()); 18 | } else if (items.size() == 3) { 19 | vector seq; 20 | for (string& ch : split_line(items[1], ' ')) { 21 | seq.push_back(char_to_id[ch]); 22 | } 23 | lp[HashSeq(seq)] = atof(items[0].c_str()); 24 | b[HashSeq(seq)] = atof(items[2].c_str()); 25 | } 26 | } 27 | model_file.close(); 28 | cerr << "LM loaded from: " << lm_model_file << endl; 29 | cerr << "LM size: " << lp.size() << endl; 30 | } else { 31 | cerr << "File opening failed" << endl; 32 | exit(0); 33 | } 34 | } 35 | 36 | size_t LM::HashSeq(vector& seq) { 37 | return boost::hash_range(seq.begin(), seq.end()); 38 | } 39 | 40 | void 41 | PrintSeq(vector& seq, unordered_map& id_to_char) { 42 | for (unsigned i = 0; i < seq.size(); ++i) { 43 | cerr << id_to_char[seq[i]] << " "; 44 | } 45 | cerr << endl; 46 | } 47 | 48 | /* LogProbSeq(w1, w2, ..., wn) = LogProbSeq(w2, w3, ..., wn) 49 | + backoff(w1, w2, ..., wn-1); 50 | http://cmusphinx.sourceforge.net/wiki/sphinx4:standardgrammarformats 51 | 52 | Assumes that unigrams are always present in lp[]. 
53 | */ 54 | float LM::LogProb(vector& seq) { 55 | size_t seq_hash = HashSeq(seq); 56 | auto it_seq = lp.find(seq_hash); 57 | if (it_seq == lp.end()) { 58 | vector backoff_seq(seq.begin(), seq.end() - 1); 59 | size_t backoff_hash = HashSeq(backoff_seq); 60 | auto it_backoff = b.find(backoff_hash); 61 | 62 | float backoff; 63 | if (it_backoff == b.end()) { 64 | backoff = 0.0; 65 | } else { 66 | backoff = it_backoff->second; 67 | } 68 | 69 | vector small(seq.begin() + 1, seq.end()); 70 | lp[seq_hash] = backoff + LogProb(small); 71 | return lp[seq_hash]; 72 | } else { 73 | return it_seq->second; // Return the log prob 74 | } 75 | } 76 | 77 | float LM::LogProbSeq(vector& seq) { 78 | float score = 0.; 79 | for (unsigned i = 1; i <= seq.size(); ++i) { 80 | vector temp(seq.begin(), seq.begin()+ i); 81 | score += LogProb(temp); 82 | } 83 | return score; 84 | } 85 | -------------------------------------------------------------------------------- /src/lm.h: -------------------------------------------------------------------------------- 1 | #ifndef LM_H_ 2 | #define LM_H_ 3 | 4 | #include "utils.h" 5 | 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | class LM { 12 | public: 13 | unordered_map char_to_id; 14 | unordered_map id_to_char; 15 | 16 | LM (string& lm_model_file, unordered_map& char_id, 17 | unordered_map& id_char); 18 | float LogProbSeq(vector& seq); 19 | float LogProb(vector& seq); 20 | size_t HashSeq(vector& seq); 21 | 22 | private: 23 | unordered_map lp, b; 24 | }; 25 | 26 | void 27 | PrintSeq(vector& seq, unordered_map& id_to_char); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /src/no-enc.cc: -------------------------------------------------------------------------------- 1 | #include "no-enc.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | 7 | string BOW = "", EOW = ""; 8 | int MAX_PRED_LEN = 100; 9 | float NEG_INF = numeric_limits::min(); 10 | 
11 | NoEnc::NoEnc(const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void NoEnc::InitParams(vector* m) { 24 | for (unsigned i = 0; i < morph_len; ++i) { 25 | //input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 26 | //input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 27 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len, // + hidden_len, 28 | hidden_len, (*m)[i])); 29 | 30 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len})); 31 | phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1})); 32 | 33 | char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len})); 34 | 35 | //ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 36 | // 2 * hidden_len})); 37 | //ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 38 | 39 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 40 | } 41 | } 42 | 43 | void NoEnc::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 44 | //input_forward[morph_id].new_graph(*cg); 45 | //input_backward[morph_id].new_graph(*cg); 46 | output_forward[morph_id].new_graph(*cg); 47 | 48 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]); 49 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]); 50 | 51 | //transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 52 | //transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 53 | } 54 | 55 | /*void NoEnc::RunFwdBwd(const unsigned& morph_id, 56 | const vector& inputs, 57 | Expression* hidden, ComputationGraph *cg) { 58 | vector 
input_vecs; 59 | for (const unsigned& input_id : inputs) { 60 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id)); 61 | } 62 | 63 | // Run forward LSTM 64 | Expression forward_unit; 65 | input_forward[morph_id].start_new_sequence(); 66 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 67 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]); 68 | } 69 | 70 | // Run backward LSTM 71 | Expression backward_unit; 72 | input_backward[morph_id].start_new_sequence(); 73 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 74 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]); 75 | } 76 | 77 | // Concatenate the forward and back hidden layers 78 | *hidden = concatenate({forward_unit, backward_unit}); 79 | } 80 | 81 | void NoEnc::TransformEncodedInput(Expression* encoded_input) const { 82 | *encoded_input = affine_transform({transform_encoded_bias, 83 | transform_encoded, *encoded_input}); 84 | }*/ 85 | 86 | void NoEnc::ProjectToOutput(const Expression& hidden, Expression* out) const { 87 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 88 | } 89 | 90 | Expression NoEnc::ComputeLoss(const vector& hidden_units, 91 | const vector& targets) const { 92 | assert(hidden_units.size() == targets.size()); 93 | vector losses; 94 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 95 | Expression out; 96 | ProjectToOutput(hidden_units[i], &out); 97 | losses.push_back(pickneglogsoftmax(out, targets[i])); 98 | } 99 | return sum(losses); 100 | } 101 | 102 | float NoEnc::Train(const unsigned& morph_id, const vector& inputs, 103 | const vector& outputs, AdadeltaTrainer* ada_gd) { 104 | ComputationGraph cg; 105 | AddParamsToCG(morph_id, &cg); 106 | 107 | // Encode and Transform to feed into decoder 108 | //Expression encoded_input_vec; 109 | //RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg); 110 | //TransformEncodedInput(&encoded_input_vec); 111 | 112 | // Use this encoded word vector to predict the 
transformed word 113 | vector input_vecs_for_dec; 114 | vector output_ids_for_pred; 115 | for (unsigned i = 0; i < outputs.size(); ++i) { 116 | if (i < outputs.size() - 1) { 117 | // '' will not be fed as input -- it needs to be predicted. 118 | if (i < inputs.size() - 1) { 119 | input_vecs_for_dec.push_back(concatenate( 120 | {lookup(cg, char_vecs[morph_id], outputs[i]), 121 | lookup(cg, char_vecs[morph_id], inputs[i + 1])})); 122 | } else { 123 | input_vecs_for_dec.push_back(concatenate( 124 | {lookup(cg, char_vecs[morph_id], outputs[i]), 125 | lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))})); 126 | } 127 | } 128 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 129 | output_ids_for_pred.push_back(outputs[i]); 130 | } 131 | } 132 | 133 | vector decoder_hidden_units; 134 | output_forward[morph_id].start_new_sequence(); 135 | for (const auto& vec : input_vecs_for_dec) { 136 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 137 | } 138 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 139 | 140 | float return_loss = as_scalar(cg.forward()); 141 | cg.backward(); 142 | ada_gd->update(1.0f); 143 | return return_loss; 144 | } 145 | 146 | void 147 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 148 | const vector& input_ids, 149 | vector* pred_target_ids, vector* ensmb_model) { 150 | ComputationGraph cg; 151 | 152 | /*auto temp_model = *(*ensmb_model)[0]; 153 | for (unsigned char_id = 0; char_id < temp_model.vocab_len; ++char_id) { 154 | for (unsigned index = 0; index < temp_model.char_len; ++index) { 155 | cout << temp_model.char_vecs[0]->values[char_id].v[index] << " "; 156 | } 157 | cout << endl; 158 | } 159 | exit(0);*/ 160 | 161 | unsigned ensmb = ensmb_model->size(); 162 | vector encoded_word_vecs; 163 | for (unsigned i = 0; i < ensmb; ++i) { 164 | Expression encoded_word_vec; 165 | auto model = (*ensmb_model)[i]; 166 | 
model->AddParamsToCG(morph_id, &cg); 167 | /*model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 168 | model->TransformEncodedInput(&encoded_word_vec); 169 | encoded_word_vecs.push_back(encoded_word_vec);*/ 170 | model->output_forward[morph_id].start_new_sequence(); 171 | } 172 | 173 | unsigned out_index = 1; 174 | unsigned pred_index = char_to_id[BOW]; 175 | while (pred_target_ids->size() < MAX_PRED_LEN) { 176 | vector ensmb_out; 177 | pred_target_ids->push_back(pred_index); 178 | if (pred_index == char_to_id[EOW]) { 179 | return; // If the end is found, break from the loop and return 180 | } 181 | 182 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 183 | auto model = (*ensmb_model)[ensmb_id]; 184 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index); 185 | Expression input, input_char_vec; 186 | if (out_index < input_ids.size()) { 187 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]); 188 | } else { 189 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 190 | min(unsigned(out_index - input_ids.size()), 191 | model->max_eps - 1)); 192 | } 193 | input = concatenate({//encoded_word_vecs[ensmb_id], 194 | prev_output_vec, 195 | input_char_vec}); 196 | 197 | Expression hidden = model->output_forward[morph_id].add_input(input); 198 | Expression out; 199 | model->ProjectToOutput(hidden, &out); 200 | ensmb_out.push_back(log_softmax(out)); 201 | } 202 | 203 | Expression out = sum(ensmb_out) / ensmb_out.size(); 204 | vector dist = as_vector(cg.incremental_forward()); 205 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 206 | out_index++; 207 | } 208 | } 209 | 210 | void 211 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 212 | unordered_map& char_to_id, 213 | const vector& input_ids, 214 | vector >* sequences, vector* tm_scores, 215 | vector* ensmb_model) { 216 | unsigned out_index = 1; 217 | unsigned ensmb = ensmb_model->size(); 
218 | ComputationGraph cg; 219 | 220 | // Compute stuff for every model in the ensemble. 221 | vector encoded_word_vecs; 222 | vector ensmb_out; 223 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 224 | auto& model = *(*ensmb_model)[ensmb_id]; 225 | model.AddParamsToCG(morph_id, &cg); 226 | 227 | /*Expression encoded_word_vec; 228 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 229 | model.TransformEncodedInput(&encoded_word_vec); 230 | encoded_word_vecs.push_back(encoded_word_vec);*/ 231 | model.output_forward[morph_id].start_new_sequence(); 232 | 233 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id], 234 | char_to_id[BOW]); 235 | Expression input = concatenate({//encoded_word_vecs[ensmb_id], 236 | prev_output_vec, 237 | lookup(cg, model.char_vecs[morph_id], 238 | input_ids[out_index])}); 239 | Expression hidden = model.output_forward[morph_id].add_input(input); 240 | Expression out; 241 | model.ProjectToOutput(hidden, &out); 242 | out = log_softmax(out); 243 | ensmb_out.push_back(out); 244 | } 245 | 246 | // Compute the average of the ensemble output. 247 | Expression out_dist = average(ensmb_out); 248 | vector log_dist = as_vector(cg.incremental_forward()); 249 | priority_queue > init_queue; 250 | for (unsigned i = 0; i < log_dist.size(); ++i) { 251 | init_queue.push(make_pair(log_dist[i], i)); 252 | } 253 | unsigned vocab_size = log_dist.size(); 254 | 255 | // Initialise the beam_size sequences, scores, hidden states. 
256 | vector log_scores; 257 | vector > prev_states; 258 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 259 | vector seq; 260 | seq.push_back(char_to_id[BOW]); 261 | seq.push_back(init_queue.top().second); 262 | sequences->push_back(seq); 263 | log_scores.push_back(init_queue.top().first); 264 | 265 | vector ensmb_states; 266 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 267 | auto& model = *(*ensmb_model)[ensmb_id]; 268 | ensmb_states.push_back(model.output_forward[morph_id].state()); 269 | } 270 | prev_states.push_back(ensmb_states); 271 | init_queue.pop(); 272 | } 273 | 274 | vector neg_inf(vocab_size, NEG_INF); 275 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf); 276 | 277 | vector active_beams(beam_size, true); 278 | while (true) { 279 | out_index++; 280 | priority_queue > > probs_queue; 281 | vector > curr_states; 282 | vector out_dist; 283 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 284 | if (active_beams[beam_id]) { 285 | unsigned prev_out_char = (*sequences)[beam_id].back(); 286 | vector ensmb_out; 287 | vector ensmb_states; 288 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) { 289 | auto& model = *(*ensmb_model)[ensmb_id]; 290 | Expression input_char_vec; 291 | if (out_index < input_ids.size()) { 292 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]); 293 | } else { 294 | input_char_vec = lookup(cg, model.eps_vecs[morph_id], 295 | min(unsigned(out_index - input_ids.size()), 296 | model.max_eps - 1)); 297 | } 298 | 299 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char); 300 | Expression input = concatenate({//encoded_word_vecs[ensmb_id], 301 | prev_out_vec, 302 | input_char_vec}); 303 | Expression hidden = model.output_forward[morph_id].add_input( 304 | prev_states[beam_id][ensmb_id], input); 305 | ensmb_states.push_back(model.output_forward[morph_id].state()); 306 | 307 | Expression out; 308 | 
model.ProjectToOutput(hidden, &out); 309 | out = log_softmax(out); 310 | ensmb_out.push_back(out); 311 | } 312 | curr_states.push_back(ensmb_states); 313 | out_dist.push_back(average(ensmb_out)); 314 | } else { 315 | vector dummy(ensmb, RNNPointer(0)); 316 | curr_states.push_back(dummy); 317 | out_dist.push_back(neg_inf_vec); 318 | } 319 | } 320 | 321 | Expression all_scores = concatenate(out_dist); 322 | vector log_dist = as_vector(cg.incremental_forward()); 323 | 324 | for (unsigned index = 0; index < log_dist.size(); ++index) { 325 | unsigned beam_id = index / vocab_size; 326 | unsigned char_id = index % vocab_size; 327 | if (active_beams[beam_id]) { 328 | pair location = make_pair(beam_id, char_id); 329 | probs_queue.push(pair >( 330 | log_scores[beam_id] + log_dist[index], location)); 331 | } 332 | } 333 | 334 | // Find the beam_size best now and update the variables. 335 | unordered_map > new_seq; 336 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 337 | if (active_beams[beam_id]) { 338 | float log_prob = probs_queue.top().first; 339 | pair location = probs_queue.top().second; 340 | unsigned old_beam_id = location.first, char_id = location.second; 341 | 342 | vector seq = (*sequences)[old_beam_id]; 343 | seq.push_back(char_id); 344 | new_seq[beam_id] = seq; 345 | log_scores[beam_id] = log_prob; // Update the score 346 | 347 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state 348 | probs_queue.pop(); 349 | } 350 | } 351 | 352 | // Update the sequences now. 353 | for (auto& it : new_seq) { 354 | (*sequences)[it.first] = it.second; 355 | } 356 | 357 | // Check if a sequence should be made inactive. 358 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 359 | if (active_beams[beam_id] && 360 | ((*sequences)[beam_id].back() == char_to_id[EOW] || 361 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) { 362 | active_beams[beam_id] = false; 363 | } 364 | } 365 | 366 | // Check if all sequences are inactive. 
367 | bool all_inactive = true; 368 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 369 | if (active_beams[beam_id]) { 370 | all_inactive = false; 371 | break; 372 | } 373 | } 374 | 375 | if (all_inactive) { 376 | *tm_scores = log_scores; 377 | return; 378 | } 379 | } 380 | } 381 | 382 | void Serialize(string& filename, NoEnc& model, vector* cnn_models) { 383 | ofstream outfile(filename); 384 | if (!outfile.is_open()) { 385 | cerr << "File opening failed" << endl; 386 | } 387 | 388 | boost::archive::text_oarchive oa(outfile); 389 | oa & model; 390 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 391 | oa & *(*cnn_models)[i]; 392 | } 393 | 394 | cerr << "Saved model to: " << filename << endl; 395 | outfile.close(); 396 | } 397 | 398 | void Read(string& filename, NoEnc* model, vector* cnn_models) { 399 | ifstream infile(filename); 400 | if (!infile.is_open()) { 401 | cerr << "File opening failed" << endl; 402 | } 403 | 404 | boost::archive::text_iarchive ia(infile); 405 | ia & *model; 406 | for (unsigned i = 0; i < model->morph_len; ++i) { 407 | Model *cnn_model = new Model(); 408 | cnn_models->push_back(cnn_model); 409 | } 410 | 411 | model->InitParams(cnn_models); 412 | for (unsigned i = 0; i < model->morph_len; ++i) { 413 | ia & *(*cnn_models)[i]; 414 | } 415 | 416 | cerr << "Loaded model from: " << filename << endl; 417 | infile.close(); 418 | } 419 | 420 | -------------------------------------------------------------------------------- /src/no-enc.h: -------------------------------------------------------------------------------- 1 | #ifndef NO_ENC_H_ 2 | #define NO_ENC_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | using namespace 
cnn; 23 | using namespace cnn::expr; 24 | 25 | class NoEnc { 26 | public: 27 | vector output_forward; 28 | vector char_vecs; 29 | 30 | Expression hidden_to_output, hidden_to_output_bias; 31 | vector phidden_to_output, phidden_to_output_bias; 32 | 33 | //Expression transform_encoded, transform_encoded_bias; 34 | //vector ptransform_encoded, ptransform_encoded_bias; 35 | 36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 37 | vector eps_vecs; 38 | 39 | NoEnc() {} 40 | 41 | NoEnc(const unsigned& char_length, const unsigned& hidden_length, 42 | const unsigned& vocab_length, const unsigned& layers, 43 | const unsigned& num_morph, vector* m, 44 | vector* optimizer); 45 | 46 | void InitParams(vector* m); 47 | 48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 49 | 50 | //void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 51 | // Expression* hidden, ComputationGraph *cg); 52 | 53 | //void TransformEncodedInput(Expression* encoded_input) const; 54 | 55 | //void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 56 | 57 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 58 | 59 | Expression ComputeLoss(const vector& hidden_units, 60 | const vector& targets) const; 61 | 62 | float Train(const unsigned& morph_id, const vector& inputs, 63 | const vector& outputs, AdadeltaTrainer* ada_gd); 64 | 65 | friend class boost::serialization::access; 66 | template void serialize(Archive& ar, const unsigned int) { 67 | ar & char_len; 68 | ar & hidden_len; 69 | ar & vocab_len; 70 | ar & layers; 71 | ar & morph_len; 72 | ar & max_eps; 73 | } 74 | }; 75 | 76 | void 77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 78 | const vector& input_ids, 79 | vector* pred_target_ids, vector* ensmb_model); 80 | 81 | void 82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 83 | unordered_map& char_to_id, 84 | const vector& input_ids, 85 | vector >* 
sequences, vector* tm_scores, 86 | vector* ensmb_model); 87 | 88 | void Serialize(string& filename, NoEnc& model, vector* cnn_model); 89 | 90 | void Read(string& filename, NoEnc* model, vector* cnn_model); 91 | 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /src/sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "sep-morph.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | 7 | string BOW = "", EOW = ""; 8 | int MAX_PRED_LEN = 100; 9 | float NEG_INF = numeric_limits::min(); 10 | 11 | SepMorph::SepMorph(const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void SepMorph::InitParams(vector* m) { 24 | for (unsigned i = 0; i < morph_len; ++i) { 25 | input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 26 | input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 27 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len, 28 | hidden_len, (*m)[i])); 29 | 30 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len})); 31 | phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1})); 32 | 33 | char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len})); 34 | 35 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 36 | 2 * hidden_len})); 37 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 38 | 39 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 40 | } 41 | } 42 | 43 | void SepMorph::AddParamsToCG(const unsigned& morph_id, 
ComputationGraph* cg) { 44 | input_forward[morph_id].new_graph(*cg); 45 | input_backward[morph_id].new_graph(*cg); 46 | output_forward[morph_id].new_graph(*cg); 47 | 48 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]); 49 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]); 50 | 51 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 52 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 53 | } 54 | 55 | void SepMorph::RunFwdBwd(const unsigned& morph_id, 56 | const vector& inputs, 57 | Expression* hidden, ComputationGraph *cg) { 58 | vector input_vecs; 59 | for (const unsigned& input_id : inputs) { 60 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id)); 61 | } 62 | 63 | // Run forward LSTM 64 | Expression forward_unit; 65 | input_forward[morph_id].start_new_sequence(); 66 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 67 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]); 68 | } 69 | 70 | // Run backward LSTM 71 | Expression backward_unit; 72 | input_backward[morph_id].start_new_sequence(); 73 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 74 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]); 75 | } 76 | 77 | // Concatenate the forward and back hidden layers 78 | *hidden = concatenate({forward_unit, backward_unit}); 79 | } 80 | 81 | void SepMorph::TransformEncodedInput(Expression* encoded_input) const { 82 | *encoded_input = affine_transform({transform_encoded_bias, 83 | transform_encoded, *encoded_input}); 84 | } 85 | 86 | void SepMorph::ProjectToOutput(const Expression& hidden, Expression* out) const { 87 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 88 | } 89 | 90 | Expression SepMorph::ComputeLoss(const vector& hidden_units, 91 | const vector& targets) const { 92 | assert(hidden_units.size() == targets.size()); 93 | vector losses; 94 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 95 | 
Expression out; 96 | ProjectToOutput(hidden_units[i], &out); 97 | losses.push_back(pickneglogsoftmax(out, targets[i])); 98 | } 99 | return sum(losses); 100 | } 101 | 102 | float SepMorph::Train(const unsigned& morph_id, const vector& inputs, 103 | const vector& outputs, AdadeltaTrainer* ada_gd) { 104 | ComputationGraph cg; 105 | AddParamsToCG(morph_id, &cg); 106 | 107 | // Encode and Transform to feed into decoder 108 | Expression encoded_input_vec; 109 | RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg); 110 | TransformEncodedInput(&encoded_input_vec); 111 | 112 | // Use this encoded word vector to predict the transformed word 113 | vector input_vecs_for_dec; 114 | vector output_ids_for_pred; 115 | for (unsigned i = 0; i < outputs.size(); ++i) { 116 | if (i < outputs.size() - 1) { 117 | // '' will not be fed as input -- it needs to be predicted. 118 | if (i < inputs.size() - 1) { 119 | input_vecs_for_dec.push_back(concatenate( 120 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]), 121 | lookup(cg, char_vecs[morph_id], inputs[i + 1])})); 122 | } else { 123 | input_vecs_for_dec.push_back(concatenate( 124 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]), 125 | lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))})); 126 | } 127 | } 128 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 
129 | output_ids_for_pred.push_back(outputs[i]); 130 | } 131 | } 132 | 133 | vector decoder_hidden_units; 134 | output_forward[morph_id].start_new_sequence(); 135 | for (const auto& vec : input_vecs_for_dec) { 136 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 137 | } 138 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 139 | 140 | float return_loss = as_scalar(cg.forward()); 141 | cg.backward(); 142 | ada_gd->update(1.0f); 143 | return return_loss; 144 | } 145 | 146 | void 147 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 148 | const vector& input_ids, 149 | vector* pred_target_ids, vector* ensmb_model) { 150 | ComputationGraph cg; 151 | 152 | unsigned ensmb = ensmb_model->size(); 153 | vector encoded_word_vecs; 154 | for (unsigned i = 0; i < ensmb; ++i) { 155 | Expression encoded_word_vec; 156 | auto model = (*ensmb_model)[i]; 157 | model->AddParamsToCG(morph_id, &cg); 158 | model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 159 | model->TransformEncodedInput(&encoded_word_vec); 160 | encoded_word_vecs.push_back(encoded_word_vec); 161 | model->output_forward[morph_id].start_new_sequence(); 162 | } 163 | 164 | unsigned out_index = 1; 165 | unsigned pred_index = char_to_id[BOW]; 166 | while (pred_target_ids->size() < MAX_PRED_LEN) { 167 | vector ensmb_out; 168 | pred_target_ids->push_back(pred_index); 169 | if (pred_index == char_to_id[EOW]) { 170 | return; // If the end is found, break from the loop and return 171 | } 172 | 173 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 174 | auto model = (*ensmb_model)[ensmb_id]; 175 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index); 176 | Expression input, input_char_vec; 177 | if (out_index < input_ids.size()) { 178 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]); 179 | } else { 180 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 181 | 
min(unsigned(out_index - input_ids.size()), 182 | model->max_eps - 1)); 183 | } 184 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 185 | input_char_vec}); 186 | 187 | Expression hidden = model->output_forward[morph_id].add_input(input); 188 | Expression out; 189 | model->ProjectToOutput(hidden, &out); 190 | ensmb_out.push_back(log_softmax(out)); 191 | } 192 | 193 | Expression out = sum(ensmb_out) / ensmb_out.size(); 194 | vector dist = as_vector(cg.incremental_forward()); 195 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 196 | out_index++; 197 | } 198 | } 199 | 200 | void 201 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 202 | unordered_map& char_to_id, 203 | const vector& input_ids, 204 | vector >* sequences, vector* tm_scores, 205 | vector* ensmb_model) { 206 | unsigned out_index = 1; 207 | unsigned ensmb = ensmb_model->size(); 208 | ComputationGraph cg; 209 | 210 | // Compute stuff for every model in the ensemble. 
211 | vector encoded_word_vecs; 212 | vector ensmb_out; 213 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 214 | auto& model = *(*ensmb_model)[ensmb_id]; 215 | model.AddParamsToCG(morph_id, &cg); 216 | 217 | Expression encoded_word_vec; 218 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 219 | model.TransformEncodedInput(&encoded_word_vec); 220 | encoded_word_vecs.push_back(encoded_word_vec); 221 | model.output_forward[morph_id].start_new_sequence(); 222 | 223 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id], 224 | char_to_id[BOW]); 225 | Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 226 | lookup(cg, model.char_vecs[morph_id], 227 | input_ids[out_index])}); 228 | Expression hidden = model.output_forward[morph_id].add_input(input); 229 | Expression out; 230 | model.ProjectToOutput(hidden, &out); 231 | out = log_softmax(out); 232 | ensmb_out.push_back(out); 233 | } 234 | 235 | // Compute the average of the ensemble output. 236 | Expression out_dist = average(ensmb_out); 237 | vector log_dist = as_vector(cg.incremental_forward()); 238 | priority_queue > init_queue; 239 | for (unsigned i = 0; i < log_dist.size(); ++i) { 240 | init_queue.push(make_pair(log_dist[i], i)); 241 | } 242 | unsigned vocab_size = log_dist.size(); 243 | 244 | // Initialise the beam_size sequences, scores, hidden states. 
245 | vector log_scores; 246 | vector > prev_states; 247 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 248 | vector seq; 249 | seq.push_back(char_to_id[BOW]); 250 | seq.push_back(init_queue.top().second); 251 | sequences->push_back(seq); 252 | log_scores.push_back(init_queue.top().first); 253 | 254 | vector ensmb_states; 255 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 256 | auto& model = *(*ensmb_model)[ensmb_id]; 257 | ensmb_states.push_back(model.output_forward[morph_id].state()); 258 | } 259 | prev_states.push_back(ensmb_states); 260 | init_queue.pop(); 261 | } 262 | 263 | vector neg_inf(vocab_size, NEG_INF); 264 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf); 265 | 266 | vector active_beams(beam_size, true); 267 | while (true) { 268 | out_index++; 269 | priority_queue > > probs_queue; 270 | vector > curr_states; 271 | vector out_dist; 272 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 273 | if (active_beams[beam_id]) { 274 | unsigned prev_out_char = (*sequences)[beam_id].back(); 275 | vector ensmb_out; 276 | vector ensmb_states; 277 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) { 278 | auto& model = *(*ensmb_model)[ensmb_id]; 279 | Expression input_char_vec; 280 | if (out_index < input_ids.size()) { 281 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]); 282 | } else { 283 | input_char_vec = lookup(cg, model.eps_vecs[morph_id], 284 | min(unsigned(out_index - input_ids.size()), 285 | model.max_eps - 1)); 286 | } 287 | 288 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char); 289 | Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_out_vec, 290 | input_char_vec}); 291 | Expression hidden = model.output_forward[morph_id].add_input( 292 | prev_states[beam_id][ensmb_id], input); 293 | ensmb_states.push_back(model.output_forward[morph_id].state()); 294 | 295 | Expression out; 296 | 
model.ProjectToOutput(hidden, &out); 297 | out = log_softmax(out); 298 | ensmb_out.push_back(out); 299 | } 300 | curr_states.push_back(ensmb_states); 301 | out_dist.push_back(average(ensmb_out)); 302 | } else { 303 | vector dummy(ensmb, RNNPointer(0)); 304 | curr_states.push_back(dummy); 305 | out_dist.push_back(neg_inf_vec); 306 | } 307 | } 308 | 309 | Expression all_scores = concatenate(out_dist); 310 | vector log_dist = as_vector(cg.incremental_forward()); 311 | 312 | for (unsigned index = 0; index < log_dist.size(); ++index) { 313 | unsigned beam_id = index / vocab_size; 314 | unsigned char_id = index % vocab_size; 315 | if (active_beams[beam_id]) { 316 | pair location = make_pair(beam_id, char_id); 317 | probs_queue.push(pair >( 318 | log_scores[beam_id] + log_dist[index], location)); 319 | } 320 | } 321 | 322 | // Find the beam_size best now and update the variables. 323 | unordered_map > new_seq; 324 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 325 | if (active_beams[beam_id]) { 326 | float log_prob = probs_queue.top().first; 327 | pair location = probs_queue.top().second; 328 | unsigned old_beam_id = location.first, char_id = location.second; 329 | 330 | vector seq = (*sequences)[old_beam_id]; 331 | seq.push_back(char_id); 332 | new_seq[beam_id] = seq; 333 | log_scores[beam_id] = log_prob; // Update the score 334 | 335 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state 336 | probs_queue.pop(); 337 | } 338 | } 339 | 340 | // Update the sequences now. 341 | for (auto& it : new_seq) { 342 | (*sequences)[it.first] = it.second; 343 | } 344 | 345 | // Check if a sequence should be made inactive. 346 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 347 | if (active_beams[beam_id] && 348 | ((*sequences)[beam_id].back() == char_to_id[EOW] || 349 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) { 350 | active_beams[beam_id] = false; 351 | } 352 | } 353 | 354 | // Check if all sequences are inactive. 
355 | bool all_inactive = true; 356 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 357 | if (active_beams[beam_id]) { 358 | all_inactive = false; 359 | break; 360 | } 361 | } 362 | 363 | if (all_inactive) { 364 | *tm_scores = log_scores; 365 | return; 366 | } 367 | } 368 | } 369 | 370 | void Serialize(string& filename, SepMorph& model, vector* cnn_models) { 371 | ofstream outfile(filename); 372 | if (!outfile.is_open()) { 373 | cerr << "File opening failed" << endl; 374 | } 375 | 376 | boost::archive::text_oarchive oa(outfile); 377 | oa & model; 378 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 379 | oa & *(*cnn_models)[i]; 380 | } 381 | 382 | cerr << "Saved model to: " << filename << endl; 383 | outfile.close(); 384 | } 385 | 386 | void Read(string& filename, SepMorph* model, vector* cnn_models) { 387 | ifstream infile(filename); 388 | if (!infile.is_open()) { 389 | cerr << "File opening failed" << endl; 390 | } 391 | 392 | boost::archive::text_iarchive ia(infile); 393 | ia & *model; 394 | for (unsigned i = 0; i < model->morph_len; ++i) { 395 | Model *cnn_model = new Model(); 396 | cnn_models->push_back(cnn_model); 397 | } 398 | 399 | model->InitParams(cnn_models); 400 | for (unsigned i = 0; i < model->morph_len; ++i) { 401 | ia & *(*cnn_models)[i]; 402 | } 403 | 404 | cerr << "Loaded model from: " << filename << endl; 405 | infile.close(); 406 | } 407 | 408 | -------------------------------------------------------------------------------- /src/sep-morph.h: -------------------------------------------------------------------------------- 1 | #ifndef SEP_MORPH_H_ 2 | #define SEP_MORPH_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | 
using namespace cnn; 23 | using namespace cnn::expr; 24 | 25 | class SepMorph { 26 | public: 27 | vector input_forward, input_backward, output_forward; 28 | vector char_vecs; 29 | 30 | Expression hidden_to_output, hidden_to_output_bias; 31 | vector phidden_to_output, phidden_to_output_bias; 32 | 33 | Expression transform_encoded, transform_encoded_bias; 34 | vector ptransform_encoded, ptransform_encoded_bias; 35 | 36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 37 | vector eps_vecs; 38 | 39 | SepMorph() {} 40 | 41 | SepMorph(const unsigned& char_length, const unsigned& hidden_length, 42 | const unsigned& vocab_length, const unsigned& layers, 43 | const unsigned& num_morph, vector* m, 44 | vector* optimizer); 45 | 46 | void InitParams(vector* m); 47 | 48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 49 | 50 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 51 | Expression* hidden, ComputationGraph *cg); 52 | 53 | void TransformEncodedInput(Expression* encoded_input) const; 54 | 55 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 56 | 57 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 58 | 59 | Expression ComputeLoss(const vector& hidden_units, 60 | const vector& targets) const; 61 | 62 | float Train(const unsigned& morph_id, const vector& inputs, 63 | const vector& outputs, AdadeltaTrainer* ada_gd); 64 | 65 | friend class boost::serialization::access; 66 | template void serialize(Archive& ar, const unsigned int) { 67 | ar & char_len; 68 | ar & hidden_len; 69 | ar & vocab_len; 70 | ar & layers; 71 | ar & morph_len; 72 | ar & max_eps; 73 | } 74 | }; 75 | 76 | void 77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 78 | const vector& input_ids, 79 | vector* pred_target_ids, vector* ensmb_model); 80 | 81 | void 82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 83 | unordered_map& char_to_id, 84 | 
const vector& input_ids, 85 | vector >* sequences, vector* tm_scores, 86 | vector* ensmb_model); 87 | 88 | void Serialize(string& filename, SepMorph& model, vector* cnn_model); 89 | 90 | void Read(string& filename, SepMorph* model, vector* cnn_model); 91 | 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /src/train-enc-dec-attn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "enc-dec-attn.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | int main(int argc, char** argv) { 25 | cnn::Initialize(argc, argv); 26 | 27 | string vocab_filename = argv[1]; // vocabulary of words/characters 28 | string morph_filename = argv[2]; 29 | string train_filename = argv[3]; 30 | string test_filename = argv[4]; 31 | unsigned hidden_size = atoi(argv[5]); 32 | unsigned num_iter = atoi(argv[6]); 33 | float reg_strength = atof(argv[7]); 34 | unsigned layers = atoi(argv[8]); 35 | string model_outputfilename = argv[9]; 36 | 37 | unordered_map char_to_id, morph_to_id; 38 | unordered_map id_to_char, id_to_morph; 39 | 40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 41 | unsigned vocab_size = char_to_id.size(); 42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 43 | unsigned morph_size = morph_to_id.size(); 44 | 45 | vector train_data; // Read the training file in a vector 46 | ReadData(train_filename, &train_data); 47 | 48 | vector test_data; // Read the dev file in a vector 49 | ReadData(test_filename, &test_data); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size; 
++i) { 55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | EncDecAttn nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Pre-train all models on all datasets. 65 | /*cerr << "Pre-training... " << endl; 66 | for (unsigned morph_id = 0; morph_id < morph_size; ++morph_id) { 67 | cerr << "Morph class: " << morph_id + 1 << endl; 68 | for (string& line : train_data) { 69 | vector items = split_line(line, '|'); 70 | vector input_ids, target_ids; 71 | input_ids.clear(); target_ids.clear(); 72 | for (const string& ch : split_line(items[0], ' ')) { 73 | input_ids.push_back(char_to_id[ch]); 74 | } 75 | for (const string& ch : split_line(items[1], ' ')) { 76 | target_ids.push_back(char_to_id[ch]); 77 | } 78 | //unsigned morph_id = morph_to_id[items[2]]; 79 | double temp_loss = nn.Train(morph_id, input_ids, target_ids, 80 | &optimizer[morph_id]); 81 | } 82 | } 83 | cerr << "complete." 
<< endl;*/ 84 | 85 | // Read the training file and train the model 86 | double best_score = -1; 87 | vector object_list; 88 | object_list.push_back(&nn); 89 | for (unsigned iter = 0; iter < num_iter; ++iter) { 90 | unsigned line_id = 0; 91 | random_shuffle(train_data.begin(), train_data.end()); 92 | vector loss(morph_size, 0.0f); 93 | for (string& line : train_data) { 94 | vector items = split_line(line, '|'); 95 | vector input_ids, target_ids; 96 | input_ids.clear(); target_ids.clear(); 97 | for (const string& ch : split_line(items[0], ' ')) { 98 | input_ids.push_back(char_to_id[ch]); 99 | } 100 | for (const string& ch : split_line(items[1], ' ')) { 101 | target_ids.push_back(char_to_id[ch]); 102 | } 103 | unsigned morph_id = morph_to_id[items[2]]; 104 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 105 | &optimizer[morph_id]); 106 | cerr << ++line_id << "\r"; 107 | } 108 | 109 | // Read the test file and output predictions for the words. 110 | string line; 111 | double correct = 0, total = 0; 112 | for (string& line : test_data) { 113 | vector items = split_line(line, '|'); 114 | vector input_ids, target_ids, pred_target_ids; 115 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 116 | for (const string& ch : split_line(items[0], ' ')) { 117 | input_ids.push_back(char_to_id[ch]); 118 | } 119 | for (const string& ch : split_line(items[1], ' ')) { 120 | target_ids.push_back(char_to_id[ch]); 121 | } 122 | unsigned morph_id = morph_to_id[items[2]]; 123 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 124 | &object_list); 125 | 126 | string prediction = ""; 127 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 128 | prediction += id_to_char[pred_target_ids[i]]; 129 | if (i != pred_target_ids.size() - 1) { 130 | prediction += " "; 131 | } 132 | } 133 | if (prediction == items[1]) { 134 | correct += 1; 135 | } 136 | total += 1; 137 | } 138 | double curr_score = correct / total; 139 | cerr << "Iter " << iter + 1 << 
" " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.); 140 | cerr << "Prediction Accuracy: " << curr_score << endl; 141 | if (curr_score > best_score) { 142 | best_score = curr_score; 143 | Serialize(model_outputfilename, nn, &m); 144 | } 145 | } 146 | return 1; 147 | } 148 | -------------------------------------------------------------------------------- /src/train-enc-dec.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "enc-dec.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | int main(int argc, char** argv) { 25 | cnn::Initialize(argc, argv); 26 | 27 | string vocab_filename = argv[1]; // vocabulary of words/characters 28 | string morph_filename = argv[2]; 29 | string train_filename = argv[3]; 30 | string test_filename = argv[4]; 31 | unsigned hidden_size = atoi(argv[5]); 32 | unsigned num_iter = atoi(argv[6]); 33 | float reg_strength = atof(argv[7]); 34 | unsigned layers = atoi(argv[8]); 35 | string model_outputfilename = argv[9]; 36 | 37 | unordered_map char_to_id, morph_to_id; 38 | unordered_map id_to_char, id_to_morph; 39 | 40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 41 | unsigned vocab_size = char_to_id.size(); 42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 43 | unsigned morph_size = morph_to_id.size(); 44 | 45 | vector train_data; // Read the training file in a vector 46 | ReadData(train_filename, &train_data); 47 | 48 | vector test_data; // Read the dev file in a vector 49 | ReadData(test_filename, &test_data); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size; ++i) { 
55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | EncDec nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Read the training file and train the model 65 | double best_score = -1; 66 | vector object_list; 67 | object_list.push_back(&nn); 68 | for (unsigned iter = 0; iter < num_iter; ++iter) { 69 | unsigned line_id = 0; 70 | random_shuffle(train_data.begin(), train_data.end()); 71 | vector loss(morph_size, 0.0f); 72 | for (string& line : train_data) { 73 | vector items = split_line(line, '|'); 74 | vector input_ids, target_ids; 75 | input_ids.clear(); target_ids.clear(); 76 | for (const string& ch : split_line(items[0], ' ')) { 77 | input_ids.push_back(char_to_id[ch]); 78 | } 79 | for (const string& ch : split_line(items[1], ' ')) { 80 | target_ids.push_back(char_to_id[ch]); 81 | } 82 | unsigned morph_id = morph_to_id[items[2]]; 83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 84 | &optimizer[morph_id]); 85 | cerr << ++line_id << "\r"; 86 | } 87 | 88 | // Read the test file and output predictions for the words. 
89 | string line; 90 | double correct = 0, total = 0; 91 | for (string& line : test_data) { 92 | vector items = split_line(line, '|'); 93 | vector input_ids, target_ids, pred_target_ids; 94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 95 | for (const string& ch : split_line(items[0], ' ')) { 96 | input_ids.push_back(char_to_id[ch]); 97 | } 98 | for (const string& ch : split_line(items[1], ' ')) { 99 | target_ids.push_back(char_to_id[ch]); 100 | } 101 | unsigned morph_id = morph_to_id[items[2]]; 102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 103 | &object_list); 104 | 105 | string prediction = ""; 106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 107 | prediction += id_to_char[pred_target_ids[i]]; 108 | if (i != pred_target_ids.size() - 1) { 109 | prediction += " "; 110 | } 111 | } 112 | if (prediction == items[1]) { 113 | correct += 1; 114 | } 115 | total += 1; 116 | } 117 | double curr_score = correct / total; 118 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.); 119 | cerr << "Prediction Accuracy: " << curr_score << endl; 120 | if (curr_score > best_score) { 121 | best_score = curr_score; 122 | Serialize(model_outputfilename, nn, &m); 123 | } 124 | } 125 | return 1; 126 | } 127 | -------------------------------------------------------------------------------- /src/train-joint-enc-dec-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "joint-enc-dec-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string 
vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string train_filename = argv[3]; 26 | string test_filename = argv[4]; 27 | unsigned hidden_size = atoi(argv[5]); 28 | unsigned num_iter = atoi(argv[6]); 29 | float reg_strength = atof(argv[7]); 30 | unsigned layers = atoi(argv[8]); 31 | string model_outputfilename = argv[9]; 32 | 33 | unordered_map char_to_id, morph_to_id; 34 | unordered_map id_to_char, id_to_morph; 35 | 36 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 37 | unsigned vocab_size = char_to_id.size(); 38 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 39 | unsigned morph_size = morph_to_id.size(); 40 | 41 | vector train_data; // Read the training file in a vector 42 | ReadData(train_filename, &train_data); 43 | 44 | vector test_data; // Read the dev file in a vector 45 | ReadData(test_filename, &test_data); 46 | 47 | vector m; 48 | vector optimizer; 49 | 50 | for (unsigned i = 0; i < morph_size + 1; ++i) { 51 | m.push_back(new Model()); 52 | AdadeltaTrainer ada(m[i], reg_strength); 53 | optimizer.push_back(ada); 54 | } 55 | 56 | unsigned char_size = vocab_size; 57 | JointEncDecMorph nn(char_size, hidden_size, vocab_size, layers, morph_size, 58 | &m, &optimizer); 59 | 60 | // Read the training file and train the model 61 | double best_score = -1; 62 | vector model_pointers; 63 | model_pointers.push_back(&nn); 64 | for (unsigned iter = 0; iter < num_iter; ++iter) { 65 | unsigned line_id = 0; 66 | random_shuffle(train_data.begin(), train_data.end()); 67 | vector loss(morph_size, 0.0f); 68 | for (string& line : train_data) { 69 | vector items = split_line(line, '|'); 70 | vector input_ids, target_ids; 71 | input_ids.clear(); target_ids.clear(); 72 | for (const string& ch : split_line(items[0], ' ')) { 73 | input_ids.push_back(char_to_id[ch]); 74 | } 75 | for (const string& ch : split_line(items[1], ' ')) { 76 | target_ids.push_back(char_to_id[ch]); 77 | } 78 | unsigned morph_id = 
morph_to_id[items[2]]; 79 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 80 | &optimizer[morph_id], &optimizer[morph_size]); 81 | cerr << ++line_id << "\r"; 82 | } 83 | 84 | // Read the test file and output predictions for the words. 85 | string line; 86 | double correct = 0, total = 0; 87 | for (string& line : test_data) { 88 | vector items = split_line(line, '|'); 89 | vector input_ids, target_ids, pred_target_ids; 90 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 91 | for (const string& ch : split_line(items[0], ' ')) { 92 | input_ids.push_back(char_to_id[ch]); 93 | } 94 | for (const string& ch : split_line(items[1], ' ')) { 95 | target_ids.push_back(char_to_id[ch]); 96 | } 97 | unsigned morph_id = morph_to_id[items[2]]; 98 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 99 | &model_pointers); 100 | 101 | string prediction = ""; 102 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 103 | prediction += id_to_char[pred_target_ids[i]]; 104 | if (i != pred_target_ids.size() - 1) { 105 | prediction += " "; 106 | } 107 | } 108 | if (prediction == items[1]) { 109 | correct += 1; 110 | } 111 | total += 1; 112 | } 113 | double curr_score = correct / total; 114 | cerr << "Iter " << iter + 1 << " "; 115 | cerr << "Prediction Accuracy: " << curr_score << endl; 116 | if (curr_score > best_score) { 117 | best_score = curr_score; 118 | Serialize(model_outputfilename, nn, &m); 119 | } 120 | } 121 | return 1; 122 | } 123 | -------------------------------------------------------------------------------- /src/train-joint-enc-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "joint-enc-morph.h" 12 | 13 | #include 14 | #include 15 
| 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string train_filename = argv[3]; 26 | string test_filename = argv[4]; 27 | unsigned hidden_size = atoi(argv[5]); 28 | unsigned num_iter = atoi(argv[6]); 29 | float reg_strength = atof(argv[7]); 30 | unsigned layers = atoi(argv[8]); 31 | string model_outputfilename = argv[9]; 32 | 33 | unordered_map char_to_id, morph_to_id; 34 | unordered_map id_to_char, id_to_morph; 35 | 36 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 37 | unsigned vocab_size = char_to_id.size(); 38 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 39 | unsigned morph_size = morph_to_id.size(); 40 | 41 | vector train_data; // Read the training file in a vector 42 | ReadData(train_filename, &train_data); 43 | 44 | vector test_data; // Read the dev file in a vector 45 | ReadData(test_filename, &test_data); 46 | 47 | vector m; 48 | vector optimizer; 49 | 50 | for (unsigned i = 0; i < morph_size + 1; ++i) { 51 | m.push_back(new Model()); 52 | AdadeltaTrainer ada(m[i], reg_strength); 53 | optimizer.push_back(ada); 54 | } 55 | 56 | unsigned char_size = vocab_size; 57 | JointEncMorph nn(char_size, hidden_size, vocab_size, layers, morph_size, 58 | &m, &optimizer); 59 | 60 | // Read the training file and train the model 61 | double best_score = -1; 62 | vector model_pointers; 63 | model_pointers.push_back(&nn); 64 | for (unsigned iter = 0; iter < num_iter; ++iter) { 65 | unsigned line_id = 0; 66 | random_shuffle(train_data.begin(), train_data.end()); 67 | vector loss(morph_size, 0.0f); 68 | for (string& line : train_data) { 69 | vector items = split_line(line, '|'); 70 | vector input_ids, target_ids; 71 | input_ids.clear(); target_ids.clear(); 72 | for (const string& ch : split_line(items[0], ' ')) { 73 | 
input_ids.push_back(char_to_id[ch]); 74 | } 75 | for (const string& ch : split_line(items[1], ' ')) { 76 | target_ids.push_back(char_to_id[ch]); 77 | } 78 | unsigned morph_id = morph_to_id[items[2]]; 79 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 80 | &optimizer[morph_id], &optimizer[morph_size]); 81 | cerr << ++line_id << "\r"; 82 | } 83 | 84 | // Read the test file and output predictions for the words. 85 | string line; 86 | double correct = 0, total = 0; 87 | for (string& line : test_data) { 88 | vector items = split_line(line, '|'); 89 | vector input_ids, target_ids, pred_target_ids; 90 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 91 | for (const string& ch : split_line(items[0], ' ')) { 92 | input_ids.push_back(char_to_id[ch]); 93 | } 94 | for (const string& ch : split_line(items[1], ' ')) { 95 | target_ids.push_back(char_to_id[ch]); 96 | } 97 | unsigned morph_id = morph_to_id[items[2]]; 98 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 99 | &model_pointers); 100 | 101 | string prediction = ""; 102 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 103 | prediction += id_to_char[pred_target_ids[i]]; 104 | if (i != pred_target_ids.size() - 1) { 105 | prediction += " "; 106 | } 107 | } 108 | if (prediction == items[1]) { 109 | correct += 1; 110 | } 111 | total += 1; 112 | } 113 | double curr_score = correct / total; 114 | cerr << "Iter " << iter + 1 << " "; 115 | cerr << "Prediction Accuracy: " << curr_score << endl; 116 | if (curr_score > best_score) { 117 | best_score = curr_score; 118 | Serialize(model_outputfilename, nn, &m); 119 | } 120 | } 121 | return 1; 122 | } 123 | -------------------------------------------------------------------------------- /src/train-lm-joint-enc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | 
#include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "lm.h" 11 | #include "utils.h" 12 | #include "lm-joint-enc.h" 13 | 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | using namespace cnn; 19 | using namespace cnn::expr; 20 | 21 | int main(int argc, char** argv) { 22 | cnn::Initialize(argc, argv); 23 | 24 | string vocab_filename = argv[1]; // vocabulary of words/characters 25 | string morph_filename = argv[2]; 26 | string train_filename = argv[3]; 27 | string test_filename = argv[4]; 28 | unsigned hidden_size = atoi(argv[5]); 29 | unsigned num_iter = atoi(argv[6]); 30 | float reg_strength = atof(argv[7]); 31 | unsigned layers = atoi(argv[8]); 32 | string lm_model_filename = argv[9]; 33 | string model_outputfilename = argv[10]; 34 | 35 | unordered_map char_to_id, morph_to_id; 36 | unordered_map id_to_char, id_to_morph; 37 | 38 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 39 | unsigned vocab_size = char_to_id.size(); 40 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 41 | unsigned morph_size = morph_to_id.size(); 42 | 43 | vector train_data; // Read the training file in a vector 44 | ReadData(train_filename, &train_data); 45 | 46 | vector test_data; // Read the dev file in a vector 47 | ReadData(test_filename, &test_data); 48 | 49 | LM lm(lm_model_filename, char_to_id, id_to_char); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size + 1; ++i) { 55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | LMJointEnc nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Read the training file and train the model 65 | double best_score = -1; 66 | vector model_pointers; 67 | model_pointers.push_back(&nn); 68 | for (unsigned iter = 0; iter < num_iter; ++iter) { 69 | unsigned line_id = 0; 70 | 
random_shuffle(train_data.begin(), train_data.end()); 71 | vector loss(morph_size, 0.0f); 72 | for (string& line : train_data) { 73 | vector items = split_line(line, '|'); 74 | vector input_ids, target_ids; 75 | input_ids.clear(); target_ids.clear(); 76 | for (const string& ch : split_line(items[0], ' ')) { 77 | input_ids.push_back(char_to_id[ch]); 78 | } 79 | for (const string& ch : split_line(items[1], ' ')) { 80 | target_ids.push_back(char_to_id[ch]); 81 | } 82 | unsigned morph_id = morph_to_id[items[2]]; 83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, &lm, 84 | &optimizer[morph_id], &optimizer[morph_size]); 85 | cerr << ++line_id << "\r"; 86 | } 87 | 88 | // Read the test file and output predictions for the words. 89 | string line; 90 | double correct = 0, total = 0; 91 | for (string& line : test_data) { 92 | vector items = split_line(line, '|'); 93 | vector input_ids, target_ids, pred_target_ids; 94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 95 | for (const string& ch : split_line(items[0], ' ')) { 96 | input_ids.push_back(char_to_id[ch]); 97 | } 98 | for (const string& ch : split_line(items[1], ' ')) { 99 | target_ids.push_back(char_to_id[ch]); 100 | } 101 | unsigned morph_id = morph_to_id[items[2]]; 102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 103 | &lm, &model_pointers); 104 | 105 | string prediction = ""; 106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 107 | prediction += id_to_char[pred_target_ids[i]]; 108 | if (i != pred_target_ids.size() - 1) { 109 | prediction += " "; 110 | } 111 | } 112 | if (prediction == items[1]) { 113 | correct += 1; 114 | } 115 | total += 1; 116 | } 117 | double curr_score = correct / total; 118 | cerr << "Iter " << iter + 1 << " "; 119 | cerr << "Prediction Accuracy: " << curr_score << endl; 120 | if (curr_score > best_score) { 121 | best_score = curr_score; 122 | Serialize(model_outputfilename, nn, &m); 123 | } 124 | } 125 | return 1; 126 | } 127 | 
-------------------------------------------------------------------------------- /src/train-lm-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "lm.h" 11 | #include "utils.h" 12 | #include "lm-sep-morph.h" 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | using namespace std; 24 | using namespace cnn; 25 | using namespace cnn::expr; 26 | 27 | int main(int argc, char** argv) { 28 | cnn::Initialize(argc, argv); 29 | feenableexcept(FE_INVALID | FE_OVERFLOW | FE_DIVBYZERO); 30 | 31 | string vocab_filename = argv[1]; // vocabulary of words/characters 32 | string morph_filename = argv[2]; 33 | string train_filename = argv[3]; 34 | string test_filename = argv[4]; 35 | unsigned hidden_size = atoi(argv[5]); 36 | unsigned num_iter = atoi(argv[6]); 37 | float reg_strength = atof(argv[7]); 38 | unsigned layers = atoi(argv[8]); 39 | string lm_model_filename = argv[9]; 40 | string model_outputfilename = argv[10]; 41 | 42 | unordered_map char_to_id, morph_to_id; 43 | unordered_map id_to_char, id_to_morph; 44 | 45 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 46 | unsigned vocab_size = char_to_id.size(); 47 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 48 | unsigned morph_size = morph_to_id.size(); 49 | 50 | vector train_data; // Read the training file in a vector 51 | ReadData(train_filename, &train_data); 52 | 53 | vector test_data; // Read the dev file in a vector 54 | ReadData(test_filename, &test_data); 55 | 56 | LM lm(lm_model_filename, char_to_id, id_to_char); 57 | 58 | vector m; 59 | vector optimizer; 60 | 61 | for (unsigned i = 0; i < morph_size; ++i) { 62 | m.push_back(new Model()); 63 | AdadeltaTrainer ada(m[i], 
reg_strength); 64 | optimizer.push_back(ada); 65 | } 66 | 67 | unsigned char_size = vocab_size; 68 | LMSepMorph nn(char_size, hidden_size, vocab_size, layers, morph_size, 69 | &m, &optimizer); 70 | 71 | // Read the training file and train the model 72 | double best_score = -1; 73 | vector object_list; 74 | object_list.push_back(&nn); 75 | for (unsigned iter = 0; iter < num_iter; ++iter) { 76 | unsigned line_id = 0; 77 | random_shuffle(train_data.begin(), train_data.end()); 78 | vector loss(morph_size, 0.0f); 79 | for (string& line : train_data) { 80 | vector items = split_line(line, '|'); 81 | vector input_ids, target_ids; 82 | input_ids.clear(); target_ids.clear(); 83 | 84 | string input = items[0], output = items[1]; 85 | for (const string& ch : split_line(input, ' ')) { 86 | input_ids.push_back(char_to_id[ch]); 87 | } 88 | for (const string& ch : split_line(output, ' ')) { 89 | target_ids.push_back(char_to_id[ch]); 90 | } 91 | unsigned morph_id = morph_to_id[items[2]]; 92 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 93 | &lm, &optimizer[morph_id]); 94 | cerr << ++line_id << "\r"; 95 | } 96 | 97 | // Read the test file and output predictions for the words. 
98 | string line; 99 | double correct = 0, total = 0; 100 | for (string& line : test_data) { 101 | vector items = split_line(line, '|'); 102 | vector input_ids, target_ids, pred_target_ids; 103 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 104 | 105 | string input = items[0], output = items[1]; 106 | for (const string& ch : split_line(input, ' ')) { 107 | input_ids.push_back(char_to_id[ch]); 108 | } 109 | for (const string& ch : split_line(output, ' ')) { 110 | target_ids.push_back(char_to_id[ch]); 111 | } 112 | unsigned morph_id = morph_to_id[items[2]]; 113 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm, 114 | &object_list); 115 | 116 | string prediction = ""; 117 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 118 | prediction += id_to_char[pred_target_ids[i]]; 119 | if (i != pred_target_ids.size() - 1) { 120 | prediction += " "; 121 | } 122 | } 123 | if (prediction == output) { 124 | correct += 1; 125 | } 126 | total += 1; 127 | } 128 | double curr_score = correct / total; 129 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.); 130 | cerr << " Prediction Accuracy: " << curr_score << endl; 131 | if (curr_score > best_score) { 132 | best_score = curr_score; 133 | Serialize(model_outputfilename, nn, &m); 134 | } 135 | } 136 | return 1; 137 | } 138 | -------------------------------------------------------------------------------- /src/train-no-enc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "no-enc.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | int 
main(int argc, char** argv) { 25 | cnn::Initialize(argc, argv); 26 | 27 | string vocab_filename = argv[1]; // vocabulary of words/characters 28 | string morph_filename = argv[2]; 29 | string train_filename = argv[3]; 30 | string test_filename = argv[4]; 31 | unsigned hidden_size = atoi(argv[5]); 32 | unsigned num_iter = atoi(argv[6]); 33 | float reg_strength = atof(argv[7]); 34 | unsigned layers = atoi(argv[8]); 35 | string model_outputfilename = argv[9]; 36 | 37 | unordered_map char_to_id, morph_to_id; 38 | unordered_map id_to_char, id_to_morph; 39 | 40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 41 | unsigned vocab_size = char_to_id.size(); 42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 43 | unsigned morph_size = morph_to_id.size(); 44 | 45 | vector train_data; // Read the training file in a vector 46 | ReadData(train_filename, &train_data); 47 | 48 | vector test_data; // Read the dev file in a vector 49 | ReadData(test_filename, &test_data); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size; ++i) { 55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | NoEnc nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Pre-train all models on all datasets. 65 | /*cerr << "Pre-training... 
" << endl; 66 | for (unsigned morph_id = 0; morph_id < morph_size; ++morph_id) { 67 | cerr << "Morph class: " << morph_id + 1 << endl; 68 | for (string& line : train_data) { 69 | vector items = split_line(line, '|'); 70 | vector input_ids, target_ids; 71 | input_ids.clear(); target_ids.clear(); 72 | for (const string& ch : split_line(items[0], ' ')) { 73 | input_ids.push_back(char_to_id[ch]); 74 | } 75 | for (const string& ch : split_line(items[1], ' ')) { 76 | target_ids.push_back(char_to_id[ch]); 77 | } 78 | //unsigned morph_id = morph_to_id[items[2]]; 79 | double temp_loss = nn.Train(morph_id, input_ids, target_ids, 80 | &optimizer[morph_id]); 81 | } 82 | } 83 | cerr << "complete." << endl;*/ 84 | 85 | // Read the training file and train the model 86 | double best_score = -1; 87 | vector object_list; 88 | object_list.push_back(&nn); 89 | for (unsigned iter = 0; iter < num_iter; ++iter) { 90 | unsigned line_id = 0; 91 | random_shuffle(train_data.begin(), train_data.end()); 92 | vector loss(morph_size, 0.0f); 93 | for (string& line : train_data) { 94 | vector items = split_line(line, '|'); 95 | vector input_ids, target_ids; 96 | input_ids.clear(); target_ids.clear(); 97 | for (const string& ch : split_line(items[0], ' ')) { 98 | input_ids.push_back(char_to_id[ch]); 99 | } 100 | for (const string& ch : split_line(items[1], ' ')) { 101 | target_ids.push_back(char_to_id[ch]); 102 | } 103 | unsigned morph_id = morph_to_id[items[2]]; 104 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 105 | &optimizer[morph_id]); 106 | cerr << ++line_id << "\r"; 107 | } 108 | 109 | // Read the test file and output predictions for the words. 
110 | string line; 111 | double correct = 0, total = 0; 112 | for (string& line : test_data) { 113 | vector items = split_line(line, '|'); 114 | vector input_ids, target_ids, pred_target_ids; 115 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 116 | for (const string& ch : split_line(items[0], ' ')) { 117 | input_ids.push_back(char_to_id[ch]); 118 | } 119 | for (const string& ch : split_line(items[1], ' ')) { 120 | target_ids.push_back(char_to_id[ch]); 121 | } 122 | unsigned morph_id = morph_to_id[items[2]]; 123 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 124 | &object_list); 125 | 126 | string prediction = ""; 127 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 128 | prediction += id_to_char[pred_target_ids[i]]; 129 | if (i != pred_target_ids.size() - 1) { 130 | prediction += " "; 131 | } 132 | } 133 | if (prediction == items[1]) { 134 | correct += 1; 135 | } 136 | total += 1; 137 | } 138 | double curr_score = correct / total; 139 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.); 140 | cerr << "Prediction Accuracy: " << curr_score << endl; 141 | if (curr_score > best_score) { 142 | best_score = curr_score; 143 | Serialize(model_outputfilename, nn, &m); 144 | } 145 | } 146 | return 1; 147 | } 148 | -------------------------------------------------------------------------------- /src/train-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "sep-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | int main(int argc, char** argv) { 25 | 
cnn::Initialize(argc, argv); 26 | 27 | string vocab_filename = argv[1]; // vocabulary of words/characters 28 | string morph_filename = argv[2]; 29 | string train_filename = argv[3]; 30 | string test_filename = argv[4]; 31 | unsigned hidden_size = atoi(argv[5]); 32 | unsigned num_iter = atoi(argv[6]); 33 | float reg_strength = atof(argv[7]); 34 | unsigned layers = atoi(argv[8]); 35 | string model_outputfilename = argv[9]; 36 | 37 | unordered_map char_to_id, morph_to_id; 38 | unordered_map id_to_char, id_to_morph; 39 | 40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 41 | unsigned vocab_size = char_to_id.size(); 42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 43 | unsigned morph_size = morph_to_id.size(); 44 | 45 | vector train_data; // Read the training file in a vector 46 | ReadData(train_filename, &train_data); 47 | 48 | vector test_data; // Read the dev file in a vector 49 | ReadData(test_filename, &test_data); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size; ++i) { 55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | SepMorph nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Read the training file and train the model 65 | double best_score = -1; 66 | vector object_list; 67 | object_list.push_back(&nn); 68 | for (unsigned iter = 0; iter < num_iter; ++iter) { 69 | unsigned line_id = 0; 70 | random_shuffle(train_data.begin(), train_data.end()); 71 | vector loss(morph_size, 0.0f); 72 | for (string& line : train_data) { 73 | vector items = split_line(line, '|'); 74 | vector input_ids, target_ids; 75 | input_ids.clear(); target_ids.clear(); 76 | for (const string& ch : split_line(items[0], ' ')) { 77 | input_ids.push_back(char_to_id[ch]); 78 | } 79 | for (const string& ch : split_line(items[1], ' ')) { 80 | target_ids.push_back(char_to_id[ch]); 81 | } 82 | 
unsigned morph_id = morph_to_id[items[2]]; 83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 84 | &optimizer[morph_id]); 85 | cerr << ++line_id << "\r"; 86 | } 87 | 88 | // Read the test file and output predictions for the words. 89 | string line; 90 | double correct = 0, total = 0; 91 | for (string& line : test_data) { 92 | vector items = split_line(line, '|'); 93 | vector input_ids, target_ids, pred_target_ids; 94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 95 | for (const string& ch : split_line(items[0], ' ')) { 96 | input_ids.push_back(char_to_id[ch]); 97 | } 98 | for (const string& ch : split_line(items[1], ' ')) { 99 | target_ids.push_back(char_to_id[ch]); 100 | } 101 | unsigned morph_id = morph_to_id[items[2]]; 102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 103 | &object_list); 104 | 105 | string prediction = ""; 106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 107 | prediction += id_to_char[pred_target_ids[i]]; 108 | if (i != pred_target_ids.size() - 1) { 109 | prediction += " "; 110 | } 111 | } 112 | if (prediction == items[1]) { 113 | correct += 1; 114 | } 115 | total += 1; 116 | } 117 | double curr_score = correct / total; 118 | cerr << "Iter " << iter + 1 << " "; 119 | cerr << "Prediction Accuracy: " << curr_score << endl; 120 | if (curr_score > best_score) { 121 | best_score = curr_score; 122 | Serialize(model_outputfilename, nn, &m); 123 | } 124 | } 125 | return 1; 126 | } 127 | -------------------------------------------------------------------------------- /src/utils.cc: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | vector split_line(const string& line, char delim) { 4 | vector words; 5 | stringstream ss(line); 6 | string item; 7 | while (std::getline(ss, item, delim)) { 8 | if (!item.empty()) 9 | words.push_back(item); 10 | } 11 | return words; 12 | } 13 | 14 | void ReadVocab(string& filename, unordered_map* 
item_to_id, 15 | unordered_map* id_to_item) { 16 | ifstream vocab_file(filename); 17 | vector chars; 18 | if (vocab_file.is_open()) { // Reading the vocab file 19 | string line; 20 | getline(vocab_file, line); 21 | chars = split_line(line, ' '); 22 | } else { 23 | cerr << "File opening failed" << endl; 24 | } 25 | unsigned char_id = 0; 26 | for (const string& ch : chars) { 27 | (*item_to_id)[ch] = char_id; 28 | (*id_to_item)[char_id] = ch; 29 | char_id++; 30 | } 31 | vocab_file.close(); 32 | } 33 | 34 | void ReadData(string& filename, vector* data) { 35 | // Read the training file in a vector 36 | ifstream train_file(filename); 37 | if (train_file.is_open()) { 38 | string line; 39 | while (getline(train_file, line)) { 40 | data->push_back(line); 41 | } 42 | } 43 | train_file.close(); 44 | } 45 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H_ 2 | #define UTILS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | vector split_line(const string& line, char delim); 14 | 15 | void ReadVocab(string& filename, unordered_map* item_to_id, 16 | unordered_map* id_to_item); 17 | 18 | void ReadData(string& filename, vector* data); 19 | 20 | #endif 21 | --------------------------------------------------------------------------------