├── LICENSE ├── Makefile ├── README.md ├── data ├── README.txt ├── char_vocab.txt ├── dev_infl.txt ├── morph_vocab.txt ├── test_infl.txt └── train_infl.txt └── src ├── enc-dec-attn.cc ├── enc-dec-attn.h ├── enc-dec.cc ├── enc-dec.h ├── eval-ensemble-enc-dec-attn.cc ├── eval-ensemble-enc-dec.cc ├── eval-ensemble-joint-enc-beam.cc ├── eval-ensemble-joint-enc-dec-morph.cc ├── eval-ensemble-joint-enc-morph.cc ├── eval-ensemble-lm-joint-enc.cc ├── eval-ensemble-lm-sep-morph.cc ├── eval-ensemble-no-enc.cc ├── eval-ensemble-sep-morph-beam.cc ├── eval-ensemble-sep-morph.cc ├── joint-enc-dec-morph.cc ├── joint-enc-dec-morph.h ├── joint-enc-morph.cc ├── joint-enc-morph.h ├── lm-joint-enc.cc ├── lm-joint-enc.h ├── lm-sep-morph.cc ├── lm-sep-morph.h ├── lm.cc ├── lm.h ├── no-enc.cc ├── no-enc.h ├── sep-morph.cc ├── sep-morph.h ├── train-enc-dec-attn.cc ├── train-enc-dec.cc ├── train-joint-enc-dec-morph.cc ├── train-joint-enc-morph.cc ├── train-lm-joint-enc.cc ├── train-lm-sep-morph.cc ├── train-no-enc.cc ├── train-sep-morph.cc ├── utils.cc └── utils.h /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Manaal Faruqui 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CNN_DIR=$(CNN) 3 | EIGEN_DIR=$(EIGEN) 4 | BOOST_DIR=$(BOOST) 5 | 6 | CNN_BUILD_DIR=$(CNN_DIR)/build 7 | INCS=-I$(CNN_DIR) -I$(CNN_BUILD_DIR) -I$(EIGEN_DIR) 8 | LIBS=-L$(CNN_BUILD_DIR)/cnn/ -L$(BOOST_DIR)/lib 9 | FINAL=-lcnn -lboost_regex -lboost_serialization -lboost_program_options -lrt -lpthread 10 | CFLAGS=-std=c++11 -Ofast -g -march=native -pipe 11 | BINDIR=bin 12 | OBJDIR=obj 13 | SRCDIR=src 14 | 15 | .PHONY: clean 16 | all: make_dirs $(BINDIR)/train-sep-morph $(BINDIR)/eval-ensemble-sep-morph $(BINDIR)/train-joint-enc-morph $(BINDIR)/eval-ensemble-joint-enc-morph $(BINDIR)/train-lm-sep-morph $(BINDIR)/eval-ensemble-lm-sep-morph $(BINDIR)/train-joint-enc-dec-morph $(BINDIR)/eval-ensemble-joint-enc-dec-morph $(BINDIR)/eval-ensemble-sep-morph-beam $(BINDIR)/train-lm-joint-enc $(BINDIR)/eval-ensemble-lm-joint-enc $(BINDIR)/eval-ensemble-joint-enc-beam $(BINDIR)/train-no-enc $(BINDIR)/eval-ensemble-no-enc $(BINDIR)/train-enc-dec $(BINDIR)/eval-ensemble-enc-dec $(BINDIR)/train-enc-dec-attn $(BINDIR)/eval-ensemble-enc-dec-attn 17 | 18 | make_dirs: 19 | mkdir -p $(OBJDIR) 20 | mkdir -p $(BINDIR) 21 | 22 | include $(wildcard $(OBJDIR)/*.d) 23 | 24 | $(OBJDIR)/%.o: $(SRCDIR)/%.cc 25 | $(CC) $(CFLAGS) $(INCS) -c $< -o $@ 26 | $(CC) -MM -MP -MT "$@" $(CFLAGS) $(INCS) $< > $(OBJDIR)/$*.d 27 
| 28 | $(BINDIR)/train-sep-morph: $(addprefix $(OBJDIR)/, train-sep-morph.o sep-morph.o utils.o) 29 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 30 | 31 | $(BINDIR)/train-no-enc: $(addprefix $(OBJDIR)/, train-no-enc.o no-enc.o utils.o) 32 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 33 | 34 | $(BINDIR)/train-enc-dec: $(addprefix $(OBJDIR)/, train-enc-dec.o enc-dec.o utils.o) 35 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 36 | 37 | $(BINDIR)/train-enc-dec-attn: $(addprefix $(OBJDIR)/, train-enc-dec-attn.o enc-dec-attn.o utils.o) 38 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 39 | 40 | $(BINDIR)/train-joint-enc-morph: $(addprefix $(OBJDIR)/, train-joint-enc-morph.o joint-enc-morph.o utils.o) 41 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 42 | 43 | $(BINDIR)/train-joint-enc-dec-morph: $(addprefix $(OBJDIR)/, train-joint-enc-dec-morph.o joint-enc-dec-morph.o utils.o) 44 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 45 | 46 | $(BINDIR)/train-lm-sep-morph: $(addprefix $(OBJDIR)/, train-lm-sep-morph.o lm-sep-morph.o utils.o lm.o) 47 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 48 | 49 | $(BINDIR)/train-lm-joint-enc: $(addprefix $(OBJDIR)/, train-lm-joint-enc.o lm-joint-enc.o utils.o lm.o) 50 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 51 | 52 | $(BINDIR)/eval-ensemble-lm-joint-enc: $(addprefix $(OBJDIR)/, eval-ensemble-lm-joint-enc.o utils.o lm-joint-enc.o lm.o) 53 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 54 | 55 | $(BINDIR)/eval-ensemble-sep-morph: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph.o utils.o sep-morph.o) 56 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 57 | 58 | $(BINDIR)/eval-ensemble-sep-morph-spanish-gen: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph-spanish-gen.o utils.o sep-morph.o) 59 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 60 | 61 | $(BINDIR)/eval-ensemble-no-enc: $(addprefix $(OBJDIR)/, eval-ensemble-no-enc.o utils.o no-enc.o) 62 | $(CC) $(CFLAGS) $(LIBS) 
$(INCS) $^ -o $@ $(FINAL) 63 | 64 | $(BINDIR)/eval-ensemble-enc-dec: $(addprefix $(OBJDIR)/, eval-ensemble-enc-dec.o utils.o enc-dec.o) 65 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 66 | 67 | $(BINDIR)/eval-ensemble-enc-dec-attn: $(addprefix $(OBJDIR)/, eval-ensemble-enc-dec-attn.o utils.o enc-dec-attn.o) 68 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 69 | 70 | $(BINDIR)/eval-ensemble-lm-sep-morph: $(addprefix $(OBJDIR)/, eval-ensemble-lm-sep-morph.o utils.o lm-sep-morph.o lm.o) 71 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 72 | 73 | $(BINDIR)/eval-ensemble-joint-enc-morph: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-morph.o utils.o joint-enc-morph.o) 74 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 75 | 76 | $(BINDIR)/eval-ensemble-joint-enc-dec-morph: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-dec-morph.o utils.o joint-enc-dec-morph.o) 77 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 78 | 79 | $(BINDIR)/eval-ensemble-sep-morph-beam: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph-beam.o utils.o sep-morph.o) 80 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 81 | 82 | $(BINDIR)/eval-ensemble-joint-enc-beam: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-beam.o utils.o joint-enc-morph.o) 83 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL) 84 | 85 | 86 | clean: 87 | rm -rf $(BINDIR)/* 88 | rm -rf $(OBJDIR)/* 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # morph-trans 2 | Manaal Faruqui, manaalfar@gmail.com 3 | 4 | This tool is used to generate morphologically inflected forms of a given word according to a specified morphological attribute. The input word is treated as a sequence of characters and the output word is generated as the transformed sequence of characters using a variant of neural encoder-decoder architecture. This is described in Faruqui et al (NAACL, 2016). 
5 | 6 | ### Requirements 7 | 8 | 1. CNN neural network library: https://github.com/clab/cnn 9 | 2. C++ BOOST library: http://www.boost.org/ 10 | 3. C++ Eigen library: http://eigen.tuxfamily.org/ 11 | 12 | Please download and compile these libraries. 13 | 14 | ### Data 15 | 16 | You need three files to train the inflection generation system. Two files that contain vocabulary of the characters, and the vocabulary of the morphological attributes. The third file contains the training/dev/test data. Sample files can be found in data/ . 17 | 18 | * Character vocabulary file: char_vocab.txt 19 | 20 | ``` a b c g f ä ...``` 21 | 22 | * Morphological attribute file: morph_vocab.txt 23 | 24 | ```case=nominative:number=singular case=dative:number=plural ...``` 25 | 26 | * Inflection train/dev/test file: infl.txt 27 | 28 | ``` a a l | a a l e s |case=genitive:number=singular``` 29 | 30 | * Optional language models: [here](https://drive.google.com/folderview?id=0B93-ltInuGUyeGhDUVZSeWkyUVE&usp=sharing) 31 | 32 | ### Compile 33 | 34 | After you have installed CNN, Boost and Eigen, you can simply install the software by typing the following command:- 35 | 36 | ```make CNN=cnn-dir BOOST=boost-dir EIGEN=eigen-dir``` 37 | 38 | ### Run 39 | 40 | To train the inflection generation system, simply run the following:- 41 | 42 | ```./bin/train-sep-morph char_vocab.txt morph_vocab.txt train_infl.txt dev_infl.txt 100 30 1e-5 1 model.txt``` 43 | 44 | Here, 100 is the hidden layer size of the LSTM, 30 is the number of iterations for training, 1e-5 is the l2 regularization strength and 1 is the number of layers in the LSTM. 45 | 46 | To test the system, run:- 47 | 48 | ```./bin/eval-ensemble-sep-morph char_vocab.txt morph_vocab.txt test_infl.txt model1.txt model2.txt model3.txt ... > output.txt``` 49 | 50 | This can use an ensemble of models for evaluation. If you want to use only one model, just provide one model. The sep-morph model is the model that provided us best supervised results. 
Other models can be used in the same way. Baseline encoder-decoder models can be trained using ```train-enc-dec``` and ```train-enc-dec-attn``` models. 51 | 52 | ### Reference 53 | ``` 54 | @inproceedings{faruqui:2016:infl, 55 | author = {Faruqui, Manaal and Tsvetkov, Yulia and Neubig, Graham and Dyer, Chris}, 56 | title = {Morphological Inflection Generation Using Character Sequence to Sequence Learning}, 57 | booktitle = {Proc. of NAACL}, 58 | year = {2016}, 59 | } 60 | ``` 61 | -------------------------------------------------------------------------------- /data/README.txt: -------------------------------------------------------------------------------- 1 | This is the German Nouns inflection dataset from Durrett and DeNero (2013). 2 | -------------------------------------------------------------------------------- /data/char_vocab.txt: -------------------------------------------------------------------------------- 1 | ß é è æ ä ü ö a c b e d g f i h k j m l o n q p s r u t w v y x z 2 | -------------------------------------------------------------------------------- /data/morph_vocab.txt: -------------------------------------------------------------------------------- 1 | case=nominative:number=singular case=dative:number=plural case=nominative:number=plural case=genitive:number=singular case=dative:number=singular case=accusative:number=singular case=accusative:number=plural case=genitive:number=plural 2 | -------------------------------------------------------------------------------- /src/enc-dec-attn.h: -------------------------------------------------------------------------------- 1 | #ifndef ENC_DEC_ATTN_H_ 2 | #define ENC_DEC_ATTN_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 
| 21 | using namespace std; 22 | using namespace cnn; 23 | using namespace cnn::expr; 24 | 25 | class EncDecAttn { 26 | public: 27 | vector input_forward, input_backward, output_forward; 28 | vector char_vecs; 29 | 30 | Expression hidden_to_output, hidden_to_output_bias; 31 | vector phidden_to_output, phidden_to_output_bias; 32 | 33 | Expression transform_encoded, transform_encoded_bias; 34 | vector ptransform_encoded, ptransform_encoded_bias; 35 | 36 | Expression compress_hidden, compress_hidden_bias; 37 | vector pcompress_hidden, pcompress_hidden_bias; 38 | 39 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 40 | vector eps_vecs; 41 | 42 | EncDecAttn() {} 43 | 44 | EncDecAttn(const unsigned& char_length, const unsigned& hidden_length, 45 | const unsigned& vocab_length, const unsigned& layers, 46 | const unsigned& num_morph, vector* m, 47 | vector* optimizer); 48 | 49 | void InitParams(vector* m); 50 | 51 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 52 | 53 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 54 | Expression* hidden, vector* all_hidden, 55 | ComputationGraph *cg); 56 | 57 | Expression GetAvgAttnLayer(const Expression& hidden, 58 | const vector& all_input_hidden) const ; 59 | 60 | void TransformEncodedInput(Expression* encoded_input) const; 61 | 62 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 63 | 64 | void ProjectToOutput(const Expression& hidden, 65 | const vector& all_input_hidden, 66 | Expression* out) const; 67 | 68 | Expression ComputeLoss(const vector& hidden_units, 69 | const vector& targets, 70 | const vector& all_input_hidden) const; 71 | 72 | float Train(const unsigned& morph_id, const vector& inputs, 73 | const vector& outputs, AdadeltaTrainer* ada_gd); 74 | 75 | friend class boost::serialization::access; 76 | template void serialize(Archive& ar, const unsigned int) { 77 | ar & char_len; 78 | ar & hidden_len; 79 | ar & vocab_len; 80 | ar & 
layers; 81 | ar & morph_len; 82 | ar & max_eps; 83 | } 84 | }; 85 | 86 | void 87 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 88 | const vector& input_ids, 89 | vector* pred_target_ids, vector* ensmb_model); 90 | 91 | void 92 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 93 | unordered_map& char_to_id, 94 | const vector& input_ids, 95 | vector >* sequences, vector* tm_scores, 96 | vector* ensmb_model); 97 | 98 | void Serialize(string& filename, EncDecAttn& model, vector* cnn_model); 99 | 100 | void Read(string& filename, EncDecAttn* model, vector* cnn_model); 101 | 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /src/enc-dec.cc: -------------------------------------------------------------------------------- 1 | #include "enc-dec.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | 7 | string BOW = "", EOW = ""; 8 | int MAX_PRED_LEN = 100; 9 | float NEG_INF = numeric_limits::min(); 10 | 11 | EncDec::EncDec(const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void EncDec::InitParams(vector* m) { 24 | for (unsigned i = 0; i < morph_len; ++i) { 25 | input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 26 | input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 27 | output_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 28 | 29 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len})); 30 | phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1})); 31 | 32 | 
char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len})); 33 | 34 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 35 | 2 * hidden_len})); 36 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 37 | 38 | //eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 39 | } 40 | } 41 | 42 | void EncDec::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 43 | input_forward[morph_id].new_graph(*cg); 44 | input_backward[morph_id].new_graph(*cg); 45 | output_forward[morph_id].new_graph(*cg); 46 | 47 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]); 48 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]); 49 | 50 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 51 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 52 | } 53 | 54 | void EncDec::RunFwdBwd(const unsigned& morph_id, 55 | const vector& inputs, 56 | Expression* hidden, ComputationGraph *cg) { 57 | vector input_vecs; 58 | for (const unsigned& input_id : inputs) { 59 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id)); 60 | } 61 | 62 | // Run forward LSTM 63 | Expression forward_unit; 64 | input_forward[morph_id].start_new_sequence(); 65 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 66 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]); 67 | } 68 | 69 | // Run backward LSTM 70 | Expression backward_unit; 71 | input_backward[morph_id].start_new_sequence(); 72 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 73 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]); 74 | } 75 | 76 | // Concatenate the forward and back hidden layers 77 | *hidden = concatenate({forward_unit, backward_unit}); 78 | } 79 | 80 | void EncDec::TransformEncodedInput(Expression* encoded_input) const { 81 | *encoded_input = affine_transform({transform_encoded_bias, 82 | transform_encoded, *encoded_input}); 83 | } 84 | 
85 | void EncDec::ProjectToOutput(const Expression& hidden, Expression* out) const { 86 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 87 | } 88 | 89 | Expression EncDec::ComputeLoss(const vector& hidden_units, 90 | const vector& targets) const { 91 | assert(hidden_units.size() == targets.size()); 92 | vector losses; 93 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 94 | Expression out; 95 | ProjectToOutput(hidden_units[i], &out); 96 | losses.push_back(pickneglogsoftmax(out, targets[i])); 97 | } 98 | return sum(losses); 99 | } 100 | 101 | float EncDec::Train(const unsigned& morph_id, const vector& inputs, 102 | const vector& outputs, AdadeltaTrainer* ada_gd) { 103 | ComputationGraph cg; 104 | AddParamsToCG(morph_id, &cg); 105 | 106 | // Encode and Transform to feed into decoder 107 | Expression encoded_input_vec; 108 | RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg); 109 | TransformEncodedInput(&encoded_input_vec); 110 | 111 | // Use this encoded word vector to predict the transformed word 112 | vector input_vecs_for_dec; 113 | vector output_ids_for_pred; 114 | for (unsigned i = 0; i < outputs.size(); ++i) { 115 | if (i < outputs.size() - 1) { 116 | // '' will not be fed as input -- it needs to be predicted. 117 | input_vecs_for_dec.push_back(lookup(cg, char_vecs[morph_id], outputs[i])); 118 | } 119 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 
120 | output_ids_for_pred.push_back(outputs[i]); 121 | } 122 | } 123 | 124 | vector decoder_hidden_units; 125 | vector init; 126 | for (unsigned i = 0; i < layers; ++i) { 127 | init.push_back(encoded_input_vec); // init cell of decoder 128 | } 129 | for (unsigned i = 0; i < layers; ++i) { 130 | init.push_back(tanh(encoded_input_vec)); // init hidden layer of decoder 131 | } 132 | output_forward[morph_id].start_new_sequence(init); 133 | for (const auto& vec : input_vecs_for_dec) { 134 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 135 | } 136 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 137 | 138 | float return_loss = as_scalar(cg.forward()); 139 | cg.backward(); 140 | ada_gd->update(1.0f); 141 | return return_loss; 142 | } 143 | 144 | void 145 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 146 | const vector& input_ids, 147 | vector* pred_target_ids, vector* ensmb_model) { 148 | ComputationGraph cg; 149 | 150 | unsigned ensmb = ensmb_model->size(); 151 | //vector encoded_word_vecs; 152 | for (unsigned i = 0; i < ensmb; ++i) { 153 | Expression encoded_word_vec; 154 | auto model = (*ensmb_model)[i]; 155 | model->AddParamsToCG(morph_id, &cg); 156 | model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 157 | model->TransformEncodedInput(&encoded_word_vec); 158 | //encoded_word_vecs.push_back(encoded_word_vec); 159 | vector init; 160 | for (unsigned i = 0; i < model->layers; ++i) { 161 | init.push_back(encoded_word_vec); // init cell of decoder 162 | } 163 | for (unsigned i = 0; i < model->layers; ++i) { 164 | init.push_back(tanh(encoded_word_vec)); // init hidden layer of decoder 165 | } 166 | model->output_forward[morph_id].start_new_sequence(init); 167 | } 168 | 169 | unsigned out_index = 1; 170 | unsigned pred_index = char_to_id[BOW]; 171 | while (pred_target_ids->size() < MAX_PRED_LEN) { 172 | vector ensmb_out; 173 | pred_target_ids->push_back(pred_index); 174 | if 
(pred_index == char_to_id[EOW]) { 175 | return; // If the end is found, break from the loop and return 176 | } 177 | 178 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 179 | auto model = (*ensmb_model)[ensmb_id]; 180 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index); 181 | Expression input, input_char_vec; 182 | /*if (out_index < input_ids.size()) { 183 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]); 184 | } else { 185 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 186 | min(unsigned(out_index - input_ids.size()), 187 | model->max_eps - 1)); 188 | } 189 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 190 | input_char_vec});*/ 191 | input = prev_output_vec; 192 | 193 | Expression hidden = model->output_forward[morph_id].add_input(input); 194 | Expression out; 195 | model->ProjectToOutput(hidden, &out); 196 | ensmb_out.push_back(log_softmax(out)); 197 | } 198 | 199 | Expression out = sum(ensmb_out) / ensmb_out.size(); 200 | vector dist = as_vector(cg.incremental_forward()); 201 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 202 | out_index++; 203 | } 204 | } 205 | 206 | void 207 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 208 | unordered_map& char_to_id, 209 | const vector& input_ids, 210 | vector >* sequences, vector* tm_scores, 211 | vector* ensmb_model) { 212 | unsigned out_index = 1; 213 | unsigned ensmb = ensmb_model->size(); 214 | ComputationGraph cg; 215 | 216 | // Compute stuff for every model in the ensemble. 
217 | vector encoded_word_vecs; 218 | vector ensmb_out; 219 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 220 | auto& model = *(*ensmb_model)[ensmb_id]; 221 | model.AddParamsToCG(morph_id, &cg); 222 | 223 | Expression encoded_word_vec; 224 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 225 | model.TransformEncodedInput(&encoded_word_vec); 226 | encoded_word_vecs.push_back(encoded_word_vec); 227 | 228 | vector init; 229 | for (unsigned i = 0; i < model.layers; ++i) { 230 | init.push_back(encoded_word_vec); // init cell of decoder 231 | } 232 | for (unsigned i = 0; i < model.layers; ++i) { 233 | init.push_back(tanh(encoded_word_vec)); // init hidden layer of decoder 234 | } 235 | model.output_forward[morph_id].start_new_sequence(init); 236 | 237 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id], 238 | char_to_id[BOW]); 239 | /*Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 240 | lookup(cg, model.char_vecs[morph_id], 241 | input_ids[out_index])});*/ 242 | Expression input = prev_output_vec; 243 | Expression hidden = model.output_forward[morph_id].add_input(input); 244 | Expression out; 245 | model.ProjectToOutput(hidden, &out); 246 | out = log_softmax(out); 247 | ensmb_out.push_back(out); 248 | } 249 | 250 | // Compute the average of the ensemble output. 251 | Expression out_dist = average(ensmb_out); 252 | vector log_dist = as_vector(cg.incremental_forward()); 253 | priority_queue > init_queue; 254 | for (unsigned i = 0; i < log_dist.size(); ++i) { 255 | init_queue.push(make_pair(log_dist[i], i)); 256 | } 257 | unsigned vocab_size = log_dist.size(); 258 | 259 | // Initialise the beam_size sequences, scores, hidden states. 
260 | vector log_scores; 261 | vector > prev_states; 262 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 263 | vector seq; 264 | seq.push_back(char_to_id[BOW]); 265 | seq.push_back(init_queue.top().second); 266 | sequences->push_back(seq); 267 | log_scores.push_back(init_queue.top().first); 268 | 269 | vector ensmb_states; 270 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 271 | auto& model = *(*ensmb_model)[ensmb_id]; 272 | ensmb_states.push_back(model.output_forward[morph_id].state()); 273 | } 274 | prev_states.push_back(ensmb_states); 275 | init_queue.pop(); 276 | } 277 | 278 | vector neg_inf(vocab_size, NEG_INF); 279 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf); 280 | 281 | vector active_beams(beam_size, true); 282 | while (true) { 283 | out_index++; 284 | priority_queue > > probs_queue; 285 | vector > curr_states; 286 | vector out_dist; 287 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 288 | if (active_beams[beam_id]) { 289 | unsigned prev_out_char = (*sequences)[beam_id].back(); 290 | vector ensmb_out; 291 | vector ensmb_states; 292 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) { 293 | auto& model = *(*ensmb_model)[ensmb_id]; 294 | /*Expression input_char_vec; 295 | /if (out_index < input_ids.size()) { 296 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]); 297 | } else { 298 | input_char_vec = lookup(cg, model.eps_vecs[morph_id], 299 | min(unsigned(out_index - input_ids.size()), 300 | model.max_eps - 1)); 301 | }*/ 302 | 303 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char); 304 | //Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_out_vec, 305 | // input_char_vec}); 306 | 307 | Expression input = prev_out_vec; 308 | Expression hidden = model.output_forward[morph_id].add_input( 309 | prev_states[beam_id][ensmb_id], input); 310 | ensmb_states.push_back(model.output_forward[morph_id].state()); 311 | 
312 | Expression out; 313 | model.ProjectToOutput(hidden, &out); 314 | out = log_softmax(out); 315 | ensmb_out.push_back(out); 316 | } 317 | curr_states.push_back(ensmb_states); 318 | out_dist.push_back(average(ensmb_out)); 319 | } else { 320 | vector dummy(ensmb, RNNPointer(0)); 321 | curr_states.push_back(dummy); 322 | out_dist.push_back(neg_inf_vec); 323 | } 324 | } 325 | 326 | Expression all_scores = concatenate(out_dist); 327 | vector log_dist = as_vector(cg.incremental_forward()); 328 | 329 | for (unsigned index = 0; index < log_dist.size(); ++index) { 330 | unsigned beam_id = index / vocab_size; 331 | unsigned char_id = index % vocab_size; 332 | if (active_beams[beam_id]) { 333 | pair location = make_pair(beam_id, char_id); 334 | probs_queue.push(pair >( 335 | log_scores[beam_id] + log_dist[index], location)); 336 | } 337 | } 338 | 339 | // Find the beam_size best now and update the variables. 340 | unordered_map > new_seq; 341 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 342 | if (active_beams[beam_id]) { 343 | float log_prob = probs_queue.top().first; 344 | pair location = probs_queue.top().second; 345 | unsigned old_beam_id = location.first, char_id = location.second; 346 | 347 | vector seq = (*sequences)[old_beam_id]; 348 | seq.push_back(char_id); 349 | new_seq[beam_id] = seq; 350 | log_scores[beam_id] = log_prob; // Update the score 351 | 352 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state 353 | probs_queue.pop(); 354 | } 355 | } 356 | 357 | // Update the sequences now. 358 | for (auto& it : new_seq) { 359 | (*sequences)[it.first] = it.second; 360 | } 361 | 362 | // Check if a sequence should be made inactive. 
363 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 364 | if (active_beams[beam_id] && 365 | ((*sequences)[beam_id].back() == char_to_id[EOW] || 366 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) { 367 | active_beams[beam_id] = false; 368 | } 369 | } 370 | 371 | // Check if all sequences are inactive. 372 | bool all_inactive = true; 373 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 374 | if (active_beams[beam_id]) { 375 | all_inactive = false; 376 | break; 377 | } 378 | } 379 | 380 | if (all_inactive) { 381 | *tm_scores = log_scores; 382 | return; 383 | } 384 | } 385 | } 386 | 387 | void Serialize(string& filename, EncDec& model, vector* cnn_models) { 388 | ofstream outfile(filename); 389 | if (!outfile.is_open()) { 390 | cerr << "File opening failed" << endl; 391 | } 392 | 393 | boost::archive::text_oarchive oa(outfile); 394 | oa & model; 395 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 396 | oa & *(*cnn_models)[i]; 397 | } 398 | 399 | cerr << "Saved model to: " << filename << endl; 400 | outfile.close(); 401 | } 402 | 403 | void Read(string& filename, EncDec* model, vector* cnn_models) { 404 | ifstream infile(filename); 405 | if (!infile.is_open()) { 406 | cerr << "File opening failed" << endl; 407 | } 408 | 409 | boost::archive::text_iarchive ia(infile); 410 | ia & *model; 411 | for (unsigned i = 0; i < model->morph_len; ++i) { 412 | Model *cnn_model = new Model(); 413 | cnn_models->push_back(cnn_model); 414 | } 415 | 416 | model->InitParams(cnn_models); 417 | for (unsigned i = 0; i < model->morph_len; ++i) { 418 | ia & *(*cnn_models)[i]; 419 | } 420 | 421 | cerr << "Loaded model from: " << filename << endl; 422 | infile.close(); 423 | } 424 | 425 | -------------------------------------------------------------------------------- /src/enc-dec.h: -------------------------------------------------------------------------------- 1 | #ifndef ENC_DEC_H_ 2 | #define ENC_DEC_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include 
"cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | using namespace cnn; 23 | using namespace cnn::expr; 24 | 25 | class EncDec { 26 | public: 27 | vector input_forward, input_backward, output_forward; 28 | vector char_vecs; 29 | 30 | Expression hidden_to_output, hidden_to_output_bias; 31 | vector phidden_to_output, phidden_to_output_bias; 32 | 33 | Expression transform_encoded, transform_encoded_bias; 34 | vector ptransform_encoded, ptransform_encoded_bias; 35 | 36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 37 | vector eps_vecs; 38 | 39 | EncDec() {} 40 | 41 | EncDec(const unsigned& char_length, const unsigned& hidden_length, 42 | const unsigned& vocab_length, const unsigned& layers, 43 | const unsigned& num_morph, vector* m, 44 | vector* optimizer); 45 | 46 | void InitParams(vector* m); 47 | 48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 49 | 50 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 51 | Expression* hidden, ComputationGraph *cg); 52 | 53 | void TransformEncodedInput(Expression* encoded_input) const; 54 | 55 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 56 | 57 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 58 | 59 | Expression ComputeLoss(const vector& hidden_units, 60 | const vector& targets) const; 61 | 62 | float Train(const unsigned& morph_id, const vector& inputs, 63 | const vector& outputs, AdadeltaTrainer* ada_gd); 64 | 65 | friend class boost::serialization::access; 66 | template void serialize(Archive& ar, const unsigned int) { 67 | ar & char_len; 68 | ar & hidden_len; 69 | ar & vocab_len; 70 | ar & layers; 71 | ar & morph_len; 72 | ar & max_eps; 73 | } 74 
| }; 75 | 76 | void 77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 78 | const vector& input_ids, 79 | vector* pred_target_ids, vector* ensmb_model); 80 | 81 | void 82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 83 | unordered_map& char_to_id, 84 | const vector& input_ids, 85 | vector >* sequences, vector* tm_scores, 86 | vector* ensmb_model); 87 | 88 | void Serialize(string& filename, EncDec& model, vector* cnn_model); 89 | 90 | void Read(string& filename, EncDec* model, vector* cnn_model); 91 | 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /src/eval-ensemble-enc-dec-attn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "enc-dec-attn.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | unsigned beam_size = 2; 27 | 28 | unordered_map char_to_id, morph_to_id; 29 | unordered_map id_to_char, id_to_morph; 30 | 31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 32 | unsigned vocab_size = char_to_id.size(); 33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 34 | unsigned morph_size = morph_to_id.size(); 35 | 36 | vector test_data; // Read the dev file in a vector 37 | ReadData(test_filename, &test_data); 38 | 39 | vector > ensmb_m; 40 | vector ensmb_nn; 41 | for (unsigned i = 0; i < argc - 4; ++i) { 42 | vector m; 43 | EncDecAttn nn; 44 | string f = argv[i + 
4]; 45 | Read(f, &nn, &m); 46 | ensmb_m.push_back(m); 47 | ensmb_nn.push_back(nn); 48 | } 49 | 50 | // Read the test file and output predictions for the words. 51 | string line; 52 | double correct = 0, total = 0; 53 | vector object_pointers; 54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 55 | object_pointers.push_back(&ensmb_nn[i]); 56 | } 57 | for (string& line : test_data) { 58 | vector items = split_line(line, '|'); 59 | vector input_ids, target_ids, pred_target_ids; 60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 61 | for (const string& ch : split_line(items[0], ' ')) { 62 | input_ids.push_back(char_to_id[ch]); 63 | } 64 | for (const string& ch : split_line(items[1], ' ')) { 65 | target_ids.push_back(char_to_id[ch]); 66 | } 67 | unsigned morph_id = morph_to_id[items[2]]; 68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 69 | &object_pointers); 70 | 71 | string prediction = ""; 72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 73 | prediction += id_to_char[pred_target_ids[i]]; 74 | if (i != pred_target_ids.size() - 1) { 75 | prediction += " "; 76 | } 77 | } 78 | if (prediction == items[1]) { 79 | correct += 1; 80 | } else { 81 | //cout << "GOLD: " << line << endl; 82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 83 | } 84 | total += 1; 85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl; 86 | } 87 | cerr << "Prediction Accuracy: " << correct / total << endl; 88 | return 1; 89 | } 90 | -------------------------------------------------------------------------------- /src/eval-ensemble-enc-dec.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "enc-dec.h" 12 | 13 | 
#include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | unsigned beam_size = 2; 27 | 28 | unordered_map char_to_id, morph_to_id; 29 | unordered_map id_to_char, id_to_morph; 30 | 31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 32 | unsigned vocab_size = char_to_id.size(); 33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 34 | unsigned morph_size = morph_to_id.size(); 35 | 36 | vector test_data; // Read the dev file in a vector 37 | ReadData(test_filename, &test_data); 38 | 39 | vector > ensmb_m; 40 | vector ensmb_nn; 41 | for (unsigned i = 0; i < argc - 4; ++i) { 42 | vector m; 43 | EncDec nn; 44 | string f = argv[i + 4]; 45 | Read(f, &nn, &m); 46 | ensmb_m.push_back(m); 47 | ensmb_nn.push_back(nn); 48 | } 49 | 50 | // Read the test file and output predictions for the words. 
51 | string line; 52 | double correct = 0, total = 0; 53 | vector object_pointers; 54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 55 | object_pointers.push_back(&ensmb_nn[i]); 56 | } 57 | for (string& line : test_data) { 58 | vector items = split_line(line, '|'); 59 | vector input_ids, target_ids, pred_target_ids; 60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 61 | for (const string& ch : split_line(items[0], ' ')) { 62 | input_ids.push_back(char_to_id[ch]); 63 | } 64 | for (const string& ch : split_line(items[1], ' ')) { 65 | target_ids.push_back(char_to_id[ch]); 66 | } 67 | unsigned morph_id = morph_to_id[items[2]]; 68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 69 | &object_pointers); 70 | 71 | string prediction = ""; 72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 73 | prediction += id_to_char[pred_target_ids[i]]; 74 | if (i != pred_target_ids.size() - 1) { 75 | prediction += " "; 76 | } 77 | } 78 | if (prediction == items[1]) { 79 | correct += 1; 80 | } else { 81 | //cout << "GOLD: " << line << endl; 82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 83 | } 84 | total += 1; 85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl; 86 | } 87 | cerr << "Prediction Accuracy: " << correct / total << endl; 88 | return 1; 89 | } 90 | -------------------------------------------------------------------------------- /src/eval-ensemble-joint-enc-beam.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file outputs all the strings in the beam. 
3 | */ 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | #include "joint-enc-morph.h" 15 | 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | using namespace cnn; 21 | using namespace cnn::expr; 22 | 23 | int main(int argc, char** argv) { 24 | cnn::Initialize(argc, argv); 25 | 26 | string vocab_filename = argv[1]; // vocabulary of words/characters 27 | string morph_filename = argv[2]; 28 | string test_filename = argv[3]; 29 | unsigned beam_size = atoi(argv[4]); 30 | 31 | unordered_map char_to_id, morph_to_id; 32 | unordered_map id_to_char, id_to_morph; 33 | 34 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 35 | unsigned vocab_size = char_to_id.size(); 36 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 37 | unsigned morph_size = morph_to_id.size(); 38 | 39 | vector test_data; // Read the dev file in a vector 40 | ReadData(test_filename, &test_data); 41 | 42 | vector > ensmb_m; 43 | vector ensmb_nn; 44 | for (unsigned i = 0; i < argc - 5; ++i) { 45 | vector m; 46 | JointEncMorph nn; 47 | string f = argv[i + 5]; 48 | Read(f, &nn, &m); 49 | ensmb_m.push_back(m); 50 | ensmb_nn.push_back(nn); 51 | } 52 | 53 | // Read the test file and output predictions for the words. 
54 | string line; 55 | double correct = 0, total = 0; 56 | vector object_pointers; 57 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 58 | object_pointers.push_back(&ensmb_nn[i]); 59 | } 60 | 61 | for (string& line : test_data) { 62 | vector items = split_line(line, '|'); 63 | vector input_ids, target_ids, pred_target_ids; 64 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 65 | for (const string& ch : split_line(items[0], ' ')) { 66 | input_ids.push_back(char_to_id[ch]); 67 | } 68 | for (const string& ch : split_line(items[1], ' ')) { 69 | target_ids.push_back(char_to_id[ch]); 70 | } 71 | 72 | vector > pred_beams; 73 | vector beam_score; 74 | unsigned morph_id = morph_to_id[items[2]]; 75 | EnsembleBeamDecode(morph_id, beam_size, char_to_id, input_ids, &pred_beams, 76 | &beam_score, &object_pointers); 77 | 78 | cout << "GOLD: " << line << endl; 79 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 80 | pred_target_ids = pred_beams[beam_id]; 81 | string prediction = ""; 82 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 83 | prediction += id_to_char[pred_target_ids[i]]; 84 | if (i != pred_target_ids.size() - 1) { 85 | prediction += " "; 86 | } 87 | } 88 | cout << "PRED: " << prediction << " " << beam_score[beam_id] << endl; 89 | } 90 | } 91 | return 1; 92 | } 93 | -------------------------------------------------------------------------------- /src/eval-ensemble-joint-enc-dec-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "joint-enc-dec-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | 
cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | 27 | unordered_map char_to_id, morph_to_id; 28 | unordered_map id_to_char, id_to_morph; 29 | 30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 31 | unsigned vocab_size = char_to_id.size(); 32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 33 | unsigned morph_size = morph_to_id.size(); 34 | 35 | vector test_data; // Read the dev file in a vector 36 | ReadData(test_filename, &test_data); 37 | 38 | vector > ensmb_m; 39 | vector ensmb_nn; 40 | for (unsigned i = 0; i < argc - 4; ++i) { 41 | vector m; 42 | JointEncDecMorph nn; 43 | string f = argv[i + 4]; 44 | Read(f, &nn, &m); 45 | ensmb_m.push_back(m); 46 | ensmb_nn.push_back(nn); 47 | } 48 | 49 | // Read the test file and output predictions for the words. 50 | string line; 51 | vector object_pointers; 52 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 53 | object_pointers.push_back(&ensmb_nn[i]); 54 | } 55 | double correct = 0, total = 0; 56 | for (string& line : test_data) { 57 | vector items = split_line(line, '|'); 58 | vector input_ids, target_ids, pred_target_ids; 59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 60 | for (const string& ch : split_line(items[0], ' ')) { 61 | input_ids.push_back(char_to_id[ch]); 62 | } 63 | for (const string& ch : split_line(items[1], ' ')) { 64 | target_ids.push_back(char_to_id[ch]); 65 | } 66 | unsigned morph_id = morph_to_id[items[2]]; 67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 68 | &object_pointers); 69 | 70 | string prediction = ""; 71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 72 | prediction += id_to_char[pred_target_ids[i]]; 73 | if (i != pred_target_ids.size() - 1) { 74 | prediction += " "; 75 | } 76 | } 77 | if (prediction == items[1]) { 78 | correct += 1; 79 | } else { 80 | cout << "GOLD: " << line << 
endl; 81 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 82 | } 83 | total += 1; 84 | } 85 | cerr << "Prediction Accuracy: " << correct / total << endl; 86 | return 1; 87 | } 88 | -------------------------------------------------------------------------------- /src/eval-ensemble-joint-enc-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "joint-enc-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | 27 | unordered_map char_to_id, morph_to_id; 28 | unordered_map id_to_char, id_to_morph; 29 | 30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 31 | unsigned vocab_size = char_to_id.size(); 32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 33 | unsigned morph_size = morph_to_id.size(); 34 | 35 | vector test_data; // Read the dev file in a vector 36 | ReadData(test_filename, &test_data); 37 | 38 | vector > ensmb_m; 39 | vector ensmb_nn; 40 | for (unsigned i = 0; i < argc - 4; ++i) { 41 | vector m; 42 | JointEncMorph nn; 43 | string f = argv[i + 4]; 44 | Read(f, &nn, &m); 45 | ensmb_m.push_back(m); 46 | ensmb_nn.push_back(nn); 47 | } 48 | 49 | // Read the test file and output predictions for the words. 
50 | string line; 51 | vector object_pointers; 52 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 53 | object_pointers.push_back(&ensmb_nn[i]); 54 | } 55 | double correct = 0, total = 0; 56 | for (string& line : test_data) { 57 | vector items = split_line(line, '|'); 58 | vector input_ids, target_ids, pred_target_ids; 59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 60 | for (const string& ch : split_line(items[0], ' ')) { 61 | input_ids.push_back(char_to_id[ch]); 62 | } 63 | for (const string& ch : split_line(items[1], ' ')) { 64 | target_ids.push_back(char_to_id[ch]); 65 | } 66 | unsigned morph_id = morph_to_id[items[2]]; 67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 68 | &object_pointers); 69 | 70 | string prediction = ""; 71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 72 | prediction += id_to_char[pred_target_ids[i]]; 73 | if (i != pred_target_ids.size() - 1) { 74 | prediction += " "; 75 | } 76 | } 77 | if (prediction == items[1]) { 78 | correct += 1; 79 | } else { 80 | cout << "GOLD: " << line << endl; 81 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 82 | } 83 | total += 1; 84 | } 85 | cerr << "Prediction Accuracy: " << correct / total << endl; 86 | return 1; 87 | } 88 | -------------------------------------------------------------------------------- /src/eval-ensemble-lm-joint-enc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "lm.h" 11 | #include "utils.h" 12 | #include "lm-joint-enc.h" 13 | 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | using namespace cnn; 19 | using namespace cnn::expr; 20 | 21 | int main(int argc, char** argv) { 22 | cnn::Initialize(argc, argv); 23 | 24 | string 
vocab_filename = argv[1]; // vocabulary of words/characters 25 | string morph_filename = argv[2]; 26 | string test_filename = argv[3]; 27 | string lm_model_filename = argv[4]; 28 | 29 | unordered_map char_to_id, morph_to_id; 30 | unordered_map id_to_char, id_to_morph; 31 | 32 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 33 | unsigned vocab_size = char_to_id.size(); 34 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 35 | unsigned morph_size = morph_to_id.size(); 36 | 37 | vector test_data; // Read the dev file in a vector 38 | ReadData(test_filename, &test_data); 39 | 40 | LM lm(lm_model_filename, char_to_id, id_to_char); 41 | 42 | vector > ensmb_m; 43 | vector ensmb_nn; 44 | for (unsigned i = 0; i < argc - 5; ++i) { 45 | vector m; 46 | LMJointEnc nn; 47 | string f = argv[i + 5]; 48 | Read(f, &nn, &m); 49 | ensmb_m.push_back(m); 50 | ensmb_nn.push_back(nn); 51 | } 52 | 53 | // Read the test file and output predictions for the words. 54 | string line; 55 | vector object_pointers; 56 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 57 | object_pointers.push_back(&ensmb_nn[i]); 58 | } 59 | double correct = 0, total = 0; 60 | for (string& line : test_data) { 61 | vector items = split_line(line, '|'); 62 | vector input_ids, target_ids, pred_target_ids; 63 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 64 | for (const string& ch : split_line(items[0], ' ')) { 65 | input_ids.push_back(char_to_id[ch]); 66 | } 67 | for (const string& ch : split_line(items[1], ' ')) { 68 | target_ids.push_back(char_to_id[ch]); 69 | } 70 | unsigned morph_id = morph_to_id[items[2]]; 71 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm, 72 | &object_pointers); 73 | 74 | string prediction = ""; 75 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 76 | prediction += id_to_char[pred_target_ids[i]]; 77 | if (i != pred_target_ids.size() - 1) { 78 | prediction += " "; 79 | } 80 | } 81 | if (prediction == items[1]) { 82 | correct += 
1; 83 | } else { 84 | cout << "GOLD: " << line << endl; 85 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 86 | } 87 | total += 1; 88 | } 89 | cerr << "Prediction Accuracy: " << correct / total << endl; 90 | return 1; 91 | } 92 | -------------------------------------------------------------------------------- /src/eval-ensemble-lm-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "lm.h" 11 | #include "utils.h" 12 | #include "lm-sep-morph.h" 13 | 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | using namespace cnn; 19 | using namespace cnn::expr; 20 | 21 | int main(int argc, char** argv) { 22 | cnn::Initialize(argc, argv); 23 | 24 | string vocab_filename = argv[1]; // vocabulary of words/characters 25 | string morph_filename = argv[2]; 26 | string test_filename = argv[3]; 27 | string lm_model_filename = argv[4]; 28 | 29 | unordered_map char_to_id, morph_to_id; 30 | unordered_map id_to_char, id_to_morph; 31 | 32 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 33 | unsigned vocab_size = char_to_id.size(); 34 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 35 | unsigned morph_size = morph_to_id.size(); 36 | 37 | vector test_data; // Read the dev file in a vector 38 | ReadData(test_filename, &test_data); 39 | 40 | LM lm(lm_model_filename, char_to_id, id_to_char); 41 | 42 | vector > ensmb_m; 43 | vector ensmb_nn; 44 | for (unsigned i = 0; i < argc - 5; ++i) { 45 | vector m; 46 | LMSepMorph nn; 47 | string f = argv[i + 5]; 48 | Read(f, &nn, &m); 49 | ensmb_m.push_back(m); 50 | ensmb_nn.push_back(nn); 51 | } 52 | 53 | // Read the test file and output predictions for the words. 
54 | vector object_pointers; 55 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 56 | object_pointers.push_back(&ensmb_nn[i]); 57 | } 58 | 59 | string line; 60 | double correct = 0, total = 0; 61 | for (string& line : test_data) { 62 | vector items = split_line(line, '|'); 63 | vector input_ids, target_ids, pred_target_ids; 64 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 65 | 66 | string input = items[0], output = items[1]; 67 | for (const string& ch : split_line(input, ' ')) { 68 | input_ids.push_back(char_to_id[ch]); 69 | } 70 | for (const string& ch : split_line(output, ' ')) { 71 | target_ids.push_back(char_to_id[ch]); 72 | } 73 | unsigned morph_id = morph_to_id[items[2]]; 74 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm, 75 | &object_pointers); 76 | 77 | string prediction = ""; 78 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 79 | prediction += id_to_char[pred_target_ids[i]]; 80 | if (i != pred_target_ids.size() - 1) { 81 | prediction += " "; 82 | } 83 | } 84 | if (prediction == output) { 85 | correct += 1; 86 | } else { 87 | cout << "GOLD: " << line << endl; 88 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 89 | } 90 | total += 1; 91 | } 92 | cerr << "Prediction Accuracy: " << correct / total << endl; 93 | return 1; 94 | } 95 | -------------------------------------------------------------------------------- /src/eval-ensemble-no-enc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "no-enc.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, 
argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | unsigned beam_size = 2; 27 | 28 | unordered_map char_to_id, morph_to_id; 29 | unordered_map id_to_char, id_to_morph; 30 | 31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 32 | unsigned vocab_size = char_to_id.size(); 33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 34 | unsigned morph_size = morph_to_id.size(); 35 | 36 | vector test_data; // Read the dev file in a vector 37 | ReadData(test_filename, &test_data); 38 | 39 | vector > ensmb_m; 40 | vector ensmb_nn; 41 | for (unsigned i = 0; i < argc - 4; ++i) { 42 | vector m; 43 | NoEnc nn; 44 | string f = argv[i + 4]; 45 | Read(f, &nn, &m); 46 | ensmb_m.push_back(m); 47 | ensmb_nn.push_back(nn); 48 | } 49 | 50 | // Read the test file and output predictions for the words. 51 | string line; 52 | double correct = 0, total = 0; 53 | vector object_pointers; 54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 55 | object_pointers.push_back(&ensmb_nn[i]); 56 | } 57 | for (string& line : test_data) { 58 | vector items = split_line(line, '|'); 59 | vector input_ids, target_ids, pred_target_ids; 60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 61 | for (const string& ch : split_line(items[0], ' ')) { 62 | input_ids.push_back(char_to_id[ch]); 63 | } 64 | for (const string& ch : split_line(items[1], ' ')) { 65 | target_ids.push_back(char_to_id[ch]); 66 | } 67 | unsigned morph_id = morph_to_id[items[2]]; 68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 69 | &object_pointers); 70 | 71 | string prediction = ""; 72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 73 | prediction += id_to_char[pred_target_ids[i]]; 74 | if (i != pred_target_ids.size() - 1) { 75 | prediction += " "; 76 | } 77 | } 78 | if (prediction == items[1]) { 79 | correct += 1; 80 | } else { 81 | //cout << "GOLD: " << line << endl; 
82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl; 83 | } 84 | total += 1; 85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl; 86 | } 87 | cerr << "Prediction Accuracy: " << correct / total << endl; 88 | return 1; 89 | } 90 | -------------------------------------------------------------------------------- /src/eval-ensemble-sep-morph-beam.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file outputs all the strings in the beam. 3 | */ 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | #include "sep-morph.h" 15 | 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | using namespace cnn; 21 | using namespace cnn::expr; 22 | 23 | int main(int argc, char** argv) { 24 | cnn::Initialize(argc, argv); 25 | 26 | string vocab_filename = argv[1]; // vocabulary of words/characters 27 | string morph_filename = argv[2]; 28 | string test_filename = argv[3]; 29 | string lm_filename = argv[4]; 30 | unsigned beam_size = atoi(argv[5]); 31 | 32 | unordered_map char_to_id, morph_to_id; 33 | unordered_map id_to_char, id_to_morph; 34 | 35 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 36 | unsigned vocab_size = char_to_id.size(); 37 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 38 | unsigned morph_size = morph_to_id.size(); 39 | 40 | vector test_data; // Read the dev file in a vector 41 | ReadData(test_filename, &test_data); 42 | 43 | vector > ensmb_m; 44 | vector ensmb_nn; 45 | for (unsigned i = 0; i < argc - 6; ++i) { 46 | vector m; 47 | SepMorph nn; 48 | string f = argv[i + 6]; 49 | Read(f, &nn, &m); 50 | ensmb_m.push_back(m); 51 | ensmb_nn.push_back(nn); 52 | } 53 | 54 | // Read the test file and output predictions for the words. 
55 | string line; 56 | double correct = 0, total = 0; 57 | vector object_pointers; 58 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 59 | object_pointers.push_back(&ensmb_nn[i]); 60 | } 61 | 62 | for (string& line : test_data) { 63 | vector items = split_line(line, '|'); 64 | vector input_ids, target_ids, pred_target_ids; 65 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 66 | for (const string& ch : split_line(items[0], ' ')) { 67 | input_ids.push_back(char_to_id[ch]); 68 | } 69 | for (const string& ch : split_line(items[1], ' ')) { 70 | target_ids.push_back(char_to_id[ch]); 71 | } 72 | 73 | vector > pred_beams; 74 | vector beam_score; 75 | unsigned morph_id = morph_to_id[items[2]]; 76 | EnsembleBeamDecode(morph_id, beam_size, char_to_id, input_ids, &pred_beams, 77 | &beam_score, &object_pointers); 78 | 79 | cout << "GOLD: " << line << endl; 80 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 81 | pred_target_ids = pred_beams[beam_id]; 82 | string prediction = ""; 83 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 84 | prediction += id_to_char[pred_target_ids[i]]; 85 | if (i != pred_target_ids.size() - 1) { 86 | prediction += " "; 87 | } 88 | } 89 | cout << "PRED: " << prediction << " " << beam_score[beam_id] << endl; 90 | } 91 | } 92 | return 1; 93 | } 94 | -------------------------------------------------------------------------------- /src/eval-ensemble-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "sep-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | 
string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string test_filename = argv[3]; 26 | 27 | unordered_map char_to_id, morph_to_id; 28 | unordered_map id_to_char, id_to_morph; 29 | 30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 31 | unsigned vocab_size = char_to_id.size(); 32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 33 | unsigned morph_size = morph_to_id.size(); 34 | 35 | vector test_data; // Read the dev file in a vector 36 | ReadData(test_filename, &test_data); 37 | 38 | vector > ensmb_m; 39 | vector ensmb_nn; 40 | for (unsigned i = 0; i < argc - 4; ++i) { 41 | vector m; 42 | SepMorph nn; 43 | string f = argv[i + 4]; 44 | Read(f, &nn, &m); 45 | ensmb_m.push_back(m); 46 | ensmb_nn.push_back(nn); 47 | } 48 | 49 | // Read the test file and output predictions for the words. 50 | string line; 51 | double correct = 0, total = 0; 52 | vector object_pointers; 53 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) { 54 | object_pointers.push_back(&ensmb_nn[i]); 55 | } 56 | for (string& line : test_data) { 57 | vector items = split_line(line, '|'); 58 | vector input_ids, target_ids, pred_target_ids; 59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 60 | for (const string& ch : split_line(items[0], ' ')) { 61 | input_ids.push_back(char_to_id[ch]); 62 | } 63 | for (const string& ch : split_line(items[1], ' ')) { 64 | target_ids.push_back(char_to_id[ch]); 65 | } 66 | unsigned morph_id = morph_to_id[items[2]]; 67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 68 | &object_pointers); 69 | 70 | string prediction = ""; 71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 72 | prediction += id_to_char[pred_target_ids[i]]; 73 | if (i != pred_target_ids.size() - 1) { 74 | prediction += " "; 75 | } 76 | } 77 | if (prediction == items[1]) { 78 | correct += 1; 79 | } else { 80 | cout << "GOLD: " << line << endl; 81 | cout << "PRED: " << items[0] << "|" << 
prediction << "|" << items[2] << endl; 82 | } 83 | total += 1; 84 | } 85 | cerr << "Prediction Accuracy: " << correct / total << endl; 86 | return 1; 87 | } 88 | -------------------------------------------------------------------------------- /src/joint-enc-dec-morph.cc: -------------------------------------------------------------------------------- 1 | #include "joint-enc-dec-morph.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | 7 | string BOW = "", EOW = ""; 8 | unsigned MAX_PRED_LEN = 100; 9 | 10 | JointEncDecMorph::JointEncDecMorph( 11 | const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void JointEncDecMorph::InitParams(vector* m) { 24 | // Have all the shared parameters in one model 25 | input_forward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]); 26 | input_backward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]); 27 | output_forward = LSTMBuilder(layers, 2 * char_len + hidden_len, 28 | hidden_len, (*m)[morph_len]); 29 | 30 | char_vecs = (*m)[morph_len]->add_lookup_parameters(vocab_len, {char_len}); 31 | 32 | phidden_to_output = (*m)[morph_len]->add_parameters({vocab_len, hidden_len}); 33 | phidden_to_output_bias = (*m)[morph_len]->add_parameters({vocab_len, 1}); 34 | 35 | eps_vecs = (*m)[morph_len]->add_lookup_parameters(max_eps, {char_len}); 36 | 37 | for (unsigned i = 0; i < morph_len; ++i) { 38 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 39 | 2 * hidden_len})); 40 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 41 | } 42 | } 43 | 44 | void JointEncDecMorph::AddParamsToCG(const unsigned& morph_id, 
ComputationGraph* cg) { 45 | input_forward.new_graph(*cg); 46 | input_backward.new_graph(*cg); 47 | output_forward.new_graph(*cg); 48 | 49 | hidden_to_output = parameter(*cg, phidden_to_output); 50 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias); 51 | 52 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 53 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 54 | } 55 | 56 | void JointEncDecMorph::RunFwdBwd(const vector& inputs, 57 | Expression* hidden, ComputationGraph *cg) { 58 | vector input_vecs; 59 | for (const unsigned& input_id : inputs) { 60 | input_vecs.push_back(lookup(*cg, char_vecs, input_id)); 61 | } 62 | 63 | // Run forward LSTM 64 | Expression forward_unit; 65 | input_forward.start_new_sequence(); 66 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 67 | forward_unit = input_forward.add_input(input_vecs[i]); 68 | } 69 | 70 | // Run backward LSTM 71 | Expression backward_unit; 72 | input_backward.start_new_sequence(); 73 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 74 | backward_unit = input_backward.add_input(input_vecs[i]); 75 | } 76 | 77 | // Concatenate the forward and back hidden layers 78 | *hidden = concatenate({forward_unit, backward_unit}); 79 | } 80 | 81 | void JointEncDecMorph::TransformEncodedInput(Expression* encoded_input) const { 82 | *encoded_input = affine_transform({transform_encoded_bias, 83 | transform_encoded, *encoded_input}); 84 | } 85 | 86 | void JointEncDecMorph::ProjectToOutput(const Expression& hidden, Expression* out) 87 | const { 88 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 89 | } 90 | 91 | 92 | Expression JointEncDecMorph::ComputeLoss(const vector& hidden_units, 93 | const vector& targets) const { 94 | assert(hidden_units.size() == targets.size()); 95 | vector losses; 96 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 97 | Expression out; 98 | ProjectToOutput(hidden_units[i], &out); 99 | 
losses.push_back(pickneglogsoftmax(out, targets[i])); 100 | } 101 | return sum(losses); 102 | } 103 | 104 | float JointEncDecMorph::Train(const unsigned& morph_id, const vector& inputs, 105 | const vector& outputs, AdadeltaTrainer* opt, 106 | AdadeltaTrainer* shared_opt) { 107 | ComputationGraph cg; 108 | AddParamsToCG(morph_id, &cg); 109 | 110 | // Encode and Transform to feed into decoder 111 | Expression encoded_input_vec; 112 | RunFwdBwd(inputs, &encoded_input_vec, &cg); 113 | TransformEncodedInput(&encoded_input_vec); 114 | 115 | // Use this encoded word vector to predict the transformed word 116 | vector input_vecs_for_dec; 117 | vector output_ids_for_pred; 118 | for (unsigned i = 0; i < outputs.size(); ++i) { 119 | if (i < outputs.size() - 1) { 120 | // '' will not be fed as input -- it needs to be predicted. 121 | if (i < inputs.size() - 1) { 122 | input_vecs_for_dec.push_back(concatenate( 123 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 124 | lookup(cg, char_vecs, inputs[i + 1])})); 125 | } else { 126 | input_vecs_for_dec.push_back(concatenate( 127 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 128 | lookup(cg, eps_vecs, min(unsigned(i - inputs.size()), max_eps - 1))})); 129 | } 130 | } 131 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 
132 | output_ids_for_pred.push_back(outputs[i]); 133 | } 134 | } 135 | 136 | vector decoder_hidden_units; 137 | output_forward.start_new_sequence(); 138 | for (const auto& vec : input_vecs_for_dec) { 139 | decoder_hidden_units.push_back(output_forward.add_input(vec)); 140 | } 141 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 142 | 143 | float return_loss = as_scalar(cg.forward()); 144 | cg.backward(); 145 | opt->update(1.0f); // Update the morph specific parameters 146 | shared_opt->update(1.0f); // Update the shared parameters 147 | return return_loss; 148 | } 149 | 150 | void Serialize(string& filename, JointEncDecMorph& model, 151 | vector* cnn_models) { 152 | ofstream outfile(filename); 153 | if (!outfile.is_open()) { 154 | cerr << "File opening failed" << endl; 155 | } 156 | 157 | boost::archive::text_oarchive oa(outfile); 158 | oa & model; 159 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 160 | oa & *(*cnn_models)[i]; 161 | } 162 | 163 | cerr << "Saved model to: " << filename << endl; 164 | outfile.close(); 165 | } 166 | 167 | void Read(string& filename, JointEncDecMorph* model, vector* cnn_models) { 168 | ifstream infile(filename); 169 | if (!infile.is_open()) { 170 | cerr << "File opening failed" << endl; 171 | } 172 | 173 | boost::archive::text_iarchive ia(infile); 174 | ia & *model; 175 | for (unsigned i = 0; i < model->morph_len + 1; ++i) { 176 | Model *cnn_model = new Model(); 177 | cnn_models->push_back(cnn_model); 178 | } 179 | 180 | model->InitParams(cnn_models); 181 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 182 | ia & *(*cnn_models)[i]; 183 | } 184 | 185 | cerr << "Loaded model from: " << filename << endl; 186 | infile.close(); 187 | } 188 | 189 | void 190 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 191 | const vector& input_ids, vector* pred_target_ids, 192 | vector* ensmb_model) { 193 | ComputationGraph cg; 194 | 195 | unsigned ensmb = ensmb_model->size(); 196 | vector 
encoded_word_vecs; 197 | for (unsigned i = 0; i < ensmb; ++i) { 198 | Expression encoded_word_vec; 199 | auto model = (*ensmb_model)[i]; 200 | model->AddParamsToCG(morph_id, &cg); 201 | model->RunFwdBwd(input_ids, &encoded_word_vec, &cg); 202 | model->TransformEncodedInput(&encoded_word_vec); 203 | encoded_word_vecs.push_back(encoded_word_vec); 204 | model->output_forward.start_new_sequence(); 205 | } 206 | 207 | unsigned out_index = 1; 208 | unsigned pred_index = char_to_id[BOW]; 209 | while (pred_target_ids->size() < MAX_PRED_LEN) { 210 | vector ensmb_out; 211 | pred_target_ids->push_back(pred_index); 212 | if (pred_index == char_to_id[EOW]) { 213 | return; // If the end is found, break from the loop and return 214 | } 215 | 216 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 217 | auto model = (*ensmb_model)[ensmb_id]; 218 | Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index); 219 | Expression input, input_char_vec; 220 | if (out_index < input_ids.size()) { 221 | input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]); 222 | } else { 223 | input_char_vec = lookup(cg, model->eps_vecs, 224 | min(unsigned(out_index - input_ids.size()), 225 | model->max_eps - 1)); 226 | } 227 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 228 | input_char_vec}); 229 | 230 | Expression hidden = model->output_forward.add_input(input); 231 | Expression out; 232 | model->ProjectToOutput(hidden, &out); 233 | ensmb_out.push_back(log_softmax(out)); 234 | } 235 | 236 | Expression out = sum(ensmb_out) / ensmb_out.size(); 237 | vector dist = as_vector(cg.incremental_forward()); 238 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 239 | out_index++; 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /src/joint-enc-dec-morph.h: -------------------------------------------------------------------------------- 1 | #ifndef JOINT_ENC_DEC_MORPH_H_ 2 | 
#define JOINT_ENC_DEC_MORPH_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | using namespace cnn; 21 | using namespace cnn::expr; 22 | 23 | // The class has a shared encoder acorss all morphological types. 24 | // Other auxiliary units are also shared, except for the decoder and 25 | // input transofrmation parameters. 26 | class JointEncDecMorph { 27 | public: 28 | LSTMBuilder input_forward, input_backward, output_forward; // Shared encoder 29 | LookupParameters* char_vecs; // Shared char vectors 30 | 31 | Expression hidden_to_output, hidden_to_output_bias; 32 | Parameters *phidden_to_output, *phidden_to_output_bias; // Shared 33 | 34 | // The only non-shared component is tranformation. 35 | Expression transform_encoded, transform_encoded_bias; 36 | vector ptransform_encoded, ptransform_encoded_bias; 37 | 38 | LookupParameters* eps_vecs; 39 | 40 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 41 | 42 | JointEncDecMorph() {} 43 | 44 | JointEncDecMorph(const unsigned& char_length, const unsigned& hidden_length, 45 | const unsigned& vocab_length, const unsigned& layers, 46 | const unsigned& num_morph, vector* m, 47 | vector* optimizer); 48 | 49 | void InitParams(vector* m); 50 | 51 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 52 | 53 | void RunFwdBwd(const vector& inputs, 54 | Expression* hidden, ComputationGraph *cg); 55 | 56 | void TransformEncodedInput(Expression* encoded_input) const; 57 | 58 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 59 | 60 | Expression ComputeLoss(const vector& hidden_units, 61 | const vector& targets) const; 62 | 63 | float Train(const unsigned& morph_id, const vector& inputs, 64 | 
const vector& outputs, AdadeltaTrainer* opt, 65 | AdadeltaTrainer* shared_opt); 66 | 67 | friend class boost::serialization::access; 68 | template void serialize(Archive& ar, const unsigned int) { 69 | ar & char_len; 70 | ar & hidden_len; 71 | ar & vocab_len; 72 | ar & layers; 73 | ar & morph_len; 74 | ar & max_eps; 75 | } 76 | }; 77 | 78 | void Serialize(string& filename, JointEncDecMorph& model, vector* cnn_model); 79 | 80 | void Read(string& filename, JointEncDecMorph* model, vector* cnn_model); 81 | 82 | void 83 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 84 | const vector& input_ids, vector* pred_target_ids, 85 | vector* ensmb_model); 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /src/joint-enc-morph.cc: -------------------------------------------------------------------------------- 1 | #include "joint-enc-morph.h" 2 | 3 | #include 4 | 5 | using namespace std; 6 | using namespace cnn; 7 | using namespace cnn::expr; 8 | 9 | string BOW = "", EOW = ""; 10 | unsigned MAX_PRED_LEN = 100; 11 | float NEG_INF = numeric_limits::min(); 12 | 13 | JointEncMorph::JointEncMorph( 14 | const unsigned& char_length, const unsigned& hidden_length, 15 | const unsigned& vocab_length, const unsigned& num_layers, 16 | const unsigned& num_morph, vector* m, 17 | vector* optimizer) { 18 | char_len = char_length; 19 | hidden_len = hidden_length; 20 | vocab_len = vocab_length; 21 | layers = num_layers; 22 | morph_len = num_morph; 23 | InitParams(m); 24 | } 25 | 26 | void JointEncMorph::InitParams(vector* m) { 27 | 28 | // Have all the shared parameters in one model 29 | input_forward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]); 30 | input_backward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]); 31 | 32 | char_vecs = (*m)[morph_len]->add_lookup_parameters(vocab_len, {char_len}); 33 | 34 | phidden_to_output = (*m)[morph_len]->add_parameters({vocab_len, hidden_len}); 35 | 
phidden_to_output_bias = (*m)[morph_len]->add_parameters({vocab_len, 1}); 36 | 37 | for (unsigned i = 0; i < morph_len; ++i) { 38 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len, 39 | hidden_len, (*m)[i])); 40 | 41 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 42 | 2 * hidden_len})); 43 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 44 | 45 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 46 | } 47 | } 48 | 49 | void JointEncMorph::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 50 | input_forward.new_graph(*cg); 51 | input_backward.new_graph(*cg); 52 | output_forward[morph_id].new_graph(*cg); 53 | 54 | hidden_to_output = parameter(*cg, phidden_to_output); 55 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias); 56 | 57 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 58 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 59 | } 60 | 61 | void JointEncMorph::RunFwdBwd(const vector& inputs, 62 | Expression* hidden, ComputationGraph *cg) { 63 | vector input_vecs; 64 | for (const unsigned& input_id : inputs) { 65 | input_vecs.push_back(lookup(*cg, char_vecs, input_id)); 66 | } 67 | 68 | // Run forward LSTM 69 | Expression forward_unit; 70 | input_forward.start_new_sequence(); 71 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 72 | forward_unit = input_forward.add_input(input_vecs[i]); 73 | } 74 | 75 | // Run backward LSTM 76 | Expression backward_unit; 77 | input_backward.start_new_sequence(); 78 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 79 | backward_unit = input_backward.add_input(input_vecs[i]); 80 | } 81 | 82 | // Concatenate the forward and back hidden layers 83 | *hidden = concatenate({forward_unit, backward_unit}); 84 | } 85 | 86 | void JointEncMorph::TransformEncodedInput(Expression* encoded_input) const { 87 | *encoded_input = affine_transform({transform_encoded_bias, 
88 | transform_encoded, *encoded_input}); 89 | } 90 | 91 | void JointEncMorph::ProjectToOutput(const Expression& hidden, Expression* out) 92 | const { 93 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 94 | } 95 | 96 | 97 | Expression JointEncMorph::ComputeLoss(const vector& hidden_units, 98 | const vector& targets) const { 99 | assert(hidden_units.size() == targets.size()); 100 | vector losses; 101 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 102 | Expression out; 103 | ProjectToOutput(hidden_units[i], &out); 104 | losses.push_back(pickneglogsoftmax(out, targets[i])); 105 | } 106 | return sum(losses); 107 | } 108 | 109 | float JointEncMorph::Train(const unsigned& morph_id, const vector& inputs, 110 | const vector& outputs, AdadeltaTrainer* opt, 111 | AdadeltaTrainer* shared_opt) { 112 | ComputationGraph cg; 113 | AddParamsToCG(morph_id, &cg); 114 | 115 | // Encode and Transform to feed into decoder 116 | Expression encoded_input_vec; 117 | RunFwdBwd(inputs, &encoded_input_vec, &cg); 118 | TransformEncodedInput(&encoded_input_vec); 119 | 120 | // Use this encoded word vector to predict the transformed word 121 | vector input_vecs_for_dec; 122 | vector output_ids_for_pred; 123 | for (unsigned i = 0; i < outputs.size(); ++i) { 124 | if (i < outputs.size() - 1) { 125 | // '' will not be fed as input -- it needs to be predicted. 126 | if (i < inputs.size() - 1) { 127 | input_vecs_for_dec.push_back(concatenate( 128 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 129 | lookup(cg, char_vecs, inputs[i + 1])})); 130 | } else { 131 | input_vecs_for_dec.push_back(concatenate( 132 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 133 | lookup(cg, eps_vecs[morph_id], 134 | min(unsigned(i - inputs.size()), max_eps - 1))})); 135 | } 136 | } 137 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 
138 | output_ids_for_pred.push_back(outputs[i]); 139 | } 140 | } 141 | 142 | vector decoder_hidden_units; 143 | output_forward[morph_id].start_new_sequence(); 144 | for (const auto& vec : input_vecs_for_dec) { 145 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 146 | } 147 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 148 | 149 | float return_loss = as_scalar(cg.forward()); 150 | cg.backward(); 151 | opt->update(1.0f); // Update the morph specific parameters 152 | shared_opt->update(1.0f); // Update the shared parameters 153 | return return_loss; 154 | } 155 | 156 | void Serialize(string& filename, JointEncMorph& model, 157 | vector* cnn_models) { 158 | ofstream outfile(filename); 159 | if (!outfile.is_open()) { 160 | cerr << "File opening failed: " << filename << endl; 161 | } 162 | 163 | boost::archive::text_oarchive oa(outfile); 164 | oa & model; 165 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 166 | oa & *(*cnn_models)[i]; 167 | } 168 | 169 | cerr << "Saved model to: " << filename << endl; 170 | outfile.close(); 171 | } 172 | 173 | void Read(string& filename, JointEncMorph* model, vector* cnn_models) { 174 | ifstream infile(filename); 175 | if (!infile.is_open()) { 176 | cerr << "File opening failed: " << filename << endl; 177 | } 178 | 179 | boost::archive::text_iarchive ia(infile); 180 | ia & *model; 181 | for (unsigned i = 0; i < model->morph_len + 1; ++i) { 182 | Model *cnn_model = new Model(); 183 | cnn_models->push_back(cnn_model); 184 | } 185 | 186 | model->InitParams(cnn_models); 187 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 188 | ia & *(*cnn_models)[i]; 189 | } 190 | 191 | cerr << "Loaded model from: " << filename << endl; 192 | infile.close(); 193 | } 194 | 195 | void 196 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 197 | const vector& input_ids, vector* pred_target_ids, 198 | vector* ensmb_model) { 199 | ComputationGraph cg; 200 | 201 | unsigned 
// Greedy ensemble decoding for JointEncMorph: every model encodes the input,
// then at each step the models' log-softmax outputs are averaged and the
// argmax character is appended, until EOW or MAX_PRED_LEN characters.
void
EnsembleDecode(const unsigned& morph_id,
               unordered_map<string, unsigned>& char_to_id,
               const vector<unsigned>& input_ids,
               vector<unsigned>* pred_target_ids,
               vector<JointEncMorph*>* ensmb_model) {
  ComputationGraph cg;

  unsigned ensmb = ensmb_model->size();
  vector<Expression> encoded_word_vecs;
  // Encode the input once per ensemble member and reset its decoder.
  for (unsigned i = 0; i < ensmb; ++i) {
    Expression encoded_word_vec;
    auto model = (*ensmb_model)[i];
    model->AddParamsToCG(morph_id, &cg);
    model->RunFwdBwd(input_ids, &encoded_word_vec, &cg);
    model->TransformEncodedInput(&encoded_word_vec);
    encoded_word_vecs.push_back(encoded_word_vec);
    model->output_forward[morph_id].start_new_sequence();
  }

  unsigned out_index = 1;               // Position of the aligned input char.
  unsigned pred_index = char_to_id[BOW];  // Last emitted character.
  while (pred_target_ids->size() < MAX_PRED_LEN) {
    vector<Expression> ensmb_out;
    pred_target_ids->push_back(pred_index);
    if (pred_index == char_to_id[EOW]) {
      return;  // If the end is found, break from the loop and return.
    }

    for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
      auto model = (*ensmb_model)[ensmb_id];
      Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index);
      Expression input, input_char_vec;
      if (out_index < input_ids.size()) {
        // Feed the input character aligned with this output position.
        input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]);
      } else {
        // Past the end of the input: feed an epsilon vector instead.
        input_char_vec = lookup(cg, model->eps_vecs[morph_id],
                                min(unsigned(out_index - input_ids.size()),
                                    model->max_eps - 1));
      }
      input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
                           input_char_vec});

      Expression hidden = model->output_forward[morph_id].add_input(input);
      Expression out;
      model->ProjectToOutput(hidden, &out);
      ensmb_out.push_back(log_softmax(out));
    }

    // Average the per-model log distributions, then pick the argmax.
    Expression out = sum(ensmb_out) / ensmb_out.size();
    vector<float> dist = as_vector(cg.incremental_forward());
    pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));
    out_index++;
  }
}

// Beam-search ensemble decoding.  Maintains beam_size hypotheses; at every
// step each active beam is extended by every vocabulary character, the
// beam_size best (by cumulative averaged log-prob) are kept, and a beam is
// deactivated when it emits EOW or exceeds MAX_PRED_LEN.  Final sequences
// land in *sequences and their cumulative log-probs in *tm_scores.
void
EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
                   unordered_map<string, unsigned>& char_to_id,
                   const vector<unsigned>& input_ids,
                   vector<vector<unsigned> >* sequences,
                   vector<float>* tm_scores,
                   vector<JointEncMorph*>* ensmb_model) {
  unsigned out_index = 1;
  unsigned ensmb = ensmb_model->size();
  ComputationGraph cg;

  // Compute stuff for every model in the ensemble: encode the input and
  // take the first decoder step (conditioned on BOW).
  vector<Expression> encoded_word_vecs;
  vector<Expression> ensmb_out;
  for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
    auto& model = *(*ensmb_model)[ensmb_id];
    model.AddParamsToCG(morph_id, &cg);

    Expression encoded_word_vec;
    model.RunFwdBwd(input_ids, &encoded_word_vec, &cg);
    model.TransformEncodedInput(&encoded_word_vec);
    encoded_word_vecs.push_back(encoded_word_vec);
    model.output_forward[morph_id].start_new_sequence();

    Expression prev_output_vec = lookup(cg, model.char_vecs,
                                        char_to_id[BOW]);
    Expression input = concatenate({encoded_word_vecs[ensmb_id],
                                    prev_output_vec,
                                    lookup(cg, model.char_vecs,
                                           input_ids[out_index])});
    Expression hidden = model.output_forward[morph_id].add_input(input);
    Expression out;
    model.ProjectToOutput(hidden, &out);
    out = log_softmax(out);
    ensmb_out.push_back(out);
  }

  // Compute the average of the ensemble output and rank all first
  // characters by log-probability.
  Expression out_dist = average(ensmb_out);
  vector<float> log_dist = as_vector(cg.incremental_forward());
  priority_queue<pair<float, unsigned> > init_queue;
  for (unsigned i = 0; i < log_dist.size(); ++i) {
    init_queue.push(make_pair(log_dist[i], i));
  }
  unsigned vocab_size = log_dist.size();

  // Initialise the beam_size sequences, scores, hidden states.
  vector<float> log_scores;
  // prev_states[beam][member]: decoder state to resume from at each step.
  vector<vector<RNNPointer> > prev_states;
  for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
    vector<unsigned> seq;
    seq.push_back(char_to_id[BOW]);
    seq.push_back(init_queue.top().second);
    sequences->push_back(seq);
    log_scores.push_back(init_queue.top().first);

    // All beams start from the same first-step decoder state.
    vector<RNNPointer> ensmb_states;
    for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
      auto& model = *(*ensmb_model)[ensmb_id];
      ensmb_states.push_back(model.output_forward[morph_id].state());
    }
    prev_states.push_back(ensmb_states);
    init_queue.pop();
  }

  // Constant distribution used as the output of inactive beams.
  // (NEG_INF is the file-scope mask constant.)
  vector<float> neg_inf(vocab_size, NEG_INF);
  Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf);

  vector<bool> active_beams(beam_size, true);
  while (true) {
    out_index++;
    // (cumulative log-prob, (source beam, character)) candidates.
    priority_queue<pair<float, pair<unsigned, unsigned> > > probs_queue;
    vector<vector<RNNPointer> > curr_states;
    vector<Expression> out_dist;
    for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
      if (active_beams[beam_id]) {
        unsigned prev_out_char = (*sequences)[beam_id].back();
        vector<Expression> ensmb_out;
        vector<RNNPointer> ensmb_states;
        for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) {
          auto& model = *(*ensmb_model)[ensmb_id];
          Expression input_char_vec;
          if (out_index < input_ids.size()) {
            input_char_vec = lookup(cg, model.char_vecs,
                                    input_ids[out_index]);
          } else {
            // Past the end of the input: epsilon vector.
            input_char_vec = lookup(cg, model.eps_vecs[morph_id],
                                    min(unsigned(out_index - input_ids.size()),
                                        model.max_eps - 1));
          }

          Expression prev_out_vec = lookup(cg, model.char_vecs,
                                           prev_out_char);
          Expression input = concatenate({encoded_word_vecs[ensmb_id],
                                          prev_out_vec, input_char_vec});
          // Resume this beam's decoder history via its saved RNNPointer.
          Expression hidden = model.output_forward[morph_id].add_input(
              prev_states[beam_id][ensmb_id], input);
          ensmb_states.push_back(model.output_forward[morph_id].state());

          Expression out;
          model.ProjectToOutput(hidden, &out);
          out = log_softmax(out);
          ensmb_out.push_back(out);
        }
        curr_states.push_back(ensmb_states);
        out_dist.push_back(average(ensmb_out));
      } else {
        // Inactive beam: dummy state and a masked distribution so it
        // cannot win any candidate slot.
        vector<RNNPointer> dummy(ensmb, RNNPointer(0));
        curr_states.push_back(dummy);
        out_dist.push_back(neg_inf_vec);
      }
    }

    // Evaluate all beams' distributions in one forward pass.
    Expression all_scores = concatenate(out_dist);
    vector<float> log_dist = as_vector(cg.incremental_forward());

    for (unsigned index = 0; index < log_dist.size(); ++index) {
      unsigned beam_id = index / vocab_size;
      unsigned char_id = index % vocab_size;
      if (active_beams[beam_id]) {
        pair<unsigned, unsigned> location = make_pair(beam_id, char_id);
        probs_queue.push(pair<float, pair<unsigned, unsigned> >(
            log_scores[beam_id] + log_dist[index], location));
      }
    }

    // Find the beam_size best now and update the variables.
    unordered_map<unsigned, vector<unsigned> > new_seq;
    for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
      if (active_beams[beam_id]) {
        float log_prob = probs_queue.top().first;
        pair<unsigned, unsigned> location = probs_queue.top().second;
        unsigned old_beam_id = location.first, char_id = location.second;

        vector<unsigned> seq = (*sequences)[old_beam_id];
        seq.push_back(char_id);
        new_seq[beam_id] = seq;
        log_scores[beam_id] = log_prob;  // Update the score.

        prev_states[beam_id] = curr_states[old_beam_id];  // Update hidden state.
        probs_queue.pop();
      }
    }

    // Update the sequences now (deferred so reads above see old values).
    for (auto& it : new_seq) {
      (*sequences)[it.first] = it.second;
    }

    // Check if a sequence should be made inactive.
    for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
      if (active_beams[beam_id] &&
          ((*sequences)[beam_id].back() == char_to_id[EOW] ||
           (*sequences)[beam_id].size() > MAX_PRED_LEN)) {
        active_beams[beam_id] = false;
      }
    }

    // Check if all sequences are inactive; if so, report scores and stop.
    bool all_inactive = true;
    for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
      if (active_beams[beam_id]) {
        all_inactive = false;
        break;
      }
    }

    if (all_inactive) {
      *tm_scores = log_scores;
      return;
    }
  }
}
// The class has a shared encoder across all morphological types.
// Other auxiliary units are also shared, except for the decoder and
// input transformation parameters.
class JointEncMorph {
 public:
  LSTMBuilder input_forward, input_backward;  // Shared encoder.
  vector<LSTMBuilder> output_forward;         // One decoder per morphology.
  LookupParameters* char_vecs;                // Shared char vectors.

  Expression hidden_to_output, hidden_to_output_bias;
  Parameters *phidden_to_output, *phidden_to_output_bias;  // Shared.

  // Morphology-specific transformation of the encoded input.
  Expression transform_encoded, transform_encoded_bias;
  vector<Parameters*> ptransform_encoded, ptransform_encoded_bias;

  vector<LookupParameters*> eps_vecs;  // Not sharing the epsilon vectors.

  unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5;

  JointEncMorph() {}

  JointEncMorph(const unsigned& char_length, const unsigned& hidden_length,
                const unsigned& vocab_length, const unsigned& layers,
                const unsigned& num_morph, vector<Model*>* m,
                vector<AdadeltaTrainer>* optimizer);

  void InitParams(vector<Model*>* m);

  void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);

  void RunFwdBwd(const vector<unsigned>& inputs,
                 Expression* hidden, ComputationGraph* cg);

  void TransformEncodedInput(Expression* encoded_input) const;

  void ProjectToOutput(const Expression& hidden, Expression* out) const;

  Expression ComputeLoss(const vector<Expression>& hidden_units,
                         const vector<unsigned>& targets) const;

  float Train(const unsigned& morph_id, const vector<unsigned>& inputs,
              const vector<unsigned>& outputs, AdadeltaTrainer* opt,
              AdadeltaTrainer* shared_opt);

  // Only hyper-parameters are boost-serialized; weights are handled by
  // Serialize()/Read() streaming the raw cnn Models.
  friend class boost::serialization::access;
  template <class Archive>
  void serialize(Archive& ar, const unsigned int) {
    ar & char_len;
    ar & hidden_len;
    ar & vocab_len;
    ar & layers;
    ar & morph_len;
    ar & max_eps;
  }
};

void Serialize(string& filename, JointEncMorph& model,
               vector<Model*>* cnn_model);

void Read(string& filename, JointEncMorph* model,
          vector<Model*>* cnn_model);

// Greedy ensemble decoding (see joint-enc-morph.cc).
void
EnsembleDecode(const unsigned& morph_id,
               unordered_map<string, unsigned>& char_to_id,
               const vector<unsigned>& input_ids,
               vector<unsigned>* pred_target_ids,
               vector<JointEncMorph*>* ensmb_model);

// Beam-search ensemble decoding (see joint-enc-morph.cc).
void
EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
                   unordered_map<string, unsigned>& char_to_id,
                   const vector<unsigned>& input_ids,
                   vector<vector<unsigned> >* sequences,
                   vector<float>* tm_scores,
                   vector<JointEncMorph*>* ensmb_model);
ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 43 | 44 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 45 | } 46 | } 47 | 48 | void LMJointEnc::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 49 | input_forward.new_graph(*cg); 50 | input_backward.new_graph(*cg); 51 | output_forward[morph_id].new_graph(*cg); 52 | 53 | hidden_to_output = parameter(*cg, phidden_to_output); 54 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias); 55 | 56 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 57 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 58 | } 59 | 60 | void LMJointEnc::RunFwdBwd(const vector& inputs, 61 | Expression* hidden, ComputationGraph *cg) { 62 | vector input_vecs; 63 | for (const unsigned& input_id : inputs) { 64 | input_vecs.push_back(lookup(*cg, char_vecs, input_id)); 65 | } 66 | 67 | // Run forward LSTM 68 | Expression forward_unit; 69 | input_forward.start_new_sequence(); 70 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 71 | forward_unit = input_forward.add_input(input_vecs[i]); 72 | } 73 | 74 | // Run backward LSTM 75 | Expression backward_unit; 76 | input_backward.start_new_sequence(); 77 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 78 | backward_unit = input_backward.add_input(input_vecs[i]); 79 | } 80 | 81 | // Concatenate the forward and back hidden layers 82 | *hidden = concatenate({forward_unit, backward_unit}); 83 | } 84 | 85 | void LMJointEnc::TransformEncodedInput(Expression* encoded_input) const { 86 | *encoded_input = affine_transform({transform_encoded_bias, 87 | transform_encoded, *encoded_input}); 88 | } 89 | 90 | void LMJointEnc::ProjectToOutput(const Expression& hidden, Expression* out) 91 | const { 92 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 93 | } 94 | 95 | Expression LMJointEnc::ComputeLoss(const vector& hidden_units, 96 | const vector& targets, 97 | LM 
*lm, ComputationGraph* cg) const { 98 | vector losses; 99 | vector incremental_targets; 100 | incremental_targets.push_back(lm->char_to_id[BOW]); 101 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 102 | Expression out; 103 | ProjectToOutput(hidden_units[i], &out); 104 | Expression trans_lp = log_softmax(out); 105 | 106 | // Calculate the LM probabilities of all possible outputs. 107 | Expression lm_lp = LogProbDist(incremental_targets, lm, cg); 108 | 109 | unsigned lm_index = min(i + 1, max_lm_pos_weights - 1); 110 | Expression lm_weight = lookup(*cg, lm_pos_weights, lm_index); 111 | 112 | //Expression total_lp = trans_lp + cwise_multiply(lm_lp, Softplus(lm_weight)); 113 | Expression total_lp = trans_lp + lm_lp * Softplus(lm_weight); 114 | losses.push_back(pickneglogsoftmax(total_lp, targets[i])); 115 | incremental_targets.push_back(targets[i]); 116 | } 117 | return sum(losses); 118 | } 119 | 120 | float LMJointEnc::Train(const unsigned& morph_id, const vector& inputs, 121 | const vector& outputs, 122 | LM* lm, AdadeltaTrainer* opt, 123 | AdadeltaTrainer* shared_opt) { 124 | ComputationGraph cg; 125 | AddParamsToCG(morph_id, &cg); 126 | 127 | // Encode and Transform to feed into decoder 128 | Expression encoded_input_vec; 129 | RunFwdBwd(inputs, &encoded_input_vec, &cg); 130 | TransformEncodedInput(&encoded_input_vec); 131 | 132 | // Use this encoded word vector to predict the transformed word 133 | vector input_vecs_for_dec; 134 | vector output_ids_for_pred; 135 | for (unsigned i = 0; i < outputs.size(); ++i) { 136 | if (i < outputs.size() - 1) { 137 | // '' will not be fed as input -- it needs to be predicted. 
138 | if (i < inputs.size() - 1) { 139 | input_vecs_for_dec.push_back(concatenate( 140 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 141 | lookup(cg, char_vecs, inputs[i + 1])})); 142 | } else { 143 | input_vecs_for_dec.push_back(concatenate( 144 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]), 145 | lookup(cg, eps_vecs[morph_id], 146 | min(unsigned(i - inputs.size()), max_eps - 1))})); 147 | } 148 | } 149 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 150 | output_ids_for_pred.push_back(outputs[i]); 151 | } 152 | } 153 | 154 | vector decoder_hidden_units; 155 | output_forward[morph_id].start_new_sequence(); 156 | for (const auto& vec : input_vecs_for_dec) { 157 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 158 | } 159 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred, lm, 160 | &cg); 161 | 162 | float return_loss = as_scalar(cg.forward()); 163 | cg.backward(); 164 | opt->update(1.0f); // Update the morph specific parameters 165 | shared_opt->update(1.0f); // Update the shared parameters 166 | return return_loss; 167 | } 168 | 169 | void Serialize(string& filename, LMJointEnc& model, 170 | vector* cnn_models) { 171 | ofstream outfile(filename); 172 | if (!outfile.is_open()) { 173 | cerr << "File opening failed" << endl; 174 | } 175 | 176 | boost::archive::text_oarchive oa(outfile); 177 | oa & model; 178 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 179 | oa & *(*cnn_models)[i]; 180 | } 181 | 182 | cerr << "Saved model to: " << filename << endl; 183 | outfile.close(); 184 | } 185 | 186 | void Read(string& filename, LMJointEnc* model, vector* cnn_models) { 187 | ifstream infile(filename); 188 | if (!infile.is_open()) { 189 | cerr << "File opening failed" << endl; 190 | } 191 | 192 | boost::archive::text_iarchive ia(infile); 193 | ia & *model; 194 | for (unsigned i = 0; i < model->morph_len + 1; ++i) { 195 | Model *cnn_model = new Model(); 196 | 
cnn_models->push_back(cnn_model); 197 | } 198 | 199 | model->InitParams(cnn_models); 200 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 201 | ia & *(*cnn_models)[i]; 202 | } 203 | 204 | cerr << "Loaded model from: " << filename << endl; 205 | infile.close(); 206 | } 207 | 208 | // Computes the probability of all possible next characters given 209 | // a sequence, but removes the first character (). 210 | Expression LogProbDist(const vector& seq, LM *lm, 211 | ComputationGraph *cg) { 212 | vector lm_dist(lm->char_to_id.size(), 0.f); 213 | 214 | // Remove the first () character. 215 | vector seq_without_start(seq.begin() + 1, seq.end()); 216 | for (const auto& it : lm->char_to_id) { 217 | vector possible_seq(seq_without_start); 218 | possible_seq.push_back(it.second); 219 | lm_dist[it.second] = lm->LogProbSeq(possible_seq); 220 | } 221 | return input(*cg, {(long) lm_dist.size()}, lm_dist); 222 | } 223 | 224 | float Softplus(float x) { 225 | return log(1 + exp(x)); 226 | } 227 | 228 | Expression Softplus(Expression x) { 229 | return log(1 + exp(x)); 230 | } 231 | 232 | void 233 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 234 | const vector& input_ids, vector* pred_target_ids, 235 | LM* lm, vector* ensmb_model) { 236 | ComputationGraph cg; 237 | 238 | unsigned ensmb = ensmb_model->size(); 239 | vector encoded_word_vecs; 240 | for (unsigned i = 0; i < ensmb; ++i) { 241 | Expression encoded_word_vec; 242 | auto model = (*ensmb_model)[i]; 243 | model->AddParamsToCG(morph_id, &cg); 244 | model->RunFwdBwd(input_ids, &encoded_word_vec, &cg); 245 | model->TransformEncodedInput(&encoded_word_vec); 246 | encoded_word_vecs.push_back(encoded_word_vec); 247 | model->output_forward[morph_id].start_new_sequence(); 248 | } 249 | 250 | unsigned out_index = 1; 251 | unsigned pred_index = char_to_id[BOW]; 252 | while (pred_target_ids->size() < MAX_PRED_LEN) { 253 | pred_target_ids->push_back(pred_index); 254 | if (pred_index == char_to_id[EOW]) { 255 | 
return; // If the end is found, break from the loop and return 256 | } 257 | 258 | vector ensmb_out; 259 | Expression lm_dist = LogProbDist(*pred_target_ids, lm, &cg); 260 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 261 | auto model = (*ensmb_model)[ensmb_id]; 262 | Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index); 263 | Expression input, input_char_vec; 264 | if (out_index < input_ids.size()) { 265 | input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]); 266 | } else { 267 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 268 | min(unsigned(out_index - input_ids.size()), 269 | model->max_eps - 1)); 270 | } 271 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 272 | input_char_vec}); 273 | 274 | Expression hidden = model->output_forward[morph_id].add_input(input); 275 | Expression tm_prob; 276 | model->ProjectToOutput(hidden, &tm_prob); 277 | tm_prob = log_softmax(tm_prob); 278 | 279 | unsigned lm_index = min(out_index, model->max_lm_pos_weights - 1); 280 | Expression lm_weight = lookup(cg, model->lm_pos_weights, lm_index); 281 | Expression total_lp = tm_prob + lm_dist * Softplus(lm_weight); 282 | 283 | ensmb_out.push_back(log_softmax(total_lp)); 284 | } 285 | 286 | Expression out = average(ensmb_out); 287 | vector dist = as_vector(cg.incremental_forward()); 288 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 289 | out_index++; 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /src/lm-joint-enc.h: -------------------------------------------------------------------------------- 1 | #ifndef LM_JOINT_ENC_H_ 2 | #define LM_JOINT_ENC_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "lm.h" 14 | #include "utils.h" 15 | 16 | 
#include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | // The class has a shared encoder acorss all morphological types. 25 | // Other auxiliary units are also shared, except for the decoder and 26 | // input transofrmation parameters. 27 | class LMJointEnc { 28 | public: 29 | LSTMBuilder input_forward, input_backward; // Shared encoder 30 | vector output_forward; 31 | LookupParameters* char_vecs; // Shared char vectors 32 | 33 | Expression hidden_to_output, hidden_to_output_bias; 34 | Parameters *phidden_to_output, *phidden_to_output_bias; // Shared 35 | 36 | Expression transform_encoded, transform_encoded_bias; 37 | vector ptransform_encoded, ptransform_encoded_bias; 38 | 39 | vector eps_vecs; // Not sharing the epsilon vectors 40 | 41 | LookupParameters* lm_pos_weights; 42 | 43 | unsigned char_len, hidden_len, vocab_len, layers, morph_len; 44 | unsigned max_lm_pos_weights = 20, max_eps = 5; 45 | 46 | LMJointEnc() {} 47 | 48 | LMJointEnc(const unsigned& char_length, const unsigned& hidden_length, 49 | const unsigned& vocab_length, const unsigned& layers, 50 | const unsigned& num_morph, vector* m, 51 | vector* optimizer); 52 | 53 | void InitParams(vector* m); 54 | 55 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 56 | 57 | void RunFwdBwd(const vector& inputs, 58 | Expression* hidden, ComputationGraph *cg); 59 | 60 | void TransformEncodedInput(Expression* encoded_input) const; 61 | 62 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 63 | 64 | Expression ComputeLoss(const vector& hidden_units, 65 | const vector& targets, LM *lm, 66 | ComputationGraph* cg) const; 67 | 68 | float Train(const unsigned& morph_id, const vector& inputs, 69 | const vector& outputs, LM *lm, AdadeltaTrainer* opt, 70 | AdadeltaTrainer* shared_opt); 71 | 72 | friend class boost::serialization::access; 73 | template void serialize(Archive& ar, const unsigned int) { 74 
| ar & char_len; 75 | ar & hidden_len; 76 | ar & vocab_len; 77 | ar & layers; 78 | ar & morph_len; 79 | ar & max_eps; 80 | ar & max_lm_pos_weights; 81 | } 82 | }; 83 | 84 | Expression LogProbDist(const vector& seq, 85 | LM *lm, ComputationGraph *cg); 86 | 87 | Expression Softplus(Expression x); 88 | 89 | float Softplus(float x); 90 | 91 | 92 | void Serialize(string& filename, LMJointEnc& model, vector* cnn_model); 93 | 94 | void Read(string& filename, LMJointEnc* model, vector* cnn_model); 95 | 96 | void 97 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 98 | const vector& input_ids, vector* pred_target_ids, 99 | LM *lm, vector* ensmb_model); 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /src/lm-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "lm-sep-morph.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | using cnn::expr::input; 7 | 8 | string BOW = "", EOW = ""; 9 | int MAX_PRED_LEN = 100; 10 | 11 | LMSepMorph::LMSepMorph(const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void LMSepMorph::InitParams(vector* m) { 24 | for (unsigned i = 0; i < morph_len; ++i) { 25 | input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 26 | input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 27 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len, 28 | hidden_len, (*m)[i])); 29 | 30 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len})); 31 | 
phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1})); 32 | 33 | char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len})); 34 | 35 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 36 | 2 * hidden_len})); 37 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 38 | 39 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 40 | lm_pos_weights.push_back((*m)[i]->add_lookup_parameters( 41 | max_lm_pos_weights, {1})); 42 | } 43 | } 44 | 45 | void LMSepMorph::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 46 | input_forward[morph_id].new_graph(*cg); 47 | input_backward[morph_id].new_graph(*cg); 48 | output_forward[morph_id].new_graph(*cg); 49 | 50 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]); 51 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]); 52 | 53 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 54 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 55 | } 56 | 57 | void LMSepMorph::RunFwdBwd(const unsigned& morph_id, 58 | const vector& inputs, 59 | Expression* hidden, ComputationGraph *cg) { 60 | vector input_vecs; 61 | for (const unsigned& input_id : inputs) { 62 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id)); 63 | } 64 | 65 | // Run forward LSTM 66 | Expression forward_unit; 67 | input_forward[morph_id].start_new_sequence(); 68 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 69 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]); 70 | } 71 | 72 | // Run backward LSTM 73 | Expression backward_unit; 74 | input_backward[morph_id].start_new_sequence(); 75 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 76 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]); 77 | } 78 | 79 | // Concatenate the forward and back hidden layers 80 | *hidden = concatenate({forward_unit, backward_unit}); 81 | } 82 | 83 | 
void LMSepMorph::TransformEncodedInput(Expression* encoded_input) const { 84 | *encoded_input = affine_transform({transform_encoded_bias, 85 | transform_encoded, *encoded_input}); 86 | } 87 | 88 | void LMSepMorph::ProjectToOutput(const Expression& hidden, Expression* out) const { 89 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 90 | } 91 | 92 | 93 | Expression LMSepMorph::ComputeLoss(const unsigned& morph_id, 94 | const vector& hidden_units, 95 | const vector& targets, 96 | LM *lm, ComputationGraph* cg) const { 97 | vector losses; 98 | vector incremental_targets; 99 | incremental_targets.push_back(lm->char_to_id[BOW]); 100 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 101 | Expression out; 102 | ProjectToOutput(hidden_units[i], &out); 103 | Expression trans_lp = log_softmax(out); 104 | 105 | // Calculate the LM probabilities of all possible outputs. 106 | Expression lm_lp = LogProbDist(incremental_targets, lm, cg); 107 | 108 | unsigned lm_index = min(i + 1, max_lm_pos_weights - 1); 109 | Expression lm_weight = lookup(*cg, lm_pos_weights[morph_id], lm_index); 110 | 111 | //Expression total_lp = trans_lp + cwise_multiply(lm_lp, Softplus(lm_weight)); 112 | Expression total_lp = trans_lp + lm_lp * Softplus(lm_weight); 113 | losses.push_back(pickneglogsoftmax(total_lp, targets[i])); 114 | incremental_targets.push_back(targets[i]); 115 | } 116 | return sum(losses); 117 | } 118 | 119 | float LMSepMorph::Train(const unsigned& morph_id, 120 | const vector& inputs, 121 | const vector& outputs, LM *lm, 122 | AdadeltaTrainer* ada_gd) { 123 | ComputationGraph cg; 124 | AddParamsToCG(morph_id, &cg); 125 | 126 | // Encode and Transform to feed into decoder 127 | Expression encoded_input_vec; 128 | RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg); 129 | TransformEncodedInput(&encoded_input_vec); 130 | 131 | // Use this encoded word vector to predict the transformed word 132 | vector input_vecs_for_dec; 133 | vector output_ids_for_pred; 
134 | for (unsigned i = 0; i < outputs.size(); ++i) { 135 | if (i < outputs.size() - 1) { 136 | // '' will not be fed as input -- it needs to be predicted. 137 | if (i < inputs.size() - 1) { 138 | input_vecs_for_dec.push_back(concatenate( 139 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]), 140 | lookup(cg, char_vecs[morph_id], inputs[i + 1])})); 141 | } else { 142 | input_vecs_for_dec.push_back(concatenate( 143 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]), 144 | lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))})); 145 | } 146 | } 147 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 148 | output_ids_for_pred.push_back(outputs[i]); 149 | } 150 | } 151 | 152 | vector decoder_hidden_units; 153 | output_forward[morph_id].start_new_sequence(); 154 | for (const auto& vec : input_vecs_for_dec) { 155 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 156 | } 157 | 158 | // If its the first iteration, do not use language model. 159 | Expression loss = ComputeLoss(morph_id, decoder_hidden_units, 160 | output_ids_for_pred, lm, &cg); 161 | 162 | float return_loss = as_scalar(cg.incremental_forward()); 163 | cg.backward(); 164 | ada_gd->update(1.0f); 165 | 166 | return return_loss; 167 | } 168 | 169 | // Computes the probability of all possible next characters given 170 | // a sequence, but removes the first character (). 171 | Expression LogProbDist(const vector& seq, LM *lm, 172 | ComputationGraph *cg) { 173 | vector lm_dist(lm->char_to_id.size(), 0.f); 174 | 175 | // Remove the first () character. 
176 | vector seq_without_start(seq.begin() + 1, seq.end()); 177 | for (const auto& it : lm->char_to_id) { 178 | vector possible_seq(seq_without_start); 179 | possible_seq.push_back(it.second); 180 | lm_dist[it.second] = lm->LogProbSeq(possible_seq); 181 | } 182 | return input(*cg, {(long) lm_dist.size()}, lm_dist); 183 | } 184 | 185 | void 186 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 187 | const vector& input_ids, vector* pred_target_ids, 188 | LM* lm, vector* ensmb_model) { 189 | ComputationGraph cg; 190 | 191 | unsigned ensmb = ensmb_model->size(); 192 | vector encoded_word_vecs; 193 | for (unsigned i = 0; i < ensmb; ++i) { 194 | Expression encoded_word_vec; 195 | auto model = (*ensmb_model)[i]; 196 | model->AddParamsToCG(morph_id, &cg); 197 | model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 198 | model->TransformEncodedInput(&encoded_word_vec); 199 | encoded_word_vecs.push_back(encoded_word_vec); 200 | model->output_forward[morph_id].start_new_sequence(); 201 | } 202 | 203 | unsigned out_index = 1; 204 | unsigned pred_index = char_to_id[BOW]; 205 | while (pred_target_ids->size() < MAX_PRED_LEN) { 206 | pred_target_ids->push_back(pred_index); 207 | if (pred_index == char_to_id[EOW]) { 208 | // Print the lm weights. 
209 | /*for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 210 | auto model = (*ensmb_model)[ensmb_id]; 211 | for (unsigned i = 1; i < model->max_lm_pos_weights; ++i) { 212 | float p = model->lm_pos_weights[morph_id]->values[i].v[0]; 213 | cerr << Softplus(p) << " "; 214 | } 215 | cerr << endl; 216 | }*/ 217 | 218 | return; // If the end is found, break from the loop and return 219 | } 220 | 221 | vector ensmb_out; 222 | Expression lm_dist = LogProbDist(*pred_target_ids, lm, &cg); 223 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 224 | auto model = (*ensmb_model)[ensmb_id]; 225 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index); 226 | Expression input, input_char_vec; 227 | if (out_index < input_ids.size()) { 228 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]); 229 | } else { 230 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 231 | min(unsigned(out_index - input_ids.size()), 232 | model->max_eps - 1)); 233 | } 234 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 235 | input_char_vec}); 236 | 237 | Expression hidden = model->output_forward[morph_id].add_input(input); 238 | Expression tm_prob; 239 | model->ProjectToOutput(hidden, &tm_prob); 240 | tm_prob = log_softmax(tm_prob); 241 | 242 | unsigned lm_index = min(out_index, model->max_lm_pos_weights - 1); 243 | Expression lm_weight = lookup(cg, model->lm_pos_weights[morph_id], lm_index); 244 | Expression total_lp = tm_prob + lm_dist * Softplus(lm_weight); 245 | 246 | ensmb_out.push_back(log_softmax(total_lp)); 247 | } 248 | Expression total_dist = average(ensmb_out); 249 | 250 | vector dist = as_vector(cg.incremental_forward()); 251 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 252 | out_index++; 253 | } 254 | } 255 | 256 | float Softplus(float x) { 257 | return log(1 + exp(x)); 258 | } 259 | 260 | Expression Softplus(Expression x) { 261 | return log(1 + exp(x)); 262 | 
} 263 | 264 | void Serialize(string& filename, LMSepMorph& model, vector* cnn_models) { 265 | ofstream outfile(filename); 266 | if (!outfile.is_open()) { 267 | cerr << "File opening failed" << endl; 268 | } 269 | 270 | boost::archive::text_oarchive oa(outfile); 271 | oa & model; 272 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 273 | oa & *(*cnn_models)[i]; 274 | } 275 | 276 | cerr << "Saved model to: " << filename << endl; 277 | outfile.close(); 278 | } 279 | 280 | void Read(string& filename, LMSepMorph* model, vector* cnn_models) { 281 | ifstream infile(filename); 282 | if (!infile.is_open()) { 283 | cerr << "File opening failed" << endl; 284 | } 285 | 286 | boost::archive::text_iarchive ia(infile); 287 | ia & *model; 288 | for (unsigned i = 0; i < model->morph_len; ++i) { 289 | Model *cnn_model = new Model(); 290 | cnn_models->push_back(cnn_model); 291 | } 292 | 293 | model->InitParams(cnn_models); 294 | for (unsigned i = 0; i < model->morph_len; ++i) { 295 | ia & *(*cnn_models)[i]; 296 | } 297 | 298 | cerr << "Loaded model from: " << filename << endl; 299 | infile.close(); 300 | } 301 | 302 | -------------------------------------------------------------------------------- /src/lm-sep-morph.h: -------------------------------------------------------------------------------- 1 | #ifndef SEP_MORPH_H_ 2 | #define SEP_MORPH_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "lm.h" 14 | #include "utils.h" 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | class LMSepMorph { 25 | public: 26 | vector input_forward, input_backward, output_forward; 27 | vector char_vecs; 28 | 29 | Expression hidden_to_output, hidden_to_output_bias; 30 | vector phidden_to_output, 
phidden_to_output_bias; 31 | 32 | Expression transform_encoded, transform_encoded_bias; 33 | vector ptransform_encoded, ptransform_encoded_bias; 34 | 35 | unsigned char_len, hidden_len, vocab_len, layers, morph_len; 36 | unsigned max_eps = 5, max_lm_pos_weights = 20; 37 | vector eps_vecs; 38 | vector lm_pos_weights; 39 | 40 | LMSepMorph() {} 41 | 42 | LMSepMorph(const unsigned& char_length, const unsigned& hidden_length, 43 | const unsigned& vocab_length, const unsigned& layers, 44 | const unsigned& num_morph, vector* m, 45 | vector* optimizer); 46 | 47 | void InitParams(vector* m); 48 | 49 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 50 | 51 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 52 | Expression* hidden, ComputationGraph *cg); 53 | 54 | void TransformEncodedInput(Expression* encoded_input) const; 55 | 56 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 57 | 58 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 59 | 60 | Expression ComputeLoss(const unsigned& morph_id, 61 | const vector& hidden_units, 62 | const vector& targets, 63 | LM *lm, ComputationGraph* cg) const; 64 | 65 | float Train(const unsigned& morph_id, const vector& inputs, 66 | const vector& outputs, LM* lm, AdadeltaTrainer* ada_gd); 67 | 68 | friend class boost::serialization::access; 69 | template void serialize(Archive& ar, const unsigned int) { 70 | ar & char_len; 71 | ar & hidden_len; 72 | ar & vocab_len; 73 | ar & layers; 74 | ar & morph_len; 75 | ar & max_eps; 76 | ar & max_lm_pos_weights; 77 | } 78 | }; 79 | 80 | Expression LogProbDist(const vector& seq, 81 | LM *lm, ComputationGraph *cg); 82 | 83 | Expression Softplus(Expression x); 84 | 85 | float Softplus(float x); 86 | 87 | void 88 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 89 | const vector& input_ids, 90 | vector* pred_target_ids, LM* lm, 91 | vector* ensmb_model); 92 | 93 | void Serialize(string& filename, 
LMSepMorph& model, vector* cnn_model); 94 | 95 | void Read(string& filename, LMSepMorph* model, vector* cnn_model); 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /src/lm.cc: -------------------------------------------------------------------------------- 1 | #include "lm.h" 2 | 3 | LM::LM(string& lm_model_file, unordered_map& char_id, 4 | unordered_map& id_char) { 5 | ifstream model_file(lm_model_file); 6 | if (model_file.is_open()) { 7 | string line; 8 | char_to_id = char_id; 9 | id_to_char = id_char; 10 | while(getline(model_file, line)) { 11 | vector items = split_line(line, '\t'); 12 | if (items.size() == 2) { 13 | vector seq; 14 | for (string& ch : split_line(items[1], ' ')) { 15 | seq.push_back(char_to_id[ch]); 16 | } 17 | lp[HashSeq(seq)] = atof(items[0].c_str()); 18 | } else if (items.size() == 3) { 19 | vector seq; 20 | for (string& ch : split_line(items[1], ' ')) { 21 | seq.push_back(char_to_id[ch]); 22 | } 23 | lp[HashSeq(seq)] = atof(items[0].c_str()); 24 | b[HashSeq(seq)] = atof(items[2].c_str()); 25 | } 26 | } 27 | model_file.close(); 28 | cerr << "LM loaded from: " << lm_model_file << endl; 29 | cerr << "LM size: " << lp.size() << endl; 30 | } else { 31 | cerr << "File opening failed" << endl; 32 | exit(0); 33 | } 34 | } 35 | 36 | size_t LM::HashSeq(vector& seq) { 37 | return boost::hash_range(seq.begin(), seq.end()); 38 | } 39 | 40 | void 41 | PrintSeq(vector& seq, unordered_map& id_to_char) { 42 | for (unsigned i = 0; i < seq.size(); ++i) { 43 | cerr << id_to_char[seq[i]] << " "; 44 | } 45 | cerr << endl; 46 | } 47 | 48 | /* LogProbSeq(w1, w2, ..., wn) = LogProbSeq(w2, w3, ..., wn) 49 | + backoff(w1, w2, ..., wn-1); 50 | http://cmusphinx.sourceforge.net/wiki/sphinx4:standardgrammarformats 51 | 52 | Assumes that unigrams are always present in lp[]. 
53 | */ 54 | float LM::LogProb(vector& seq) { 55 | size_t seq_hash = HashSeq(seq); 56 | auto it_seq = lp.find(seq_hash); 57 | if (it_seq == lp.end()) { 58 | vector backoff_seq(seq.begin(), seq.end() - 1); 59 | size_t backoff_hash = HashSeq(backoff_seq); 60 | auto it_backoff = b.find(backoff_hash); 61 | 62 | float backoff; 63 | if (it_backoff == b.end()) { 64 | backoff = 0.0; 65 | } else { 66 | backoff = it_backoff->second; 67 | } 68 | 69 | vector small(seq.begin() + 1, seq.end()); 70 | lp[seq_hash] = backoff + LogProb(small); 71 | return lp[seq_hash]; 72 | } else { 73 | return it_seq->second; // Return the log prob 74 | } 75 | } 76 | 77 | float LM::LogProbSeq(vector& seq) { 78 | float score = 0.; 79 | for (unsigned i = 1; i <= seq.size(); ++i) { 80 | vector temp(seq.begin(), seq.begin()+ i); 81 | score += LogProb(temp); 82 | } 83 | return score; 84 | } 85 | -------------------------------------------------------------------------------- /src/lm.h: -------------------------------------------------------------------------------- 1 | #ifndef LM_H_ 2 | #define LM_H_ 3 | 4 | #include "utils.h" 5 | 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | class LM { 12 | public: 13 | unordered_map char_to_id; 14 | unordered_map id_to_char; 15 | 16 | LM (string& lm_model_file, unordered_map& char_id, 17 | unordered_map& id_char); 18 | float LogProbSeq(vector& seq); 19 | float LogProb(vector& seq); 20 | size_t HashSeq(vector& seq); 21 | 22 | private: 23 | unordered_map lp, b; 24 | }; 25 | 26 | void 27 | PrintSeq(vector& seq, unordered_map& id_to_char); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /src/no-enc.cc: -------------------------------------------------------------------------------- 1 | #include "no-enc.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | 7 | string BOW = "", EOW = ""; 8 | int MAX_PRED_LEN = 100; 9 | float NEG_INF = numeric_limits::min(); 10 | 
11 | NoEnc::NoEnc(const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void NoEnc::InitParams(vector* m) { 24 | for (unsigned i = 0; i < morph_len; ++i) { 25 | //input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 26 | //input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 27 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len, // + hidden_len, 28 | hidden_len, (*m)[i])); 29 | 30 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len})); 31 | phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1})); 32 | 33 | char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len})); 34 | 35 | //ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 36 | // 2 * hidden_len})); 37 | //ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 38 | 39 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 40 | } 41 | } 42 | 43 | void NoEnc::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) { 44 | //input_forward[morph_id].new_graph(*cg); 45 | //input_backward[morph_id].new_graph(*cg); 46 | output_forward[morph_id].new_graph(*cg); 47 | 48 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]); 49 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]); 50 | 51 | //transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 52 | //transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 53 | } 54 | 55 | /*void NoEnc::RunFwdBwd(const unsigned& morph_id, 56 | const vector& inputs, 57 | Expression* hidden, ComputationGraph *cg) { 58 | vector 
input_vecs; 59 | for (const unsigned& input_id : inputs) { 60 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id)); 61 | } 62 | 63 | // Run forward LSTM 64 | Expression forward_unit; 65 | input_forward[morph_id].start_new_sequence(); 66 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 67 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]); 68 | } 69 | 70 | // Run backward LSTM 71 | Expression backward_unit; 72 | input_backward[morph_id].start_new_sequence(); 73 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 74 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]); 75 | } 76 | 77 | // Concatenate the forward and back hidden layers 78 | *hidden = concatenate({forward_unit, backward_unit}); 79 | } 80 | 81 | void NoEnc::TransformEncodedInput(Expression* encoded_input) const { 82 | *encoded_input = affine_transform({transform_encoded_bias, 83 | transform_encoded, *encoded_input}); 84 | }*/ 85 | 86 | void NoEnc::ProjectToOutput(const Expression& hidden, Expression* out) const { 87 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 88 | } 89 | 90 | Expression NoEnc::ComputeLoss(const vector& hidden_units, 91 | const vector& targets) const { 92 | assert(hidden_units.size() == targets.size()); 93 | vector losses; 94 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 95 | Expression out; 96 | ProjectToOutput(hidden_units[i], &out); 97 | losses.push_back(pickneglogsoftmax(out, targets[i])); 98 | } 99 | return sum(losses); 100 | } 101 | 102 | float NoEnc::Train(const unsigned& morph_id, const vector& inputs, 103 | const vector& outputs, AdadeltaTrainer* ada_gd) { 104 | ComputationGraph cg; 105 | AddParamsToCG(morph_id, &cg); 106 | 107 | // Encode and Transform to feed into decoder 108 | //Expression encoded_input_vec; 109 | //RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg); 110 | //TransformEncodedInput(&encoded_input_vec); 111 | 112 | // Use this encoded word vector to predict the 
transformed word 113 | vector input_vecs_for_dec; 114 | vector output_ids_for_pred; 115 | for (unsigned i = 0; i < outputs.size(); ++i) { 116 | if (i < outputs.size() - 1) { 117 | // '' will not be fed as input -- it needs to be predicted. 118 | if (i < inputs.size() - 1) { 119 | input_vecs_for_dec.push_back(concatenate( 120 | {lookup(cg, char_vecs[morph_id], outputs[i]), 121 | lookup(cg, char_vecs[morph_id], inputs[i + 1])})); 122 | } else { 123 | input_vecs_for_dec.push_back(concatenate( 124 | {lookup(cg, char_vecs[morph_id], outputs[i]), 125 | lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))})); 126 | } 127 | } 128 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 129 | output_ids_for_pred.push_back(outputs[i]); 130 | } 131 | } 132 | 133 | vector decoder_hidden_units; 134 | output_forward[morph_id].start_new_sequence(); 135 | for (const auto& vec : input_vecs_for_dec) { 136 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 137 | } 138 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 139 | 140 | float return_loss = as_scalar(cg.forward()); 141 | cg.backward(); 142 | ada_gd->update(1.0f); 143 | return return_loss; 144 | } 145 | 146 | void 147 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 148 | const vector& input_ids, 149 | vector* pred_target_ids, vector* ensmb_model) { 150 | ComputationGraph cg; 151 | 152 | /*auto temp_model = *(*ensmb_model)[0]; 153 | for (unsigned char_id = 0; char_id < temp_model.vocab_len; ++char_id) { 154 | for (unsigned index = 0; index < temp_model.char_len; ++index) { 155 | cout << temp_model.char_vecs[0]->values[char_id].v[index] << " "; 156 | } 157 | cout << endl; 158 | } 159 | exit(0);*/ 160 | 161 | unsigned ensmb = ensmb_model->size(); 162 | vector encoded_word_vecs; 163 | for (unsigned i = 0; i < ensmb; ++i) { 164 | Expression encoded_word_vec; 165 | auto model = (*ensmb_model)[i]; 166 | 
model->AddParamsToCG(morph_id, &cg); 167 | /*model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 168 | model->TransformEncodedInput(&encoded_word_vec); 169 | encoded_word_vecs.push_back(encoded_word_vec);*/ 170 | model->output_forward[morph_id].start_new_sequence(); 171 | } 172 | 173 | unsigned out_index = 1; 174 | unsigned pred_index = char_to_id[BOW]; 175 | while (pred_target_ids->size() < MAX_PRED_LEN) { 176 | vector ensmb_out; 177 | pred_target_ids->push_back(pred_index); 178 | if (pred_index == char_to_id[EOW]) { 179 | return; // If the end is found, break from the loop and return 180 | } 181 | 182 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 183 | auto model = (*ensmb_model)[ensmb_id]; 184 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index); 185 | Expression input, input_char_vec; 186 | if (out_index < input_ids.size()) { 187 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]); 188 | } else { 189 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 190 | min(unsigned(out_index - input_ids.size()), 191 | model->max_eps - 1)); 192 | } 193 | input = concatenate({//encoded_word_vecs[ensmb_id], 194 | prev_output_vec, 195 | input_char_vec}); 196 | 197 | Expression hidden = model->output_forward[morph_id].add_input(input); 198 | Expression out; 199 | model->ProjectToOutput(hidden, &out); 200 | ensmb_out.push_back(log_softmax(out)); 201 | } 202 | 203 | Expression out = sum(ensmb_out) / ensmb_out.size(); 204 | vector dist = as_vector(cg.incremental_forward()); 205 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 206 | out_index++; 207 | } 208 | } 209 | 210 | void 211 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 212 | unordered_map& char_to_id, 213 | const vector& input_ids, 214 | vector >* sequences, vector* tm_scores, 215 | vector* ensmb_model) { 216 | unsigned out_index = 1; 217 | unsigned ensmb = ensmb_model->size(); 
218 | ComputationGraph cg; 219 | 220 | // Compute stuff for every model in the ensemble. 221 | vector encoded_word_vecs; 222 | vector ensmb_out; 223 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 224 | auto& model = *(*ensmb_model)[ensmb_id]; 225 | model.AddParamsToCG(morph_id, &cg); 226 | 227 | /*Expression encoded_word_vec; 228 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 229 | model.TransformEncodedInput(&encoded_word_vec); 230 | encoded_word_vecs.push_back(encoded_word_vec);*/ 231 | model.output_forward[morph_id].start_new_sequence(); 232 | 233 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id], 234 | char_to_id[BOW]); 235 | Expression input = concatenate({//encoded_word_vecs[ensmb_id], 236 | prev_output_vec, 237 | lookup(cg, model.char_vecs[morph_id], 238 | input_ids[out_index])}); 239 | Expression hidden = model.output_forward[morph_id].add_input(input); 240 | Expression out; 241 | model.ProjectToOutput(hidden, &out); 242 | out = log_softmax(out); 243 | ensmb_out.push_back(out); 244 | } 245 | 246 | // Compute the average of the ensemble output. 247 | Expression out_dist = average(ensmb_out); 248 | vector log_dist = as_vector(cg.incremental_forward()); 249 | priority_queue > init_queue; 250 | for (unsigned i = 0; i < log_dist.size(); ++i) { 251 | init_queue.push(make_pair(log_dist[i], i)); 252 | } 253 | unsigned vocab_size = log_dist.size(); 254 | 255 | // Initialise the beam_size sequences, scores, hidden states. 
256 | vector log_scores; 257 | vector > prev_states; 258 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 259 | vector seq; 260 | seq.push_back(char_to_id[BOW]); 261 | seq.push_back(init_queue.top().second); 262 | sequences->push_back(seq); 263 | log_scores.push_back(init_queue.top().first); 264 | 265 | vector ensmb_states; 266 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 267 | auto& model = *(*ensmb_model)[ensmb_id]; 268 | ensmb_states.push_back(model.output_forward[morph_id].state()); 269 | } 270 | prev_states.push_back(ensmb_states); 271 | init_queue.pop(); 272 | } 273 | 274 | vector neg_inf(vocab_size, NEG_INF); 275 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf); 276 | 277 | vector active_beams(beam_size, true); 278 | while (true) { 279 | out_index++; 280 | priority_queue > > probs_queue; 281 | vector > curr_states; 282 | vector out_dist; 283 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 284 | if (active_beams[beam_id]) { 285 | unsigned prev_out_char = (*sequences)[beam_id].back(); 286 | vector ensmb_out; 287 | vector ensmb_states; 288 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) { 289 | auto& model = *(*ensmb_model)[ensmb_id]; 290 | Expression input_char_vec; 291 | if (out_index < input_ids.size()) { 292 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]); 293 | } else { 294 | input_char_vec = lookup(cg, model.eps_vecs[morph_id], 295 | min(unsigned(out_index - input_ids.size()), 296 | model.max_eps - 1)); 297 | } 298 | 299 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char); 300 | Expression input = concatenate({//encoded_word_vecs[ensmb_id], 301 | prev_out_vec, 302 | input_char_vec}); 303 | Expression hidden = model.output_forward[morph_id].add_input( 304 | prev_states[beam_id][ensmb_id], input); 305 | ensmb_states.push_back(model.output_forward[morph_id].state()); 306 | 307 | Expression out; 308 | 
model.ProjectToOutput(hidden, &out); 309 | out = log_softmax(out); 310 | ensmb_out.push_back(out); 311 | } 312 | curr_states.push_back(ensmb_states); 313 | out_dist.push_back(average(ensmb_out)); 314 | } else { 315 | vector dummy(ensmb, RNNPointer(0)); 316 | curr_states.push_back(dummy); 317 | out_dist.push_back(neg_inf_vec); 318 | } 319 | } 320 | 321 | Expression all_scores = concatenate(out_dist); 322 | vector log_dist = as_vector(cg.incremental_forward()); 323 | 324 | for (unsigned index = 0; index < log_dist.size(); ++index) { 325 | unsigned beam_id = index / vocab_size; 326 | unsigned char_id = index % vocab_size; 327 | if (active_beams[beam_id]) { 328 | pair location = make_pair(beam_id, char_id); 329 | probs_queue.push(pair >( 330 | log_scores[beam_id] + log_dist[index], location)); 331 | } 332 | } 333 | 334 | // Find the beam_size best now and update the variables. 335 | unordered_map > new_seq; 336 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 337 | if (active_beams[beam_id]) { 338 | float log_prob = probs_queue.top().first; 339 | pair location = probs_queue.top().second; 340 | unsigned old_beam_id = location.first, char_id = location.second; 341 | 342 | vector seq = (*sequences)[old_beam_id]; 343 | seq.push_back(char_id); 344 | new_seq[beam_id] = seq; 345 | log_scores[beam_id] = log_prob; // Update the score 346 | 347 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state 348 | probs_queue.pop(); 349 | } 350 | } 351 | 352 | // Update the sequences now. 353 | for (auto& it : new_seq) { 354 | (*sequences)[it.first] = it.second; 355 | } 356 | 357 | // Check if a sequence should be made inactive. 358 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 359 | if (active_beams[beam_id] && 360 | ((*sequences)[beam_id].back() == char_to_id[EOW] || 361 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) { 362 | active_beams[beam_id] = false; 363 | } 364 | } 365 | 366 | // Check if all sequences are inactive. 
367 | bool all_inactive = true; 368 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 369 | if (active_beams[beam_id]) { 370 | all_inactive = false; 371 | break; 372 | } 373 | } 374 | 375 | if (all_inactive) { 376 | *tm_scores = log_scores; 377 | return; 378 | } 379 | } 380 | } 381 | 382 | void Serialize(string& filename, NoEnc& model, vector* cnn_models) { 383 | ofstream outfile(filename); 384 | if (!outfile.is_open()) { 385 | cerr << "File opening failed" << endl; 386 | } 387 | 388 | boost::archive::text_oarchive oa(outfile); 389 | oa & model; 390 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 391 | oa & *(*cnn_models)[i]; 392 | } 393 | 394 | cerr << "Saved model to: " << filename << endl; 395 | outfile.close(); 396 | } 397 | 398 | void Read(string& filename, NoEnc* model, vector* cnn_models) { 399 | ifstream infile(filename); 400 | if (!infile.is_open()) { 401 | cerr << "File opening failed" << endl; 402 | } 403 | 404 | boost::archive::text_iarchive ia(infile); 405 | ia & *model; 406 | for (unsigned i = 0; i < model->morph_len; ++i) { 407 | Model *cnn_model = new Model(); 408 | cnn_models->push_back(cnn_model); 409 | } 410 | 411 | model->InitParams(cnn_models); 412 | for (unsigned i = 0; i < model->morph_len; ++i) { 413 | ia & *(*cnn_models)[i]; 414 | } 415 | 416 | cerr << "Loaded model from: " << filename << endl; 417 | infile.close(); 418 | } 419 | 420 | -------------------------------------------------------------------------------- /src/no-enc.h: -------------------------------------------------------------------------------- 1 | #ifndef NO_ENC_H_ 2 | #define NO_ENC_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | using namespace 
cnn; 23 | using namespace cnn::expr; 24 | 25 | class NoEnc { 26 | public: 27 | vector output_forward; 28 | vector char_vecs; 29 | 30 | Expression hidden_to_output, hidden_to_output_bias; 31 | vector phidden_to_output, phidden_to_output_bias; 32 | 33 | //Expression transform_encoded, transform_encoded_bias; 34 | //vector ptransform_encoded, ptransform_encoded_bias; 35 | 36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 37 | vector eps_vecs; 38 | 39 | NoEnc() {} 40 | 41 | NoEnc(const unsigned& char_length, const unsigned& hidden_length, 42 | const unsigned& vocab_length, const unsigned& layers, 43 | const unsigned& num_morph, vector* m, 44 | vector* optimizer); 45 | 46 | void InitParams(vector* m); 47 | 48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 49 | 50 | //void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 51 | // Expression* hidden, ComputationGraph *cg); 52 | 53 | //void TransformEncodedInput(Expression* encoded_input) const; 54 | 55 | //void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 56 | 57 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 58 | 59 | Expression ComputeLoss(const vector& hidden_units, 60 | const vector& targets) const; 61 | 62 | float Train(const unsigned& morph_id, const vector& inputs, 63 | const vector& outputs, AdadeltaTrainer* ada_gd); 64 | 65 | friend class boost::serialization::access; 66 | template void serialize(Archive& ar, const unsigned int) { 67 | ar & char_len; 68 | ar & hidden_len; 69 | ar & vocab_len; 70 | ar & layers; 71 | ar & morph_len; 72 | ar & max_eps; 73 | } 74 | }; 75 | 76 | void 77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 78 | const vector& input_ids, 79 | vector* pred_target_ids, vector* ensmb_model); 80 | 81 | void 82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 83 | unordered_map& char_to_id, 84 | const vector& input_ids, 85 | vector >* 
sequences, vector* tm_scores, 86 | vector* ensmb_model); 87 | 88 | void Serialize(string& filename, NoEnc& model, vector* cnn_model); 89 | 90 | void Read(string& filename, NoEnc* model, vector* cnn_model); 91 | 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /src/sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "sep-morph.h" 2 | 3 | using namespace std; 4 | using namespace cnn; 5 | using namespace cnn::expr; 6 | 7 | string BOW = "", EOW = ""; 8 | int MAX_PRED_LEN = 100; 9 | float NEG_INF = numeric_limits::min(); 10 | 11 | SepMorph::SepMorph(const unsigned& char_length, const unsigned& hidden_length, 12 | const unsigned& vocab_length, const unsigned& num_layers, 13 | const unsigned& num_morph, vector* m, 14 | vector* optimizer) { 15 | char_len = char_length; 16 | hidden_len = hidden_length; 17 | vocab_len = vocab_length; 18 | layers = num_layers; 19 | morph_len = num_morph; 20 | InitParams(m); 21 | } 22 | 23 | void SepMorph::InitParams(vector* m) { 24 | for (unsigned i = 0; i < morph_len; ++i) { 25 | input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 26 | input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i])); 27 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len, 28 | hidden_len, (*m)[i])); 29 | 30 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len})); 31 | phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1})); 32 | 33 | char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len})); 34 | 35 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len, 36 | 2 * hidden_len})); 37 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1})); 38 | 39 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len})); 40 | } 41 | } 42 | 43 | void SepMorph::AddParamsToCG(const unsigned& morph_id, 
ComputationGraph* cg) { 44 | input_forward[morph_id].new_graph(*cg); 45 | input_backward[morph_id].new_graph(*cg); 46 | output_forward[morph_id].new_graph(*cg); 47 | 48 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]); 49 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]); 50 | 51 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]); 52 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]); 53 | } 54 | 55 | void SepMorph::RunFwdBwd(const unsigned& morph_id, 56 | const vector& inputs, 57 | Expression* hidden, ComputationGraph *cg) { 58 | vector input_vecs; 59 | for (const unsigned& input_id : inputs) { 60 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id)); 61 | } 62 | 63 | // Run forward LSTM 64 | Expression forward_unit; 65 | input_forward[morph_id].start_new_sequence(); 66 | for (unsigned i = 0; i < input_vecs.size(); ++i) { 67 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]); 68 | } 69 | 70 | // Run backward LSTM 71 | Expression backward_unit; 72 | input_backward[morph_id].start_new_sequence(); 73 | for (int i = input_vecs.size() - 1; i >= 0; --i) { 74 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]); 75 | } 76 | 77 | // Concatenate the forward and back hidden layers 78 | *hidden = concatenate({forward_unit, backward_unit}); 79 | } 80 | 81 | void SepMorph::TransformEncodedInput(Expression* encoded_input) const { 82 | *encoded_input = affine_transform({transform_encoded_bias, 83 | transform_encoded, *encoded_input}); 84 | } 85 | 86 | void SepMorph::ProjectToOutput(const Expression& hidden, Expression* out) const { 87 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden}); 88 | } 89 | 90 | Expression SepMorph::ComputeLoss(const vector& hidden_units, 91 | const vector& targets) const { 92 | assert(hidden_units.size() == targets.size()); 93 | vector losses; 94 | for (unsigned i = 0; i < hidden_units.size(); ++i) { 95 | 
Expression out; 96 | ProjectToOutput(hidden_units[i], &out); 97 | losses.push_back(pickneglogsoftmax(out, targets[i])); 98 | } 99 | return sum(losses); 100 | } 101 | 102 | float SepMorph::Train(const unsigned& morph_id, const vector& inputs, 103 | const vector& outputs, AdadeltaTrainer* ada_gd) { 104 | ComputationGraph cg; 105 | AddParamsToCG(morph_id, &cg); 106 | 107 | // Encode and Transform to feed into decoder 108 | Expression encoded_input_vec; 109 | RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg); 110 | TransformEncodedInput(&encoded_input_vec); 111 | 112 | // Use this encoded word vector to predict the transformed word 113 | vector input_vecs_for_dec; 114 | vector output_ids_for_pred; 115 | for (unsigned i = 0; i < outputs.size(); ++i) { 116 | if (i < outputs.size() - 1) { 117 | // '' will not be fed as input -- it needs to be predicted. 118 | if (i < inputs.size() - 1) { 119 | input_vecs_for_dec.push_back(concatenate( 120 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]), 121 | lookup(cg, char_vecs[morph_id], inputs[i + 1])})); 122 | } else { 123 | input_vecs_for_dec.push_back(concatenate( 124 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]), 125 | lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))})); 126 | } 127 | } 128 | if (i > 0) { // '' will not be predicted in the output -- its fed in. 
129 | output_ids_for_pred.push_back(outputs[i]); 130 | } 131 | } 132 | 133 | vector decoder_hidden_units; 134 | output_forward[morph_id].start_new_sequence(); 135 | for (const auto& vec : input_vecs_for_dec) { 136 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec)); 137 | } 138 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred); 139 | 140 | float return_loss = as_scalar(cg.forward()); 141 | cg.backward(); 142 | ada_gd->update(1.0f); 143 | return return_loss; 144 | } 145 | 146 | void 147 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 148 | const vector& input_ids, 149 | vector* pred_target_ids, vector* ensmb_model) { 150 | ComputationGraph cg; 151 | 152 | unsigned ensmb = ensmb_model->size(); 153 | vector encoded_word_vecs; 154 | for (unsigned i = 0; i < ensmb; ++i) { 155 | Expression encoded_word_vec; 156 | auto model = (*ensmb_model)[i]; 157 | model->AddParamsToCG(morph_id, &cg); 158 | model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 159 | model->TransformEncodedInput(&encoded_word_vec); 160 | encoded_word_vecs.push_back(encoded_word_vec); 161 | model->output_forward[morph_id].start_new_sequence(); 162 | } 163 | 164 | unsigned out_index = 1; 165 | unsigned pred_index = char_to_id[BOW]; 166 | while (pred_target_ids->size() < MAX_PRED_LEN) { 167 | vector ensmb_out; 168 | pred_target_ids->push_back(pred_index); 169 | if (pred_index == char_to_id[EOW]) { 170 | return; // If the end is found, break from the loop and return 171 | } 172 | 173 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 174 | auto model = (*ensmb_model)[ensmb_id]; 175 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index); 176 | Expression input, input_char_vec; 177 | if (out_index < input_ids.size()) { 178 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]); 179 | } else { 180 | input_char_vec = lookup(cg, model->eps_vecs[morph_id], 181 | 
min(unsigned(out_index - input_ids.size()), 182 | model->max_eps - 1)); 183 | } 184 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 185 | input_char_vec}); 186 | 187 | Expression hidden = model->output_forward[morph_id].add_input(input); 188 | Expression out; 189 | model->ProjectToOutput(hidden, &out); 190 | ensmb_out.push_back(log_softmax(out)); 191 | } 192 | 193 | Expression out = sum(ensmb_out) / ensmb_out.size(); 194 | vector dist = as_vector(cg.incremental_forward()); 195 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end())); 196 | out_index++; 197 | } 198 | } 199 | 200 | void 201 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 202 | unordered_map& char_to_id, 203 | const vector& input_ids, 204 | vector >* sequences, vector* tm_scores, 205 | vector* ensmb_model) { 206 | unsigned out_index = 1; 207 | unsigned ensmb = ensmb_model->size(); 208 | ComputationGraph cg; 209 | 210 | // Compute stuff for every model in the ensemble. 
211 | vector encoded_word_vecs; 212 | vector ensmb_out; 213 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 214 | auto& model = *(*ensmb_model)[ensmb_id]; 215 | model.AddParamsToCG(morph_id, &cg); 216 | 217 | Expression encoded_word_vec; 218 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg); 219 | model.TransformEncodedInput(&encoded_word_vec); 220 | encoded_word_vecs.push_back(encoded_word_vec); 221 | model.output_forward[morph_id].start_new_sequence(); 222 | 223 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id], 224 | char_to_id[BOW]); 225 | Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec, 226 | lookup(cg, model.char_vecs[morph_id], 227 | input_ids[out_index])}); 228 | Expression hidden = model.output_forward[morph_id].add_input(input); 229 | Expression out; 230 | model.ProjectToOutput(hidden, &out); 231 | out = log_softmax(out); 232 | ensmb_out.push_back(out); 233 | } 234 | 235 | // Compute the average of the ensemble output. 236 | Expression out_dist = average(ensmb_out); 237 | vector log_dist = as_vector(cg.incremental_forward()); 238 | priority_queue > init_queue; 239 | for (unsigned i = 0; i < log_dist.size(); ++i) { 240 | init_queue.push(make_pair(log_dist[i], i)); 241 | } 242 | unsigned vocab_size = log_dist.size(); 243 | 244 | // Initialise the beam_size sequences, scores, hidden states. 
245 | vector log_scores; 246 | vector > prev_states; 247 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 248 | vector seq; 249 | seq.push_back(char_to_id[BOW]); 250 | seq.push_back(init_queue.top().second); 251 | sequences->push_back(seq); 252 | log_scores.push_back(init_queue.top().first); 253 | 254 | vector ensmb_states; 255 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) { 256 | auto& model = *(*ensmb_model)[ensmb_id]; 257 | ensmb_states.push_back(model.output_forward[morph_id].state()); 258 | } 259 | prev_states.push_back(ensmb_states); 260 | init_queue.pop(); 261 | } 262 | 263 | vector neg_inf(vocab_size, NEG_INF); 264 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf); 265 | 266 | vector active_beams(beam_size, true); 267 | while (true) { 268 | out_index++; 269 | priority_queue > > probs_queue; 270 | vector > curr_states; 271 | vector out_dist; 272 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 273 | if (active_beams[beam_id]) { 274 | unsigned prev_out_char = (*sequences)[beam_id].back(); 275 | vector ensmb_out; 276 | vector ensmb_states; 277 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) { 278 | auto& model = *(*ensmb_model)[ensmb_id]; 279 | Expression input_char_vec; 280 | if (out_index < input_ids.size()) { 281 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]); 282 | } else { 283 | input_char_vec = lookup(cg, model.eps_vecs[morph_id], 284 | min(unsigned(out_index - input_ids.size()), 285 | model.max_eps - 1)); 286 | } 287 | 288 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char); 289 | Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_out_vec, 290 | input_char_vec}); 291 | Expression hidden = model.output_forward[morph_id].add_input( 292 | prev_states[beam_id][ensmb_id], input); 293 | ensmb_states.push_back(model.output_forward[morph_id].state()); 294 | 295 | Expression out; 296 | 
model.ProjectToOutput(hidden, &out); 297 | out = log_softmax(out); 298 | ensmb_out.push_back(out); 299 | } 300 | curr_states.push_back(ensmb_states); 301 | out_dist.push_back(average(ensmb_out)); 302 | } else { 303 | vector dummy(ensmb, RNNPointer(0)); 304 | curr_states.push_back(dummy); 305 | out_dist.push_back(neg_inf_vec); 306 | } 307 | } 308 | 309 | Expression all_scores = concatenate(out_dist); 310 | vector log_dist = as_vector(cg.incremental_forward()); 311 | 312 | for (unsigned index = 0; index < log_dist.size(); ++index) { 313 | unsigned beam_id = index / vocab_size; 314 | unsigned char_id = index % vocab_size; 315 | if (active_beams[beam_id]) { 316 | pair location = make_pair(beam_id, char_id); 317 | probs_queue.push(pair >( 318 | log_scores[beam_id] + log_dist[index], location)); 319 | } 320 | } 321 | 322 | // Find the beam_size best now and update the variables. 323 | unordered_map > new_seq; 324 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 325 | if (active_beams[beam_id]) { 326 | float log_prob = probs_queue.top().first; 327 | pair location = probs_queue.top().second; 328 | unsigned old_beam_id = location.first, char_id = location.second; 329 | 330 | vector seq = (*sequences)[old_beam_id]; 331 | seq.push_back(char_id); 332 | new_seq[beam_id] = seq; 333 | log_scores[beam_id] = log_prob; // Update the score 334 | 335 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state 336 | probs_queue.pop(); 337 | } 338 | } 339 | 340 | // Update the sequences now. 341 | for (auto& it : new_seq) { 342 | (*sequences)[it.first] = it.second; 343 | } 344 | 345 | // Check if a sequence should be made inactive. 346 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 347 | if (active_beams[beam_id] && 348 | ((*sequences)[beam_id].back() == char_to_id[EOW] || 349 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) { 350 | active_beams[beam_id] = false; 351 | } 352 | } 353 | 354 | // Check if all sequences are inactive. 
355 | bool all_inactive = true; 356 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) { 357 | if (active_beams[beam_id]) { 358 | all_inactive = false; 359 | break; 360 | } 361 | } 362 | 363 | if (all_inactive) { 364 | *tm_scores = log_scores; 365 | return; 366 | } 367 | } 368 | } 369 | 370 | void Serialize(string& filename, SepMorph& model, vector* cnn_models) { 371 | ofstream outfile(filename); 372 | if (!outfile.is_open()) { 373 | cerr << "File opening failed" << endl; 374 | } 375 | 376 | boost::archive::text_oarchive oa(outfile); 377 | oa & model; 378 | for (unsigned i = 0; i < cnn_models->size(); ++i) { 379 | oa & *(*cnn_models)[i]; 380 | } 381 | 382 | cerr << "Saved model to: " << filename << endl; 383 | outfile.close(); 384 | } 385 | 386 | void Read(string& filename, SepMorph* model, vector* cnn_models) { 387 | ifstream infile(filename); 388 | if (!infile.is_open()) { 389 | cerr << "File opening failed" << endl; 390 | } 391 | 392 | boost::archive::text_iarchive ia(infile); 393 | ia & *model; 394 | for (unsigned i = 0; i < model->morph_len; ++i) { 395 | Model *cnn_model = new Model(); 396 | cnn_models->push_back(cnn_model); 397 | } 398 | 399 | model->InitParams(cnn_models); 400 | for (unsigned i = 0; i < model->morph_len; ++i) { 401 | ia & *(*cnn_models)[i]; 402 | } 403 | 404 | cerr << "Loaded model from: " << filename << endl; 405 | infile.close(); 406 | } 407 | 408 | -------------------------------------------------------------------------------- /src/sep-morph.h: -------------------------------------------------------------------------------- 1 | #ifndef SEP_MORPH_H_ 2 | #define SEP_MORPH_H_ 3 | 4 | #include "cnn/nodes.h" 5 | #include "cnn/cnn.h" 6 | #include "cnn/rnn.h" 7 | #include "cnn/gru.h" 8 | #include "cnn/lstm.h" 9 | #include "cnn/training.h" 10 | #include "cnn/gpu-ops.h" 11 | #include "cnn/expr.h" 12 | 13 | #include "utils.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | 
using namespace cnn; 23 | using namespace cnn::expr; 24 | 25 | class SepMorph { 26 | public: 27 | vector input_forward, input_backward, output_forward; 28 | vector char_vecs; 29 | 30 | Expression hidden_to_output, hidden_to_output_bias; 31 | vector phidden_to_output, phidden_to_output_bias; 32 | 33 | Expression transform_encoded, transform_encoded_bias; 34 | vector ptransform_encoded, ptransform_encoded_bias; 35 | 36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5; 37 | vector eps_vecs; 38 | 39 | SepMorph() {} 40 | 41 | SepMorph(const unsigned& char_length, const unsigned& hidden_length, 42 | const unsigned& vocab_length, const unsigned& layers, 43 | const unsigned& num_morph, vector* m, 44 | vector* optimizer); 45 | 46 | void InitParams(vector* m); 47 | 48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg); 49 | 50 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs, 51 | Expression* hidden, ComputationGraph *cg); 52 | 53 | void TransformEncodedInput(Expression* encoded_input) const; 54 | 55 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const; 56 | 57 | void ProjectToOutput(const Expression& hidden, Expression* out) const; 58 | 59 | Expression ComputeLoss(const vector& hidden_units, 60 | const vector& targets) const; 61 | 62 | float Train(const unsigned& morph_id, const vector& inputs, 63 | const vector& outputs, AdadeltaTrainer* ada_gd); 64 | 65 | friend class boost::serialization::access; 66 | template void serialize(Archive& ar, const unsigned int) { 67 | ar & char_len; 68 | ar & hidden_len; 69 | ar & vocab_len; 70 | ar & layers; 71 | ar & morph_len; 72 | ar & max_eps; 73 | } 74 | }; 75 | 76 | void 77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id, 78 | const vector& input_ids, 79 | vector* pred_target_ids, vector* ensmb_model); 80 | 81 | void 82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size, 83 | unordered_map& char_to_id, 84 | 
const vector& input_ids, 85 | vector >* sequences, vector* tm_scores, 86 | vector* ensmb_model); 87 | 88 | void Serialize(string& filename, SepMorph& model, vector* cnn_model); 89 | 90 | void Read(string& filename, SepMorph* model, vector* cnn_model); 91 | 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /src/train-enc-dec-attn.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "enc-dec-attn.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | int main(int argc, char** argv) { 25 | cnn::Initialize(argc, argv); 26 | 27 | string vocab_filename = argv[1]; // vocabulary of words/characters 28 | string morph_filename = argv[2]; 29 | string train_filename = argv[3]; 30 | string test_filename = argv[4]; 31 | unsigned hidden_size = atoi(argv[5]); 32 | unsigned num_iter = atoi(argv[6]); 33 | float reg_strength = atof(argv[7]); 34 | unsigned layers = atoi(argv[8]); 35 | string model_outputfilename = argv[9]; 36 | 37 | unordered_map char_to_id, morph_to_id; 38 | unordered_map id_to_char, id_to_morph; 39 | 40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 41 | unsigned vocab_size = char_to_id.size(); 42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 43 | unsigned morph_size = morph_to_id.size(); 44 | 45 | vector train_data; // Read the training file in a vector 46 | ReadData(train_filename, &train_data); 47 | 48 | vector test_data; // Read the dev file in a vector 49 | ReadData(test_filename, &test_data); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size; 
++i) { 55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | EncDecAttn nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Pre-train all models on all datasets. 65 | /*cerr << "Pre-training... " << endl; 66 | for (unsigned morph_id = 0; morph_id < morph_size; ++morph_id) { 67 | cerr << "Morph class: " << morph_id + 1 << endl; 68 | for (string& line : train_data) { 69 | vector items = split_line(line, '|'); 70 | vector input_ids, target_ids; 71 | input_ids.clear(); target_ids.clear(); 72 | for (const string& ch : split_line(items[0], ' ')) { 73 | input_ids.push_back(char_to_id[ch]); 74 | } 75 | for (const string& ch : split_line(items[1], ' ')) { 76 | target_ids.push_back(char_to_id[ch]); 77 | } 78 | //unsigned morph_id = morph_to_id[items[2]]; 79 | double temp_loss = nn.Train(morph_id, input_ids, target_ids, 80 | &optimizer[morph_id]); 81 | } 82 | } 83 | cerr << "complete." 
<< endl;*/ 84 | 85 | // Read the training file and train the model 86 | double best_score = -1; 87 | vector object_list; 88 | object_list.push_back(&nn); 89 | for (unsigned iter = 0; iter < num_iter; ++iter) { 90 | unsigned line_id = 0; 91 | random_shuffle(train_data.begin(), train_data.end()); 92 | vector loss(morph_size, 0.0f); 93 | for (string& line : train_data) { 94 | vector items = split_line(line, '|'); 95 | vector input_ids, target_ids; 96 | input_ids.clear(); target_ids.clear(); 97 | for (const string& ch : split_line(items[0], ' ')) { 98 | input_ids.push_back(char_to_id[ch]); 99 | } 100 | for (const string& ch : split_line(items[1], ' ')) { 101 | target_ids.push_back(char_to_id[ch]); 102 | } 103 | unsigned morph_id = morph_to_id[items[2]]; 104 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 105 | &optimizer[morph_id]); 106 | cerr << ++line_id << "\r"; 107 | } 108 | 109 | // Read the test file and output predictions for the words. 110 | string line; 111 | double correct = 0, total = 0; 112 | for (string& line : test_data) { 113 | vector items = split_line(line, '|'); 114 | vector input_ids, target_ids, pred_target_ids; 115 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 116 | for (const string& ch : split_line(items[0], ' ')) { 117 | input_ids.push_back(char_to_id[ch]); 118 | } 119 | for (const string& ch : split_line(items[1], ' ')) { 120 | target_ids.push_back(char_to_id[ch]); 121 | } 122 | unsigned morph_id = morph_to_id[items[2]]; 123 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 124 | &object_list); 125 | 126 | string prediction = ""; 127 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 128 | prediction += id_to_char[pred_target_ids[i]]; 129 | if (i != pred_target_ids.size() - 1) { 130 | prediction += " "; 131 | } 132 | } 133 | if (prediction == items[1]) { 134 | correct += 1; 135 | } 136 | total += 1; 137 | } 138 | double curr_score = correct / total; 139 | cerr << "Iter " << iter + 1 << 
" " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.); 140 | cerr << "Prediction Accuracy: " << curr_score << endl; 141 | if (curr_score > best_score) { 142 | best_score = curr_score; 143 | Serialize(model_outputfilename, nn, &m); 144 | } 145 | } 146 | return 1; 147 | } 148 | -------------------------------------------------------------------------------- /src/train-enc-dec.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "enc-dec.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | int main(int argc, char** argv) { 25 | cnn::Initialize(argc, argv); 26 | 27 | string vocab_filename = argv[1]; // vocabulary of words/characters 28 | string morph_filename = argv[2]; 29 | string train_filename = argv[3]; 30 | string test_filename = argv[4]; 31 | unsigned hidden_size = atoi(argv[5]); 32 | unsigned num_iter = atoi(argv[6]); 33 | float reg_strength = atof(argv[7]); 34 | unsigned layers = atoi(argv[8]); 35 | string model_outputfilename = argv[9]; 36 | 37 | unordered_map char_to_id, morph_to_id; 38 | unordered_map id_to_char, id_to_morph; 39 | 40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 41 | unsigned vocab_size = char_to_id.size(); 42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 43 | unsigned morph_size = morph_to_id.size(); 44 | 45 | vector train_data; // Read the training file in a vector 46 | ReadData(train_filename, &train_data); 47 | 48 | vector test_data; // Read the dev file in a vector 49 | ReadData(test_filename, &test_data); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size; ++i) { 
55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | EncDec nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Read the training file and train the model 65 | double best_score = -1; 66 | vector object_list; 67 | object_list.push_back(&nn); 68 | for (unsigned iter = 0; iter < num_iter; ++iter) { 69 | unsigned line_id = 0; 70 | random_shuffle(train_data.begin(), train_data.end()); 71 | vector loss(morph_size, 0.0f); 72 | for (string& line : train_data) { 73 | vector items = split_line(line, '|'); 74 | vector input_ids, target_ids; 75 | input_ids.clear(); target_ids.clear(); 76 | for (const string& ch : split_line(items[0], ' ')) { 77 | input_ids.push_back(char_to_id[ch]); 78 | } 79 | for (const string& ch : split_line(items[1], ' ')) { 80 | target_ids.push_back(char_to_id[ch]); 81 | } 82 | unsigned morph_id = morph_to_id[items[2]]; 83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 84 | &optimizer[morph_id]); 85 | cerr << ++line_id << "\r"; 86 | } 87 | 88 | // Read the test file and output predictions for the words. 
89 | string line; 90 | double correct = 0, total = 0; 91 | for (string& line : test_data) { 92 | vector items = split_line(line, '|'); 93 | vector input_ids, target_ids, pred_target_ids; 94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 95 | for (const string& ch : split_line(items[0], ' ')) { 96 | input_ids.push_back(char_to_id[ch]); 97 | } 98 | for (const string& ch : split_line(items[1], ' ')) { 99 | target_ids.push_back(char_to_id[ch]); 100 | } 101 | unsigned morph_id = morph_to_id[items[2]]; 102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 103 | &object_list); 104 | 105 | string prediction = ""; 106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 107 | prediction += id_to_char[pred_target_ids[i]]; 108 | if (i != pred_target_ids.size() - 1) { 109 | prediction += " "; 110 | } 111 | } 112 | if (prediction == items[1]) { 113 | correct += 1; 114 | } 115 | total += 1; 116 | } 117 | double curr_score = correct / total; 118 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.); 119 | cerr << "Prediction Accuracy: " << curr_score << endl; 120 | if (curr_score > best_score) { 121 | best_score = curr_score; 122 | Serialize(model_outputfilename, nn, &m); 123 | } 124 | } 125 | return 1; 126 | } 127 | -------------------------------------------------------------------------------- /src/train-joint-enc-dec-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "joint-enc-dec-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string 
vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string train_filename = argv[3]; 26 | string test_filename = argv[4]; 27 | unsigned hidden_size = atoi(argv[5]); 28 | unsigned num_iter = atoi(argv[6]); 29 | float reg_strength = atof(argv[7]); 30 | unsigned layers = atoi(argv[8]); 31 | string model_outputfilename = argv[9]; 32 | 33 | unordered_map char_to_id, morph_to_id; 34 | unordered_map id_to_char, id_to_morph; 35 | 36 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 37 | unsigned vocab_size = char_to_id.size(); 38 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 39 | unsigned morph_size = morph_to_id.size(); 40 | 41 | vector train_data; // Read the training file in a vector 42 | ReadData(train_filename, &train_data); 43 | 44 | vector test_data; // Read the dev file in a vector 45 | ReadData(test_filename, &test_data); 46 | 47 | vector m; 48 | vector optimizer; 49 | 50 | for (unsigned i = 0; i < morph_size + 1; ++i) { 51 | m.push_back(new Model()); 52 | AdadeltaTrainer ada(m[i], reg_strength); 53 | optimizer.push_back(ada); 54 | } 55 | 56 | unsigned char_size = vocab_size; 57 | JointEncDecMorph nn(char_size, hidden_size, vocab_size, layers, morph_size, 58 | &m, &optimizer); 59 | 60 | // Read the training file and train the model 61 | double best_score = -1; 62 | vector model_pointers; 63 | model_pointers.push_back(&nn); 64 | for (unsigned iter = 0; iter < num_iter; ++iter) { 65 | unsigned line_id = 0; 66 | random_shuffle(train_data.begin(), train_data.end()); 67 | vector loss(morph_size, 0.0f); 68 | for (string& line : train_data) { 69 | vector items = split_line(line, '|'); 70 | vector input_ids, target_ids; 71 | input_ids.clear(); target_ids.clear(); 72 | for (const string& ch : split_line(items[0], ' ')) { 73 | input_ids.push_back(char_to_id[ch]); 74 | } 75 | for (const string& ch : split_line(items[1], ' ')) { 76 | target_ids.push_back(char_to_id[ch]); 77 | } 78 | unsigned morph_id = 
morph_to_id[items[2]]; 79 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 80 | &optimizer[morph_id], &optimizer[morph_size]); 81 | cerr << ++line_id << "\r"; 82 | } 83 | 84 | // Read the test file and output predictions for the words. 85 | string line; 86 | double correct = 0, total = 0; 87 | for (string& line : test_data) { 88 | vector items = split_line(line, '|'); 89 | vector input_ids, target_ids, pred_target_ids; 90 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 91 | for (const string& ch : split_line(items[0], ' ')) { 92 | input_ids.push_back(char_to_id[ch]); 93 | } 94 | for (const string& ch : split_line(items[1], ' ')) { 95 | target_ids.push_back(char_to_id[ch]); 96 | } 97 | unsigned morph_id = morph_to_id[items[2]]; 98 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 99 | &model_pointers); 100 | 101 | string prediction = ""; 102 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 103 | prediction += id_to_char[pred_target_ids[i]]; 104 | if (i != pred_target_ids.size() - 1) { 105 | prediction += " "; 106 | } 107 | } 108 | if (prediction == items[1]) { 109 | correct += 1; 110 | } 111 | total += 1; 112 | } 113 | double curr_score = correct / total; 114 | cerr << "Iter " << iter + 1 << " "; 115 | cerr << "Prediction Accuracy: " << curr_score << endl; 116 | if (curr_score > best_score) { 117 | best_score = curr_score; 118 | Serialize(model_outputfilename, nn, &m); 119 | } 120 | } 121 | return 1; 122 | } 123 | -------------------------------------------------------------------------------- /src/train-joint-enc-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "joint-enc-morph.h" 12 | 13 | #include 14 | #include 15 
| 16 | using namespace std; 17 | using namespace cnn; 18 | using namespace cnn::expr; 19 | 20 | int main(int argc, char** argv) { 21 | cnn::Initialize(argc, argv); 22 | 23 | string vocab_filename = argv[1]; // vocabulary of words/characters 24 | string morph_filename = argv[2]; 25 | string train_filename = argv[3]; 26 | string test_filename = argv[4]; 27 | unsigned hidden_size = atoi(argv[5]); 28 | unsigned num_iter = atoi(argv[6]); 29 | float reg_strength = atof(argv[7]); 30 | unsigned layers = atoi(argv[8]); 31 | string model_outputfilename = argv[9]; 32 | 33 | unordered_map char_to_id, morph_to_id; 34 | unordered_map id_to_char, id_to_morph; 35 | 36 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 37 | unsigned vocab_size = char_to_id.size(); 38 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 39 | unsigned morph_size = morph_to_id.size(); 40 | 41 | vector train_data; // Read the training file in a vector 42 | ReadData(train_filename, &train_data); 43 | 44 | vector test_data; // Read the dev file in a vector 45 | ReadData(test_filename, &test_data); 46 | 47 | vector m; 48 | vector optimizer; 49 | 50 | for (unsigned i = 0; i < morph_size + 1; ++i) { 51 | m.push_back(new Model()); 52 | AdadeltaTrainer ada(m[i], reg_strength); 53 | optimizer.push_back(ada); 54 | } 55 | 56 | unsigned char_size = vocab_size; 57 | JointEncMorph nn(char_size, hidden_size, vocab_size, layers, morph_size, 58 | &m, &optimizer); 59 | 60 | // Read the training file and train the model 61 | double best_score = -1; 62 | vector model_pointers; 63 | model_pointers.push_back(&nn); 64 | for (unsigned iter = 0; iter < num_iter; ++iter) { 65 | unsigned line_id = 0; 66 | random_shuffle(train_data.begin(), train_data.end()); 67 | vector loss(morph_size, 0.0f); 68 | for (string& line : train_data) { 69 | vector items = split_line(line, '|'); 70 | vector input_ids, target_ids; 71 | input_ids.clear(); target_ids.clear(); 72 | for (const string& ch : split_line(items[0], ' ')) { 73 | 
input_ids.push_back(char_to_id[ch]); 74 | } 75 | for (const string& ch : split_line(items[1], ' ')) { 76 | target_ids.push_back(char_to_id[ch]); 77 | } 78 | unsigned morph_id = morph_to_id[items[2]]; 79 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 80 | &optimizer[morph_id], &optimizer[morph_size]); 81 | cerr << ++line_id << "\r"; 82 | } 83 | 84 | // Read the test file and output predictions for the words. 85 | string line; 86 | double correct = 0, total = 0; 87 | for (string& line : test_data) { 88 | vector items = split_line(line, '|'); 89 | vector input_ids, target_ids, pred_target_ids; 90 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 91 | for (const string& ch : split_line(items[0], ' ')) { 92 | input_ids.push_back(char_to_id[ch]); 93 | } 94 | for (const string& ch : split_line(items[1], ' ')) { 95 | target_ids.push_back(char_to_id[ch]); 96 | } 97 | unsigned morph_id = morph_to_id[items[2]]; 98 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 99 | &model_pointers); 100 | 101 | string prediction = ""; 102 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 103 | prediction += id_to_char[pred_target_ids[i]]; 104 | if (i != pred_target_ids.size() - 1) { 105 | prediction += " "; 106 | } 107 | } 108 | if (prediction == items[1]) { 109 | correct += 1; 110 | } 111 | total += 1; 112 | } 113 | double curr_score = correct / total; 114 | cerr << "Iter " << iter + 1 << " "; 115 | cerr << "Prediction Accuracy: " << curr_score << endl; 116 | if (curr_score > best_score) { 117 | best_score = curr_score; 118 | Serialize(model_outputfilename, nn, &m); 119 | } 120 | } 121 | return 1; 122 | } 123 | -------------------------------------------------------------------------------- /src/train-lm-joint-enc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | 
#include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "lm.h" 11 | #include "utils.h" 12 | #include "lm-joint-enc.h" 13 | 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | using namespace cnn; 19 | using namespace cnn::expr; 20 | 21 | int main(int argc, char** argv) { 22 | cnn::Initialize(argc, argv); 23 | 24 | string vocab_filename = argv[1]; // vocabulary of words/characters 25 | string morph_filename = argv[2]; 26 | string train_filename = argv[3]; 27 | string test_filename = argv[4]; 28 | unsigned hidden_size = atoi(argv[5]); 29 | unsigned num_iter = atoi(argv[6]); 30 | float reg_strength = atof(argv[7]); 31 | unsigned layers = atoi(argv[8]); 32 | string lm_model_filename = argv[9]; 33 | string model_outputfilename = argv[10]; 34 | 35 | unordered_map char_to_id, morph_to_id; 36 | unordered_map id_to_char, id_to_morph; 37 | 38 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 39 | unsigned vocab_size = char_to_id.size(); 40 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 41 | unsigned morph_size = morph_to_id.size(); 42 | 43 | vector train_data; // Read the training file in a vector 44 | ReadData(train_filename, &train_data); 45 | 46 | vector test_data; // Read the dev file in a vector 47 | ReadData(test_filename, &test_data); 48 | 49 | LM lm(lm_model_filename, char_to_id, id_to_char); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size + 1; ++i) { 55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | LMJointEnc nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Read the training file and train the model 65 | double best_score = -1; 66 | vector model_pointers; 67 | model_pointers.push_back(&nn); 68 | for (unsigned iter = 0; iter < num_iter; ++iter) { 69 | unsigned line_id = 0; 70 | 
random_shuffle(train_data.begin(), train_data.end()); 71 | vector loss(morph_size, 0.0f); 72 | for (string& line : train_data) { 73 | vector items = split_line(line, '|'); 74 | vector input_ids, target_ids; 75 | input_ids.clear(); target_ids.clear(); 76 | for (const string& ch : split_line(items[0], ' ')) { 77 | input_ids.push_back(char_to_id[ch]); 78 | } 79 | for (const string& ch : split_line(items[1], ' ')) { 80 | target_ids.push_back(char_to_id[ch]); 81 | } 82 | unsigned morph_id = morph_to_id[items[2]]; 83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, &lm, 84 | &optimizer[morph_id], &optimizer[morph_size]); 85 | cerr << ++line_id << "\r"; 86 | } 87 | 88 | // Read the test file and output predictions for the words. 89 | string line; 90 | double correct = 0, total = 0; 91 | for (string& line : test_data) { 92 | vector items = split_line(line, '|'); 93 | vector input_ids, target_ids, pred_target_ids; 94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 95 | for (const string& ch : split_line(items[0], ' ')) { 96 | input_ids.push_back(char_to_id[ch]); 97 | } 98 | for (const string& ch : split_line(items[1], ' ')) { 99 | target_ids.push_back(char_to_id[ch]); 100 | } 101 | unsigned morph_id = morph_to_id[items[2]]; 102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 103 | &lm, &model_pointers); 104 | 105 | string prediction = ""; 106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 107 | prediction += id_to_char[pred_target_ids[i]]; 108 | if (i != pred_target_ids.size() - 1) { 109 | prediction += " "; 110 | } 111 | } 112 | if (prediction == items[1]) { 113 | correct += 1; 114 | } 115 | total += 1; 116 | } 117 | double curr_score = correct / total; 118 | cerr << "Iter " << iter + 1 << " "; 119 | cerr << "Prediction Accuracy: " << curr_score << endl; 120 | if (curr_score > best_score) { 121 | best_score = curr_score; 122 | Serialize(model_outputfilename, nn, &m); 123 | } 124 | } 125 | return 1; 126 | } 127 | 
-------------------------------------------------------------------------------- /src/train-lm-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "lm.h" 11 | #include "utils.h" 12 | #include "lm-sep-morph.h" 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | using namespace std; 24 | using namespace cnn; 25 | using namespace cnn::expr; 26 | 27 | int main(int argc, char** argv) { 28 | cnn::Initialize(argc, argv); 29 | feenableexcept(FE_INVALID | FE_OVERFLOW | FE_DIVBYZERO); 30 | 31 | string vocab_filename = argv[1]; // vocabulary of words/characters 32 | string morph_filename = argv[2]; 33 | string train_filename = argv[3]; 34 | string test_filename = argv[4]; 35 | unsigned hidden_size = atoi(argv[5]); 36 | unsigned num_iter = atoi(argv[6]); 37 | float reg_strength = atof(argv[7]); 38 | unsigned layers = atoi(argv[8]); 39 | string lm_model_filename = argv[9]; 40 | string model_outputfilename = argv[10]; 41 | 42 | unordered_map char_to_id, morph_to_id; 43 | unordered_map id_to_char, id_to_morph; 44 | 45 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 46 | unsigned vocab_size = char_to_id.size(); 47 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 48 | unsigned morph_size = morph_to_id.size(); 49 | 50 | vector train_data; // Read the training file in a vector 51 | ReadData(train_filename, &train_data); 52 | 53 | vector test_data; // Read the dev file in a vector 54 | ReadData(test_filename, &test_data); 55 | 56 | LM lm(lm_model_filename, char_to_id, id_to_char); 57 | 58 | vector m; 59 | vector optimizer; 60 | 61 | for (unsigned i = 0; i < morph_size; ++i) { 62 | m.push_back(new Model()); 63 | AdadeltaTrainer ada(m[i], 
reg_strength); 64 | optimizer.push_back(ada); 65 | } 66 | 67 | unsigned char_size = vocab_size; 68 | LMSepMorph nn(char_size, hidden_size, vocab_size, layers, morph_size, 69 | &m, &optimizer); 70 | 71 | // Read the training file and train the model 72 | double best_score = -1; 73 | vector object_list; 74 | object_list.push_back(&nn); 75 | for (unsigned iter = 0; iter < num_iter; ++iter) { 76 | unsigned line_id = 0; 77 | random_shuffle(train_data.begin(), train_data.end()); 78 | vector loss(morph_size, 0.0f); 79 | for (string& line : train_data) { 80 | vector items = split_line(line, '|'); 81 | vector input_ids, target_ids; 82 | input_ids.clear(); target_ids.clear(); 83 | 84 | string input = items[0], output = items[1]; 85 | for (const string& ch : split_line(input, ' ')) { 86 | input_ids.push_back(char_to_id[ch]); 87 | } 88 | for (const string& ch : split_line(output, ' ')) { 89 | target_ids.push_back(char_to_id[ch]); 90 | } 91 | unsigned morph_id = morph_to_id[items[2]]; 92 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 93 | &lm, &optimizer[morph_id]); 94 | cerr << ++line_id << "\r"; 95 | } 96 | 97 | // Read the test file and output predictions for the words. 
98 | string line; 99 | double correct = 0, total = 0; 100 | for (string& line : test_data) { 101 | vector items = split_line(line, '|'); 102 | vector input_ids, target_ids, pred_target_ids; 103 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 104 | 105 | string input = items[0], output = items[1]; 106 | for (const string& ch : split_line(input, ' ')) { 107 | input_ids.push_back(char_to_id[ch]); 108 | } 109 | for (const string& ch : split_line(output, ' ')) { 110 | target_ids.push_back(char_to_id[ch]); 111 | } 112 | unsigned morph_id = morph_to_id[items[2]]; 113 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm, 114 | &object_list); 115 | 116 | string prediction = ""; 117 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 118 | prediction += id_to_char[pred_target_ids[i]]; 119 | if (i != pred_target_ids.size() - 1) { 120 | prediction += " "; 121 | } 122 | } 123 | if (prediction == output) { 124 | correct += 1; 125 | } 126 | total += 1; 127 | } 128 | double curr_score = correct / total; 129 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.); 130 | cerr << " Prediction Accuracy: " << curr_score << endl; 131 | if (curr_score > best_score) { 132 | best_score = curr_score; 133 | Serialize(model_outputfilename, nn, &m); 134 | } 135 | } 136 | return 1; 137 | } 138 | -------------------------------------------------------------------------------- /src/train-no-enc.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "no-enc.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | int 
main(int argc, char** argv) { 25 | cnn::Initialize(argc, argv); 26 | 27 | string vocab_filename = argv[1]; // vocabulary of words/characters 28 | string morph_filename = argv[2]; 29 | string train_filename = argv[3]; 30 | string test_filename = argv[4]; 31 | unsigned hidden_size = atoi(argv[5]); 32 | unsigned num_iter = atoi(argv[6]); 33 | float reg_strength = atof(argv[7]); 34 | unsigned layers = atoi(argv[8]); 35 | string model_outputfilename = argv[9]; 36 | 37 | unordered_map char_to_id, morph_to_id; 38 | unordered_map id_to_char, id_to_morph; 39 | 40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 41 | unsigned vocab_size = char_to_id.size(); 42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 43 | unsigned morph_size = morph_to_id.size(); 44 | 45 | vector train_data; // Read the training file in a vector 46 | ReadData(train_filename, &train_data); 47 | 48 | vector test_data; // Read the dev file in a vector 49 | ReadData(test_filename, &test_data); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size; ++i) { 55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | NoEnc nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Pre-train all models on all datasets. 65 | /*cerr << "Pre-training... 
" << endl; 66 | for (unsigned morph_id = 0; morph_id < morph_size; ++morph_id) { 67 | cerr << "Morph class: " << morph_id + 1 << endl; 68 | for (string& line : train_data) { 69 | vector items = split_line(line, '|'); 70 | vector input_ids, target_ids; 71 | input_ids.clear(); target_ids.clear(); 72 | for (const string& ch : split_line(items[0], ' ')) { 73 | input_ids.push_back(char_to_id[ch]); 74 | } 75 | for (const string& ch : split_line(items[1], ' ')) { 76 | target_ids.push_back(char_to_id[ch]); 77 | } 78 | //unsigned morph_id = morph_to_id[items[2]]; 79 | double temp_loss = nn.Train(morph_id, input_ids, target_ids, 80 | &optimizer[morph_id]); 81 | } 82 | } 83 | cerr << "complete." << endl;*/ 84 | 85 | // Read the training file and train the model 86 | double best_score = -1; 87 | vector object_list; 88 | object_list.push_back(&nn); 89 | for (unsigned iter = 0; iter < num_iter; ++iter) { 90 | unsigned line_id = 0; 91 | random_shuffle(train_data.begin(), train_data.end()); 92 | vector loss(morph_size, 0.0f); 93 | for (string& line : train_data) { 94 | vector items = split_line(line, '|'); 95 | vector input_ids, target_ids; 96 | input_ids.clear(); target_ids.clear(); 97 | for (const string& ch : split_line(items[0], ' ')) { 98 | input_ids.push_back(char_to_id[ch]); 99 | } 100 | for (const string& ch : split_line(items[1], ' ')) { 101 | target_ids.push_back(char_to_id[ch]); 102 | } 103 | unsigned morph_id = morph_to_id[items[2]]; 104 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 105 | &optimizer[morph_id]); 106 | cerr << ++line_id << "\r"; 107 | } 108 | 109 | // Read the test file and output predictions for the words. 
110 | string line; 111 | double correct = 0, total = 0; 112 | for (string& line : test_data) { 113 | vector items = split_line(line, '|'); 114 | vector input_ids, target_ids, pred_target_ids; 115 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 116 | for (const string& ch : split_line(items[0], ' ')) { 117 | input_ids.push_back(char_to_id[ch]); 118 | } 119 | for (const string& ch : split_line(items[1], ' ')) { 120 | target_ids.push_back(char_to_id[ch]); 121 | } 122 | unsigned morph_id = morph_to_id[items[2]]; 123 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 124 | &object_list); 125 | 126 | string prediction = ""; 127 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 128 | prediction += id_to_char[pred_target_ids[i]]; 129 | if (i != pred_target_ids.size() - 1) { 130 | prediction += " "; 131 | } 132 | } 133 | if (prediction == items[1]) { 134 | correct += 1; 135 | } 136 | total += 1; 137 | } 138 | double curr_score = correct / total; 139 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.); 140 | cerr << "Prediction Accuracy: " << curr_score << endl; 141 | if (curr_score > best_score) { 142 | best_score = curr_score; 143 | Serialize(model_outputfilename, nn, &m); 144 | } 145 | } 146 | return 1; 147 | } 148 | -------------------------------------------------------------------------------- /src/train-sep-morph.cc: -------------------------------------------------------------------------------- 1 | #include "cnn/nodes.h" 2 | #include "cnn/cnn.h" 3 | #include "cnn/rnn.h" 4 | #include "cnn/gru.h" 5 | #include "cnn/lstm.h" 6 | #include "cnn/training.h" 7 | #include "cnn/gpu-ops.h" 8 | #include "cnn/expr.h" 9 | 10 | #include "utils.h" 11 | #include "sep-morph.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | using namespace cnn; 22 | using namespace cnn::expr; 23 | 24 | int main(int argc, char** argv) { 25 | 
cnn::Initialize(argc, argv); 26 | 27 | string vocab_filename = argv[1]; // vocabulary of words/characters 28 | string morph_filename = argv[2]; 29 | string train_filename = argv[3]; 30 | string test_filename = argv[4]; 31 | unsigned hidden_size = atoi(argv[5]); 32 | unsigned num_iter = atoi(argv[6]); 33 | float reg_strength = atof(argv[7]); 34 | unsigned layers = atoi(argv[8]); 35 | string model_outputfilename = argv[9]; 36 | 37 | unordered_map char_to_id, morph_to_id; 38 | unordered_map id_to_char, id_to_morph; 39 | 40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char); 41 | unsigned vocab_size = char_to_id.size(); 42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph); 43 | unsigned morph_size = morph_to_id.size(); 44 | 45 | vector train_data; // Read the training file in a vector 46 | ReadData(train_filename, &train_data); 47 | 48 | vector test_data; // Read the dev file in a vector 49 | ReadData(test_filename, &test_data); 50 | 51 | vector m; 52 | vector optimizer; 53 | 54 | for (unsigned i = 0; i < morph_size; ++i) { 55 | m.push_back(new Model()); 56 | AdadeltaTrainer ada(m[i], reg_strength); 57 | optimizer.push_back(ada); 58 | } 59 | 60 | unsigned char_size = vocab_size; 61 | SepMorph nn(char_size, hidden_size, vocab_size, layers, morph_size, 62 | &m, &optimizer); 63 | 64 | // Read the training file and train the model 65 | double best_score = -1; 66 | vector object_list; 67 | object_list.push_back(&nn); 68 | for (unsigned iter = 0; iter < num_iter; ++iter) { 69 | unsigned line_id = 0; 70 | random_shuffle(train_data.begin(), train_data.end()); 71 | vector loss(morph_size, 0.0f); 72 | for (string& line : train_data) { 73 | vector items = split_line(line, '|'); 74 | vector input_ids, target_ids; 75 | input_ids.clear(); target_ids.clear(); 76 | for (const string& ch : split_line(items[0], ' ')) { 77 | input_ids.push_back(char_to_id[ch]); 78 | } 79 | for (const string& ch : split_line(items[1], ' ')) { 80 | target_ids.push_back(char_to_id[ch]); 81 | } 82 | 
unsigned morph_id = morph_to_id[items[2]]; 83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, 84 | &optimizer[morph_id]); 85 | cerr << ++line_id << "\r"; 86 | } 87 | 88 | // Read the test file and output predictions for the words. 89 | string line; 90 | double correct = 0, total = 0; 91 | for (string& line : test_data) { 92 | vector items = split_line(line, '|'); 93 | vector input_ids, target_ids, pred_target_ids; 94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear(); 95 | for (const string& ch : split_line(items[0], ' ')) { 96 | input_ids.push_back(char_to_id[ch]); 97 | } 98 | for (const string& ch : split_line(items[1], ' ')) { 99 | target_ids.push_back(char_to_id[ch]); 100 | } 101 | unsigned morph_id = morph_to_id[items[2]]; 102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, 103 | &object_list); 104 | 105 | string prediction = ""; 106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) { 107 | prediction += id_to_char[pred_target_ids[i]]; 108 | if (i != pred_target_ids.size() - 1) { 109 | prediction += " "; 110 | } 111 | } 112 | if (prediction == items[1]) { 113 | correct += 1; 114 | } 115 | total += 1; 116 | } 117 | double curr_score = correct / total; 118 | cerr << "Iter " << iter + 1 << " "; 119 | cerr << "Prediction Accuracy: " << curr_score << endl; 120 | if (curr_score > best_score) { 121 | best_score = curr_score; 122 | Serialize(model_outputfilename, nn, &m); 123 | } 124 | } 125 | return 1; 126 | } 127 | -------------------------------------------------------------------------------- /src/utils.cc: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | vector split_line(const string& line, char delim) { 4 | vector words; 5 | stringstream ss(line); 6 | string item; 7 | while (std::getline(ss, item, delim)) { 8 | if (!item.empty()) 9 | words.push_back(item); 10 | } 11 | return words; 12 | } 13 | 14 | void ReadVocab(string& filename, unordered_map* 
item_to_id, 15 | unordered_map* id_to_item) { 16 | ifstream vocab_file(filename); 17 | vector chars; 18 | if (vocab_file.is_open()) { // Reading the vocab file 19 | string line; 20 | getline(vocab_file, line); 21 | chars = split_line(line, ' '); 22 | } else { 23 | cerr << "File opening failed" << endl; 24 | } 25 | unsigned char_id = 0; 26 | for (const string& ch : chars) { 27 | (*item_to_id)[ch] = char_id; 28 | (*id_to_item)[char_id] = ch; 29 | char_id++; 30 | } 31 | vocab_file.close(); 32 | } 33 | 34 | void ReadData(string& filename, vector* data) { 35 | // Read the training file in a vector 36 | ifstream train_file(filename); 37 | if (train_file.is_open()) { 38 | string line; 39 | while (getline(train_file, line)) { 40 | data->push_back(line); 41 | } 42 | } 43 | train_file.close(); 44 | } 45 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H_ 2 | #define UTILS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | vector split_line(const string& line, char delim); 14 | 15 | void ReadVocab(string& filename, unordered_map* item_to_id, 16 | unordered_map* id_to_item); 17 | 18 | void ReadData(string& filename, vector* data); 19 | 20 | #endif 21 | --------------------------------------------------------------------------------