├── LICENSE
├── Makefile
├── README.md
├── data
├── README.txt
├── char_vocab.txt
├── dev_infl.txt
├── morph_vocab.txt
├── test_infl.txt
└── train_infl.txt
└── src
├── enc-dec-attn.cc
├── enc-dec-attn.h
├── enc-dec.cc
├── enc-dec.h
├── eval-ensemble-enc-dec-attn.cc
├── eval-ensemble-enc-dec.cc
├── eval-ensemble-joint-enc-beam.cc
├── eval-ensemble-joint-enc-dec-morph.cc
├── eval-ensemble-joint-enc-morph.cc
├── eval-ensemble-lm-joint-enc.cc
├── eval-ensemble-lm-sep-morph.cc
├── eval-ensemble-no-enc.cc
├── eval-ensemble-sep-morph-beam.cc
├── eval-ensemble-sep-morph.cc
├── joint-enc-dec-morph.cc
├── joint-enc-dec-morph.h
├── joint-enc-morph.cc
├── joint-enc-morph.h
├── lm-joint-enc.cc
├── lm-joint-enc.h
├── lm-sep-morph.cc
├── lm-sep-morph.h
├── lm.cc
├── lm.h
├── no-enc.cc
├── no-enc.h
├── sep-morph.cc
├── sep-morph.h
├── train-enc-dec-attn.cc
├── train-enc-dec.cc
├── train-joint-enc-dec-morph.cc
├── train-joint-enc-morph.cc
├── train-lm-joint-enc.cc
├── train-lm-sep-morph.cc
├── train-no-enc.cc
├── train-sep-morph.cc
├── utils.cc
└── utils.h
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Manaal Faruqui
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | CC=g++
2 | CNN_DIR=$(CNN)
3 | EIGEN_DIR=$(EIGEN)
4 | BOOST_DIR=$(BOOST)
5 |
6 | CNN_BUILD_DIR=$(CNN_DIR)/build
7 | INCS=-I$(CNN_DIR) -I$(CNN_BUILD_DIR) -I$(EIGEN_DIR)
8 | LIBS=-L$(CNN_BUILD_DIR)/cnn/ -L$(BOOST_DIR)/lib
9 | FINAL=-lcnn -lboost_regex -lboost_serialization -lboost_program_options -lrt -lpthread
10 | CFLAGS=-std=c++11 -Ofast -g -march=native -pipe
11 | BINDIR=bin
12 | OBJDIR=obj
13 | SRCDIR=src
14 |
15 | .PHONY: clean
16 | all: make_dirs $(BINDIR)/train-sep-morph $(BINDIR)/eval-ensemble-sep-morph $(BINDIR)/train-joint-enc-morph $(BINDIR)/eval-ensemble-joint-enc-morph $(BINDIR)/train-lm-sep-morph $(BINDIR)/eval-ensemble-lm-sep-morph $(BINDIR)/train-joint-enc-dec-morph $(BINDIR)/eval-ensemble-joint-enc-dec-morph $(BINDIR)/eval-ensemble-sep-morph-beam $(BINDIR)/train-lm-joint-enc $(BINDIR)/eval-ensemble-lm-joint-enc $(BINDIR)/eval-ensemble-joint-enc-beam $(BINDIR)/train-no-enc $(BINDIR)/eval-ensemble-no-enc $(BINDIR)/train-enc-dec $(BINDIR)/eval-ensemble-enc-dec $(BINDIR)/train-enc-dec-attn $(BINDIR)/eval-ensemble-enc-dec-attn
17 |
18 | make_dirs:
19 | mkdir -p $(OBJDIR)
20 | mkdir -p $(BINDIR)
21 |
22 | include $(wildcard $(OBJDIR)/*.d)
23 |
24 | $(OBJDIR)/%.o: $(SRCDIR)/%.cc
25 | $(CC) $(CFLAGS) $(INCS) -c $< -o $@
26 | $(CC) -MM -MP -MT "$@" $(CFLAGS) $(INCS) $< > $(OBJDIR)/$*.d
27 |
28 | $(BINDIR)/train-sep-morph: $(addprefix $(OBJDIR)/, train-sep-morph.o sep-morph.o utils.o)
29 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
30 |
31 | $(BINDIR)/train-no-enc: $(addprefix $(OBJDIR)/, train-no-enc.o no-enc.o utils.o)
32 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
33 |
34 | $(BINDIR)/train-enc-dec: $(addprefix $(OBJDIR)/, train-enc-dec.o enc-dec.o utils.o)
35 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
36 |
37 | $(BINDIR)/train-enc-dec-attn: $(addprefix $(OBJDIR)/, train-enc-dec-attn.o enc-dec-attn.o utils.o)
38 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
39 |
40 | $(BINDIR)/train-joint-enc-morph: $(addprefix $(OBJDIR)/, train-joint-enc-morph.o joint-enc-morph.o utils.o)
41 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
42 |
43 | $(BINDIR)/train-joint-enc-dec-morph: $(addprefix $(OBJDIR)/, train-joint-enc-dec-morph.o joint-enc-dec-morph.o utils.o)
44 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
45 |
46 | $(BINDIR)/train-lm-sep-morph: $(addprefix $(OBJDIR)/, train-lm-sep-morph.o lm-sep-morph.o utils.o lm.o)
47 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
48 |
49 | $(BINDIR)/train-lm-joint-enc: $(addprefix $(OBJDIR)/, train-lm-joint-enc.o lm-joint-enc.o utils.o lm.o)
50 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
51 |
52 | $(BINDIR)/eval-ensemble-lm-joint-enc: $(addprefix $(OBJDIR)/, eval-ensemble-lm-joint-enc.o utils.o lm-joint-enc.o lm.o)
53 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
54 |
55 | $(BINDIR)/eval-ensemble-sep-morph: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph.o utils.o sep-morph.o)
56 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
57 |
58 | $(BINDIR)/eval-ensemble-sep-morph-spanish-gen: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph-spanish-gen.o utils.o sep-morph.o)
59 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
60 |
61 | $(BINDIR)/eval-ensemble-no-enc: $(addprefix $(OBJDIR)/, eval-ensemble-no-enc.o utils.o no-enc.o)
62 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
63 |
64 | $(BINDIR)/eval-ensemble-enc-dec: $(addprefix $(OBJDIR)/, eval-ensemble-enc-dec.o utils.o enc-dec.o)
65 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
66 |
67 | $(BINDIR)/eval-ensemble-enc-dec-attn: $(addprefix $(OBJDIR)/, eval-ensemble-enc-dec-attn.o utils.o enc-dec-attn.o)
68 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
69 |
70 | $(BINDIR)/eval-ensemble-lm-sep-morph: $(addprefix $(OBJDIR)/, eval-ensemble-lm-sep-morph.o utils.o lm-sep-morph.o lm.o)
71 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
72 |
73 | $(BINDIR)/eval-ensemble-joint-enc-morph: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-morph.o utils.o joint-enc-morph.o)
74 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
75 |
76 | $(BINDIR)/eval-ensemble-joint-enc-dec-morph: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-dec-morph.o utils.o joint-enc-dec-morph.o)
77 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
78 |
79 | $(BINDIR)/eval-ensemble-sep-morph-beam: $(addprefix $(OBJDIR)/, eval-ensemble-sep-morph-beam.o utils.o sep-morph.o)
80 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
81 |
82 | $(BINDIR)/eval-ensemble-joint-enc-beam: $(addprefix $(OBJDIR)/, eval-ensemble-joint-enc-beam.o utils.o joint-enc-morph.o)
83 | $(CC) $(CFLAGS) $(LIBS) $(INCS) $^ -o $@ $(FINAL)
84 |
85 |
86 | clean:
87 | rm -rf $(BINDIR)/*
88 | rm -rf $(OBJDIR)/*
89 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # morph-trans
2 | Manaal Faruqui, manaalfar@gmail.com
3 |
4 | This tool is used to generate morphologically inflected forms of a given word according to a specified morphological attribute. The input word is treated as a sequence of characters and the output word is generated as the transformed sequence of characters using a variant of neural encoder-decoder architecture. This is described in Faruqui et al. (NAACL, 2016).
5 |
6 | ### Requirements
7 |
8 | 1. CNN neural network library: https://github.com/clab/cnn
9 | 2. C++ BOOST library: http://www.boost.org/
10 | 3. C++ Eigen library: http://eigen.tuxfamily.org/
11 |
12 | Please download and compile these libraries.
13 |
14 | ### Data
15 |
16 | You need three files to train the inflection generation system. Two files that contain vocabulary of the characters, and the vocabulary of the morphological attributes. The third file contains the training/dev/test data. Sample files can be found in data/ .
17 |
18 | * Character vocabulary file: char_vocab.txt
19 |
20 | ``` a b c g f ä ...```
21 |
22 | * Morphological attribute file: morph_vocab.txt
23 |
24 | ```case=nominative:number=singular case=dative:number=plural ...```
25 |
26 | * Inflection train/dev/test file: infl.txt
27 |
28 | ``` a a l | a a l e s |case=genitive:number=singular```
29 |
30 | * Optional language models: [here](https://drive.google.com/folderview?id=0B93-ltInuGUyeGhDUVZSeWkyUVE&usp=sharing)
31 |
32 | ### Compile
33 |
34 | After you have installed CNN, Boost and Eigen, you can simply install the software by typing the following command:
35 |
36 | ```make CNN=cnn-dir BOOST=boost-dir EIGEN=eigen-dir```
37 |
38 | ### Run
39 |
40 | To train the inflection generation system, simply run the following:
41 |
42 | ```./bin/train-sep-morph char_vocab.txt morph_vocab.txt train_infl.txt dev_infl.txt 100 30 1e-5 1 model.txt```
43 |
44 | Here, 100 is the hidden layer size of the LSTM, 30 is the number of iterations for training, 1e-5 is the l2 regularization strength and 1 is the number of layers in the LSTM.
45 |
46 | To test the system, run:
47 |
48 | ```./bin/eval-ensemble-sep-morph char_vocab.txt morph_vocab.txt test_infl.txt model1.txt model2.txt model3.txt ... > output.txt```
49 |
50 | This can use an ensemble of models for evaluation. If you want to use only one model, just provide one model. The sep-morph model is the model that provided us best supervised results. Other models can be used in the same way. Baseline encoder-decoder models can be trained using ```train-enc-dec``` and ```train-enc-dec-attn``` models.
51 |
52 | ### Reference
53 | ```
54 | @inproceedings{faruqui:2016:infl,
55 | author = {Faruqui, Manaal and Tsvetkov, Yulia and Neubig, Graham and Dyer, Chris},
56 | title = {Morphological Inflection Generation Using Character Sequence to Sequence Learning},
57 | booktitle = {Proc. of NAACL},
58 | year = {2016},
59 | }
60 | ```
61 |
--------------------------------------------------------------------------------
/data/README.txt:
--------------------------------------------------------------------------------
1 | This is the German Nouns inflection dataset from Durrett and DeNero (2013).
2 |
--------------------------------------------------------------------------------
/data/char_vocab.txt:
--------------------------------------------------------------------------------
1 | ß é è æ ä ü ö a c b e d g f i h k j m l o n q p s r u t w v y x z
2 |
--------------------------------------------------------------------------------
/data/morph_vocab.txt:
--------------------------------------------------------------------------------
1 | case=nominative:number=singular case=dative:number=plural case=nominative:number=plural case=genitive:number=singular case=dative:number=singular case=accusative:number=singular case=accusative:number=plural case=genitive:number=plural
2 |
--------------------------------------------------------------------------------
/src/enc-dec-attn.h:
--------------------------------------------------------------------------------
1 | #ifndef ENC_DEC_ATTN_H_
2 | #define ENC_DEC_ATTN_H_
3 |
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "utils.h"
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 |
21 | using namespace std;
22 | using namespace cnn;
23 | using namespace cnn::expr;
24 |
25 | class EncDecAttn {
26 | public:
27 | vector input_forward, input_backward, output_forward;
28 | vector char_vecs;
29 |
30 | Expression hidden_to_output, hidden_to_output_bias;
31 | vector phidden_to_output, phidden_to_output_bias;
32 |
33 | Expression transform_encoded, transform_encoded_bias;
34 | vector ptransform_encoded, ptransform_encoded_bias;
35 |
36 | Expression compress_hidden, compress_hidden_bias;
37 | vector pcompress_hidden, pcompress_hidden_bias;
38 |
39 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5;
40 | vector eps_vecs;
41 |
42 | EncDecAttn() {}
43 |
44 | EncDecAttn(const unsigned& char_length, const unsigned& hidden_length,
45 | const unsigned& vocab_length, const unsigned& layers,
46 | const unsigned& num_morph, vector* m,
47 | vector* optimizer);
48 |
49 | void InitParams(vector* m);
50 |
51 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);
52 |
53 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs,
54 | Expression* hidden, vector* all_hidden,
55 | ComputationGraph *cg);
56 |
57 | Expression GetAvgAttnLayer(const Expression& hidden,
58 | const vector& all_input_hidden) const ;
59 |
60 | void TransformEncodedInput(Expression* encoded_input) const;
61 |
62 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const;
63 |
64 | void ProjectToOutput(const Expression& hidden,
65 | const vector& all_input_hidden,
66 | Expression* out) const;
67 |
68 | Expression ComputeLoss(const vector& hidden_units,
69 | const vector& targets,
70 | const vector& all_input_hidden) const;
71 |
72 | float Train(const unsigned& morph_id, const vector& inputs,
73 | const vector& outputs, AdadeltaTrainer* ada_gd);
74 |
75 | friend class boost::serialization::access;
76 | template void serialize(Archive& ar, const unsigned int) {
77 | ar & char_len;
78 | ar & hidden_len;
79 | ar & vocab_len;
80 | ar & layers;
81 | ar & morph_len;
82 | ar & max_eps;
83 | }
84 | };
85 |
86 | void
87 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
88 | const vector& input_ids,
89 | vector* pred_target_ids, vector* ensmb_model);
90 |
91 | void
92 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
93 | unordered_map& char_to_id,
94 | const vector& input_ids,
95 | vector >* sequences, vector* tm_scores,
96 | vector* ensmb_model);
97 |
98 | void Serialize(string& filename, EncDecAttn& model, vector* cnn_model);
99 |
100 | void Read(string& filename, EncDecAttn* model, vector* cnn_model);
101 |
102 |
103 | #endif
104 |
--------------------------------------------------------------------------------
/src/enc-dec.cc:
--------------------------------------------------------------------------------
1 | #include "enc-dec.h"
2 |
3 | using namespace std;
4 | using namespace cnn;
5 | using namespace cnn::expr;
6 |
7 | string BOW = "", EOW = "";
8 | int MAX_PRED_LEN = 100;
9 | float NEG_INF = numeric_limits::min();
10 |
11 | EncDec::EncDec(const unsigned& char_length, const unsigned& hidden_length,
12 | const unsigned& vocab_length, const unsigned& num_layers,
13 | const unsigned& num_morph, vector* m,
14 | vector* optimizer) {
15 | char_len = char_length;
16 | hidden_len = hidden_length;
17 | vocab_len = vocab_length;
18 | layers = num_layers;
19 | morph_len = num_morph;
20 | InitParams(m);
21 | }
22 |
23 | void EncDec::InitParams(vector* m) {
24 | for (unsigned i = 0; i < morph_len; ++i) {
25 | input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));
26 | input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));
27 | output_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));
28 |
29 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len}));
30 | phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1}));
31 |
32 | char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len}));
33 |
34 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len,
35 | 2 * hidden_len}));
36 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1}));
37 |
38 | //eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len}));
39 | }
40 | }
41 |
42 | void EncDec::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) {
43 | input_forward[morph_id].new_graph(*cg);
44 | input_backward[morph_id].new_graph(*cg);
45 | output_forward[morph_id].new_graph(*cg);
46 |
47 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]);
48 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]);
49 |
50 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]);
51 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]);
52 | }
53 |
54 | void EncDec::RunFwdBwd(const unsigned& morph_id,
55 | const vector& inputs,
56 | Expression* hidden, ComputationGraph *cg) {
57 | vector input_vecs;
58 | for (const unsigned& input_id : inputs) {
59 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id));
60 | }
61 |
62 | // Run forward LSTM
63 | Expression forward_unit;
64 | input_forward[morph_id].start_new_sequence();
65 | for (unsigned i = 0; i < input_vecs.size(); ++i) {
66 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]);
67 | }
68 |
69 | // Run backward LSTM
70 | Expression backward_unit;
71 | input_backward[morph_id].start_new_sequence();
72 | for (int i = input_vecs.size() - 1; i >= 0; --i) {
73 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]);
74 | }
75 |
76 | // Concatenate the forward and back hidden layers
77 | *hidden = concatenate({forward_unit, backward_unit});
78 | }
79 |
80 | void EncDec::TransformEncodedInput(Expression* encoded_input) const {
81 | *encoded_input = affine_transform({transform_encoded_bias,
82 | transform_encoded, *encoded_input});
83 | }
84 |
85 | void EncDec::ProjectToOutput(const Expression& hidden, Expression* out) const {
86 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden});
87 | }
88 |
89 | Expression EncDec::ComputeLoss(const vector& hidden_units,
90 | const vector& targets) const {
91 | assert(hidden_units.size() == targets.size());
92 | vector losses;
93 | for (unsigned i = 0; i < hidden_units.size(); ++i) {
94 | Expression out;
95 | ProjectToOutput(hidden_units[i], &out);
96 | losses.push_back(pickneglogsoftmax(out, targets[i]));
97 | }
98 | return sum(losses);
99 | }
100 |
101 | float EncDec::Train(const unsigned& morph_id, const vector& inputs,
102 | const vector& outputs, AdadeltaTrainer* ada_gd) {
103 | ComputationGraph cg;
104 | AddParamsToCG(morph_id, &cg);
105 |
106 | // Encode and Transform to feed into decoder
107 | Expression encoded_input_vec;
108 | RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg);
109 | TransformEncodedInput(&encoded_input_vec);
110 |
111 | // Use this encoded word vector to predict the transformed word
112 | vector input_vecs_for_dec;
113 | vector output_ids_for_pred;
114 | for (unsigned i = 0; i < outputs.size(); ++i) {
115 | if (i < outputs.size() - 1) {
116 | // '' will not be fed as input -- it needs to be predicted.
117 | input_vecs_for_dec.push_back(lookup(cg, char_vecs[morph_id], outputs[i]));
118 | }
119 | if (i > 0) { // '' will not be predicted in the output -- its fed in.
120 | output_ids_for_pred.push_back(outputs[i]);
121 | }
122 | }
123 |
124 | vector decoder_hidden_units;
125 | vector init;
126 | for (unsigned i = 0; i < layers; ++i) {
127 | init.push_back(encoded_input_vec); // init cell of decoder
128 | }
129 | for (unsigned i = 0; i < layers; ++i) {
130 | init.push_back(tanh(encoded_input_vec)); // init hidden layer of decoder
131 | }
132 | output_forward[morph_id].start_new_sequence(init);
133 | for (const auto& vec : input_vecs_for_dec) {
134 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec));
135 | }
136 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred);
137 |
138 | float return_loss = as_scalar(cg.forward());
139 | cg.backward();
140 | ada_gd->update(1.0f);
141 | return return_loss;
142 | }
143 |
144 | void
145 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
146 | const vector& input_ids,
147 | vector* pred_target_ids, vector* ensmb_model) {
148 | ComputationGraph cg;
149 |
150 | unsigned ensmb = ensmb_model->size();
151 | //vector encoded_word_vecs;
152 | for (unsigned i = 0; i < ensmb; ++i) {
153 | Expression encoded_word_vec;
154 | auto model = (*ensmb_model)[i];
155 | model->AddParamsToCG(morph_id, &cg);
156 | model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg);
157 | model->TransformEncodedInput(&encoded_word_vec);
158 | //encoded_word_vecs.push_back(encoded_word_vec);
159 | vector init;
160 | for (unsigned i = 0; i < model->layers; ++i) {
161 | init.push_back(encoded_word_vec); // init cell of decoder
162 | }
163 | for (unsigned i = 0; i < model->layers; ++i) {
164 | init.push_back(tanh(encoded_word_vec)); // init hidden layer of decoder
165 | }
166 | model->output_forward[morph_id].start_new_sequence(init);
167 | }
168 |
169 | unsigned out_index = 1;
170 | unsigned pred_index = char_to_id[BOW];
171 | while (pred_target_ids->size() < MAX_PRED_LEN) {
172 | vector ensmb_out;
173 | pred_target_ids->push_back(pred_index);
174 | if (pred_index == char_to_id[EOW]) {
175 | return; // If the end is found, break from the loop and return
176 | }
177 |
178 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
179 | auto model = (*ensmb_model)[ensmb_id];
180 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index);
181 | Expression input, input_char_vec;
182 | /*if (out_index < input_ids.size()) {
183 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]);
184 | } else {
185 | input_char_vec = lookup(cg, model->eps_vecs[morph_id],
186 | min(unsigned(out_index - input_ids.size()),
187 | model->max_eps - 1));
188 | }
189 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
190 | input_char_vec});*/
191 | input = prev_output_vec;
192 |
193 | Expression hidden = model->output_forward[morph_id].add_input(input);
194 | Expression out;
195 | model->ProjectToOutput(hidden, &out);
196 | ensmb_out.push_back(log_softmax(out));
197 | }
198 |
199 | Expression out = sum(ensmb_out) / ensmb_out.size();
200 | vector dist = as_vector(cg.incremental_forward());
201 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));
202 | out_index++;
203 | }
204 | }
205 |
206 | void
207 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
208 | unordered_map& char_to_id,
209 | const vector& input_ids,
210 | vector >* sequences, vector* tm_scores,
211 | vector* ensmb_model) {
212 | unsigned out_index = 1;
213 | unsigned ensmb = ensmb_model->size();
214 | ComputationGraph cg;
215 |
216 | // Compute stuff for every model in the ensemble.
217 | vector encoded_word_vecs;
218 | vector ensmb_out;
219 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
220 | auto& model = *(*ensmb_model)[ensmb_id];
221 | model.AddParamsToCG(morph_id, &cg);
222 |
223 | Expression encoded_word_vec;
224 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg);
225 | model.TransformEncodedInput(&encoded_word_vec);
226 | encoded_word_vecs.push_back(encoded_word_vec);
227 |
228 | vector init;
229 | for (unsigned i = 0; i < model.layers; ++i) {
230 | init.push_back(encoded_word_vec); // init cell of decoder
231 | }
232 | for (unsigned i = 0; i < model.layers; ++i) {
233 | init.push_back(tanh(encoded_word_vec)); // init hidden layer of decoder
234 | }
235 | model.output_forward[morph_id].start_new_sequence(init);
236 |
237 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id],
238 | char_to_id[BOW]);
239 | /*Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
240 | lookup(cg, model.char_vecs[morph_id],
241 | input_ids[out_index])});*/
242 | Expression input = prev_output_vec;
243 | Expression hidden = model.output_forward[morph_id].add_input(input);
244 | Expression out;
245 | model.ProjectToOutput(hidden, &out);
246 | out = log_softmax(out);
247 | ensmb_out.push_back(out);
248 | }
249 |
250 | // Compute the average of the ensemble output.
251 | Expression out_dist = average(ensmb_out);
252 | vector log_dist = as_vector(cg.incremental_forward());
253 | priority_queue > init_queue;
254 | for (unsigned i = 0; i < log_dist.size(); ++i) {
255 | init_queue.push(make_pair(log_dist[i], i));
256 | }
257 | unsigned vocab_size = log_dist.size();
258 |
259 | // Initialise the beam_size sequences, scores, hidden states.
260 | vector log_scores;
261 | vector > prev_states;
262 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
263 | vector seq;
264 | seq.push_back(char_to_id[BOW]);
265 | seq.push_back(init_queue.top().second);
266 | sequences->push_back(seq);
267 | log_scores.push_back(init_queue.top().first);
268 |
269 | vector ensmb_states;
270 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
271 | auto& model = *(*ensmb_model)[ensmb_id];
272 | ensmb_states.push_back(model.output_forward[morph_id].state());
273 | }
274 | prev_states.push_back(ensmb_states);
275 | init_queue.pop();
276 | }
277 |
278 | vector neg_inf(vocab_size, NEG_INF);
279 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf);
280 |
281 | vector active_beams(beam_size, true);
282 | while (true) {
283 | out_index++;
284 | priority_queue > > probs_queue;
285 | vector > curr_states;
286 | vector out_dist;
287 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
288 | if (active_beams[beam_id]) {
289 | unsigned prev_out_char = (*sequences)[beam_id].back();
290 | vector ensmb_out;
291 | vector ensmb_states;
292 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) {
293 | auto& model = *(*ensmb_model)[ensmb_id];
294 | /*Expression input_char_vec;
295 | /if (out_index < input_ids.size()) {
296 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]);
297 | } else {
298 | input_char_vec = lookup(cg, model.eps_vecs[morph_id],
299 | min(unsigned(out_index - input_ids.size()),
300 | model.max_eps - 1));
301 | }*/
302 |
303 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char);
304 | //Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_out_vec,
305 | // input_char_vec});
306 |
307 | Expression input = prev_out_vec;
308 | Expression hidden = model.output_forward[morph_id].add_input(
309 | prev_states[beam_id][ensmb_id], input);
310 | ensmb_states.push_back(model.output_forward[morph_id].state());
311 |
312 | Expression out;
313 | model.ProjectToOutput(hidden, &out);
314 | out = log_softmax(out);
315 | ensmb_out.push_back(out);
316 | }
317 | curr_states.push_back(ensmb_states);
318 | out_dist.push_back(average(ensmb_out));
319 | } else {
320 | vector dummy(ensmb, RNNPointer(0));
321 | curr_states.push_back(dummy);
322 | out_dist.push_back(neg_inf_vec);
323 | }
324 | }
325 |
326 | Expression all_scores = concatenate(out_dist);
327 | vector log_dist = as_vector(cg.incremental_forward());
328 |
329 | for (unsigned index = 0; index < log_dist.size(); ++index) {
330 | unsigned beam_id = index / vocab_size;
331 | unsigned char_id = index % vocab_size;
332 | if (active_beams[beam_id]) {
333 | pair location = make_pair(beam_id, char_id);
334 | probs_queue.push(pair >(
335 | log_scores[beam_id] + log_dist[index], location));
336 | }
337 | }
338 |
339 | // Find the beam_size best now and update the variables.
340 | unordered_map > new_seq;
341 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
342 | if (active_beams[beam_id]) {
343 | float log_prob = probs_queue.top().first;
344 | pair location = probs_queue.top().second;
345 | unsigned old_beam_id = location.first, char_id = location.second;
346 |
347 | vector seq = (*sequences)[old_beam_id];
348 | seq.push_back(char_id);
349 | new_seq[beam_id] = seq;
350 | log_scores[beam_id] = log_prob; // Update the score
351 |
352 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state
353 | probs_queue.pop();
354 | }
355 | }
356 |
357 | // Update the sequences now.
358 | for (auto& it : new_seq) {
359 | (*sequences)[it.first] = it.second;
360 | }
361 |
362 | // Check if a sequence should be made inactive.
363 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
364 | if (active_beams[beam_id] &&
365 | ((*sequences)[beam_id].back() == char_to_id[EOW] ||
366 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) {
367 | active_beams[beam_id] = false;
368 | }
369 | }
370 |
371 | // Check if all sequences are inactive.
372 | bool all_inactive = true;
373 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
374 | if (active_beams[beam_id]) {
375 | all_inactive = false;
376 | break;
377 | }
378 | }
379 |
380 | if (all_inactive) {
381 | *tm_scores = log_scores;
382 | return;
383 | }
384 | }
385 | }
386 |
387 | void Serialize(string& filename, EncDec& model, vector* cnn_models) {
388 | ofstream outfile(filename);
389 | if (!outfile.is_open()) {
390 | cerr << "File opening failed" << endl;
391 | }
392 |
393 | boost::archive::text_oarchive oa(outfile);
394 | oa & model;
395 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
396 | oa & *(*cnn_models)[i];
397 | }
398 |
399 | cerr << "Saved model to: " << filename << endl;
400 | outfile.close();
401 | }
402 |
403 | void Read(string& filename, EncDec* model, vector* cnn_models) {
404 | ifstream infile(filename);
405 | if (!infile.is_open()) {
406 | cerr << "File opening failed" << endl;
407 | }
408 |
409 | boost::archive::text_iarchive ia(infile);
410 | ia & *model;
411 | for (unsigned i = 0; i < model->morph_len; ++i) {
412 | Model *cnn_model = new Model();
413 | cnn_models->push_back(cnn_model);
414 | }
415 |
416 | model->InitParams(cnn_models);
417 | for (unsigned i = 0; i < model->morph_len; ++i) {
418 | ia & *(*cnn_models)[i];
419 | }
420 |
421 | cerr << "Loaded model from: " << filename << endl;
422 | infile.close();
423 | }
424 |
425 |
--------------------------------------------------------------------------------
/src/enc-dec.h:
--------------------------------------------------------------------------------
1 | #ifndef ENC_DEC_H_
2 | #define ENC_DEC_H_
3 |
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "utils.h"
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 |
21 | using namespace std;
22 | using namespace cnn;
23 | using namespace cnn::expr;
24 |
25 | class EncDec {
26 | public:
27 | vector input_forward, input_backward, output_forward;
28 | vector char_vecs;
29 |
30 | Expression hidden_to_output, hidden_to_output_bias;
31 | vector phidden_to_output, phidden_to_output_bias;
32 |
33 | Expression transform_encoded, transform_encoded_bias;
34 | vector ptransform_encoded, ptransform_encoded_bias;
35 |
36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5;
37 | vector eps_vecs;
38 |
39 | EncDec() {}
40 |
41 | EncDec(const unsigned& char_length, const unsigned& hidden_length,
42 | const unsigned& vocab_length, const unsigned& layers,
43 | const unsigned& num_morph, vector* m,
44 | vector* optimizer);
45 |
46 | void InitParams(vector* m);
47 |
48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);
49 |
50 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs,
51 | Expression* hidden, ComputationGraph *cg);
52 |
53 | void TransformEncodedInput(Expression* encoded_input) const;
54 |
55 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const;
56 |
57 | void ProjectToOutput(const Expression& hidden, Expression* out) const;
58 |
59 | Expression ComputeLoss(const vector& hidden_units,
60 | const vector& targets) const;
61 |
62 | float Train(const unsigned& morph_id, const vector& inputs,
63 | const vector& outputs, AdadeltaTrainer* ada_gd);
64 |
65 | friend class boost::serialization::access;
66 | template void serialize(Archive& ar, const unsigned int) {
67 | ar & char_len;
68 | ar & hidden_len;
69 | ar & vocab_len;
70 | ar & layers;
71 | ar & morph_len;
72 | ar & max_eps;
73 | }
74 | };
75 |
76 | void
77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
78 | const vector& input_ids,
79 | vector* pred_target_ids, vector* ensmb_model);
80 |
81 | void
82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
83 | unordered_map& char_to_id,
84 | const vector& input_ids,
85 | vector >* sequences, vector* tm_scores,
86 | vector* ensmb_model);
87 |
88 | void Serialize(string& filename, EncDec& model, vector* cnn_model);
89 |
90 | void Read(string& filename, EncDec* model, vector* cnn_model);
91 |
92 |
93 | #endif
94 |
--------------------------------------------------------------------------------
/src/eval-ensemble-enc-dec-attn.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "enc-dec-attn.h"
12 |
13 | #include
14 | #include
15 |
16 | using namespace std;
17 | using namespace cnn;
18 | using namespace cnn::expr;
19 |
20 | int main(int argc, char** argv) {
21 | cnn::Initialize(argc, argv);
22 |
23 | string vocab_filename = argv[1]; // vocabulary of words/characters
24 | string morph_filename = argv[2];
25 | string test_filename = argv[3];
26 | unsigned beam_size = 2;
27 |
28 | unordered_map char_to_id, morph_to_id;
29 | unordered_map id_to_char, id_to_morph;
30 |
31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
32 | unsigned vocab_size = char_to_id.size();
33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
34 | unsigned morph_size = morph_to_id.size();
35 |
36 | vector test_data; // Read the dev file in a vector
37 | ReadData(test_filename, &test_data);
38 |
39 | vector > ensmb_m;
40 | vector ensmb_nn;
41 | for (unsigned i = 0; i < argc - 4; ++i) {
42 | vector m;
43 | EncDecAttn nn;
44 | string f = argv[i + 4];
45 | Read(f, &nn, &m);
46 | ensmb_m.push_back(m);
47 | ensmb_nn.push_back(nn);
48 | }
49 |
50 | // Read the test file and output predictions for the words.
51 | string line;
52 | double correct = 0, total = 0;
53 | vector object_pointers;
54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
55 | object_pointers.push_back(&ensmb_nn[i]);
56 | }
57 | for (string& line : test_data) {
58 | vector items = split_line(line, '|');
59 | vector input_ids, target_ids, pred_target_ids;
60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
61 | for (const string& ch : split_line(items[0], ' ')) {
62 | input_ids.push_back(char_to_id[ch]);
63 | }
64 | for (const string& ch : split_line(items[1], ' ')) {
65 | target_ids.push_back(char_to_id[ch]);
66 | }
67 | unsigned morph_id = morph_to_id[items[2]];
68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
69 | &object_pointers);
70 |
71 | string prediction = "";
72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
73 | prediction += id_to_char[pred_target_ids[i]];
74 | if (i != pred_target_ids.size() - 1) {
75 | prediction += " ";
76 | }
77 | }
78 | if (prediction == items[1]) {
79 | correct += 1;
80 | } else {
81 | //cout << "GOLD: " << line << endl;
82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl;
83 | }
84 | total += 1;
85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl;
86 | }
87 | cerr << "Prediction Accuracy: " << correct / total << endl;
88 | return 1;
89 | }
90 |
--------------------------------------------------------------------------------
/src/eval-ensemble-enc-dec.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "enc-dec.h"
12 |
13 | #include
14 | #include
15 |
16 | using namespace std;
17 | using namespace cnn;
18 | using namespace cnn::expr;
19 |
20 | int main(int argc, char** argv) {
21 | cnn::Initialize(argc, argv);
22 |
23 | string vocab_filename = argv[1]; // vocabulary of words/characters
24 | string morph_filename = argv[2];
25 | string test_filename = argv[3];
26 | unsigned beam_size = 2;
27 |
28 | unordered_map char_to_id, morph_to_id;
29 | unordered_map id_to_char, id_to_morph;
30 |
31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
32 | unsigned vocab_size = char_to_id.size();
33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
34 | unsigned morph_size = morph_to_id.size();
35 |
36 | vector test_data; // Read the dev file in a vector
37 | ReadData(test_filename, &test_data);
38 |
39 | vector > ensmb_m;
40 | vector ensmb_nn;
41 | for (unsigned i = 0; i < argc - 4; ++i) {
42 | vector m;
43 | EncDec nn;
44 | string f = argv[i + 4];
45 | Read(f, &nn, &m);
46 | ensmb_m.push_back(m);
47 | ensmb_nn.push_back(nn);
48 | }
49 |
50 | // Read the test file and output predictions for the words.
51 | string line;
52 | double correct = 0, total = 0;
53 | vector object_pointers;
54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
55 | object_pointers.push_back(&ensmb_nn[i]);
56 | }
57 | for (string& line : test_data) {
58 | vector items = split_line(line, '|');
59 | vector input_ids, target_ids, pred_target_ids;
60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
61 | for (const string& ch : split_line(items[0], ' ')) {
62 | input_ids.push_back(char_to_id[ch]);
63 | }
64 | for (const string& ch : split_line(items[1], ' ')) {
65 | target_ids.push_back(char_to_id[ch]);
66 | }
67 | unsigned morph_id = morph_to_id[items[2]];
68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
69 | &object_pointers);
70 |
71 | string prediction = "";
72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
73 | prediction += id_to_char[pred_target_ids[i]];
74 | if (i != pred_target_ids.size() - 1) {
75 | prediction += " ";
76 | }
77 | }
78 | if (prediction == items[1]) {
79 | correct += 1;
80 | } else {
81 | //cout << "GOLD: " << line << endl;
82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl;
83 | }
84 | total += 1;
85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl;
86 | }
87 | cerr << "Prediction Accuracy: " << correct / total << endl;
88 | return 1;
89 | }
90 |
--------------------------------------------------------------------------------
/src/eval-ensemble-joint-enc-beam.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file outputs all the strings in the beam.
3 | */
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "utils.h"
14 | #include "joint-enc-morph.h"
15 |
16 | #include
17 | #include
18 |
19 | using namespace std;
20 | using namespace cnn;
21 | using namespace cnn::expr;
22 |
23 | int main(int argc, char** argv) {
24 | cnn::Initialize(argc, argv);
25 |
26 | string vocab_filename = argv[1]; // vocabulary of words/characters
27 | string morph_filename = argv[2];
28 | string test_filename = argv[3];
29 | unsigned beam_size = atoi(argv[4]);
30 |
31 | unordered_map char_to_id, morph_to_id;
32 | unordered_map id_to_char, id_to_morph;
33 |
34 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
35 | unsigned vocab_size = char_to_id.size();
36 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
37 | unsigned morph_size = morph_to_id.size();
38 |
39 | vector test_data; // Read the dev file in a vector
40 | ReadData(test_filename, &test_data);
41 |
42 | vector > ensmb_m;
43 | vector ensmb_nn;
44 | for (unsigned i = 0; i < argc - 5; ++i) {
45 | vector m;
46 | JointEncMorph nn;
47 | string f = argv[i + 5];
48 | Read(f, &nn, &m);
49 | ensmb_m.push_back(m);
50 | ensmb_nn.push_back(nn);
51 | }
52 |
53 | // Read the test file and output predictions for the words.
54 | string line;
55 | double correct = 0, total = 0;
56 | vector object_pointers;
57 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
58 | object_pointers.push_back(&ensmb_nn[i]);
59 | }
60 |
61 | for (string& line : test_data) {
62 | vector items = split_line(line, '|');
63 | vector input_ids, target_ids, pred_target_ids;
64 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
65 | for (const string& ch : split_line(items[0], ' ')) {
66 | input_ids.push_back(char_to_id[ch]);
67 | }
68 | for (const string& ch : split_line(items[1], ' ')) {
69 | target_ids.push_back(char_to_id[ch]);
70 | }
71 |
72 | vector > pred_beams;
73 | vector beam_score;
74 | unsigned morph_id = morph_to_id[items[2]];
75 | EnsembleBeamDecode(morph_id, beam_size, char_to_id, input_ids, &pred_beams,
76 | &beam_score, &object_pointers);
77 |
78 | cout << "GOLD: " << line << endl;
79 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
80 | pred_target_ids = pred_beams[beam_id];
81 | string prediction = "";
82 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
83 | prediction += id_to_char[pred_target_ids[i]];
84 | if (i != pred_target_ids.size() - 1) {
85 | prediction += " ";
86 | }
87 | }
88 | cout << "PRED: " << prediction << " " << beam_score[beam_id] << endl;
89 | }
90 | }
91 | return 1;
92 | }
93 |
--------------------------------------------------------------------------------
/src/eval-ensemble-joint-enc-dec-morph.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "joint-enc-dec-morph.h"
12 |
13 | #include
14 | #include
15 |
16 | using namespace std;
17 | using namespace cnn;
18 | using namespace cnn::expr;
19 |
20 | int main(int argc, char** argv) {
21 | cnn::Initialize(argc, argv);
22 |
23 | string vocab_filename = argv[1]; // vocabulary of words/characters
24 | string morph_filename = argv[2];
25 | string test_filename = argv[3];
26 |
27 | unordered_map char_to_id, morph_to_id;
28 | unordered_map id_to_char, id_to_morph;
29 |
30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
31 | unsigned vocab_size = char_to_id.size();
32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
33 | unsigned morph_size = morph_to_id.size();
34 |
35 | vector test_data; // Read the dev file in a vector
36 | ReadData(test_filename, &test_data);
37 |
38 | vector > ensmb_m;
39 | vector ensmb_nn;
40 | for (unsigned i = 0; i < argc - 4; ++i) {
41 | vector m;
42 | JointEncDecMorph nn;
43 | string f = argv[i + 4];
44 | Read(f, &nn, &m);
45 | ensmb_m.push_back(m);
46 | ensmb_nn.push_back(nn);
47 | }
48 |
49 | // Read the test file and output predictions for the words.
50 | string line;
51 | vector object_pointers;
52 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
53 | object_pointers.push_back(&ensmb_nn[i]);
54 | }
55 | double correct = 0, total = 0;
56 | for (string& line : test_data) {
57 | vector items = split_line(line, '|');
58 | vector input_ids, target_ids, pred_target_ids;
59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
60 | for (const string& ch : split_line(items[0], ' ')) {
61 | input_ids.push_back(char_to_id[ch]);
62 | }
63 | for (const string& ch : split_line(items[1], ' ')) {
64 | target_ids.push_back(char_to_id[ch]);
65 | }
66 | unsigned morph_id = morph_to_id[items[2]];
67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
68 | &object_pointers);
69 |
70 | string prediction = "";
71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
72 | prediction += id_to_char[pred_target_ids[i]];
73 | if (i != pred_target_ids.size() - 1) {
74 | prediction += " ";
75 | }
76 | }
77 | if (prediction == items[1]) {
78 | correct += 1;
79 | } else {
80 | cout << "GOLD: " << line << endl;
81 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl;
82 | }
83 | total += 1;
84 | }
85 | cerr << "Prediction Accuracy: " << correct / total << endl;
86 | return 1;
87 | }
88 |
--------------------------------------------------------------------------------
/src/eval-ensemble-joint-enc-morph.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "joint-enc-morph.h"
12 |
13 | #include
14 | #include
15 |
16 | using namespace std;
17 | using namespace cnn;
18 | using namespace cnn::expr;
19 |
20 | int main(int argc, char** argv) {
21 | cnn::Initialize(argc, argv);
22 |
23 | string vocab_filename = argv[1]; // vocabulary of words/characters
24 | string morph_filename = argv[2];
25 | string test_filename = argv[3];
26 |
27 | unordered_map char_to_id, morph_to_id;
28 | unordered_map id_to_char, id_to_morph;
29 |
30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
31 | unsigned vocab_size = char_to_id.size();
32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
33 | unsigned morph_size = morph_to_id.size();
34 |
35 | vector test_data; // Read the dev file in a vector
36 | ReadData(test_filename, &test_data);
37 |
38 | vector > ensmb_m;
39 | vector ensmb_nn;
40 | for (unsigned i = 0; i < argc - 4; ++i) {
41 | vector m;
42 | JointEncMorph nn;
43 | string f = argv[i + 4];
44 | Read(f, &nn, &m);
45 | ensmb_m.push_back(m);
46 | ensmb_nn.push_back(nn);
47 | }
48 |
49 | // Read the test file and output predictions for the words.
50 | string line;
51 | vector object_pointers;
52 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
53 | object_pointers.push_back(&ensmb_nn[i]);
54 | }
55 | double correct = 0, total = 0;
56 | for (string& line : test_data) {
57 | vector items = split_line(line, '|');
58 | vector input_ids, target_ids, pred_target_ids;
59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
60 | for (const string& ch : split_line(items[0], ' ')) {
61 | input_ids.push_back(char_to_id[ch]);
62 | }
63 | for (const string& ch : split_line(items[1], ' ')) {
64 | target_ids.push_back(char_to_id[ch]);
65 | }
66 | unsigned morph_id = morph_to_id[items[2]];
67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
68 | &object_pointers);
69 |
70 | string prediction = "";
71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
72 | prediction += id_to_char[pred_target_ids[i]];
73 | if (i != pred_target_ids.size() - 1) {
74 | prediction += " ";
75 | }
76 | }
77 | if (prediction == items[1]) {
78 | correct += 1;
79 | } else {
80 | cout << "GOLD: " << line << endl;
81 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl;
82 | }
83 | total += 1;
84 | }
85 | cerr << "Prediction Accuracy: " << correct / total << endl;
86 | return 1;
87 | }
88 |
--------------------------------------------------------------------------------
/src/eval-ensemble-lm-joint-enc.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "lm.h"
11 | #include "utils.h"
12 | #include "lm-joint-enc.h"
13 |
14 | #include
15 | #include
16 |
17 | using namespace std;
18 | using namespace cnn;
19 | using namespace cnn::expr;
20 |
21 | int main(int argc, char** argv) {
22 | cnn::Initialize(argc, argv);
23 |
24 | string vocab_filename = argv[1]; // vocabulary of words/characters
25 | string morph_filename = argv[2];
26 | string test_filename = argv[3];
27 | string lm_model_filename = argv[4];
28 |
29 | unordered_map char_to_id, morph_to_id;
30 | unordered_map id_to_char, id_to_morph;
31 |
32 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
33 | unsigned vocab_size = char_to_id.size();
34 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
35 | unsigned morph_size = morph_to_id.size();
36 |
37 | vector test_data; // Read the dev file in a vector
38 | ReadData(test_filename, &test_data);
39 |
40 | LM lm(lm_model_filename, char_to_id, id_to_char);
41 |
42 | vector > ensmb_m;
43 | vector ensmb_nn;
44 | for (unsigned i = 0; i < argc - 5; ++i) {
45 | vector m;
46 | LMJointEnc nn;
47 | string f = argv[i + 5];
48 | Read(f, &nn, &m);
49 | ensmb_m.push_back(m);
50 | ensmb_nn.push_back(nn);
51 | }
52 |
53 | // Read the test file and output predictions for the words.
54 | string line;
55 | vector object_pointers;
56 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
57 | object_pointers.push_back(&ensmb_nn[i]);
58 | }
59 | double correct = 0, total = 0;
60 | for (string& line : test_data) {
61 | vector items = split_line(line, '|');
62 | vector input_ids, target_ids, pred_target_ids;
63 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
64 | for (const string& ch : split_line(items[0], ' ')) {
65 | input_ids.push_back(char_to_id[ch]);
66 | }
67 | for (const string& ch : split_line(items[1], ' ')) {
68 | target_ids.push_back(char_to_id[ch]);
69 | }
70 | unsigned morph_id = morph_to_id[items[2]];
71 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm,
72 | &object_pointers);
73 |
74 | string prediction = "";
75 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
76 | prediction += id_to_char[pred_target_ids[i]];
77 | if (i != pred_target_ids.size() - 1) {
78 | prediction += " ";
79 | }
80 | }
81 | if (prediction == items[1]) {
82 | correct += 1;
83 | } else {
84 | cout << "GOLD: " << line << endl;
85 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl;
86 | }
87 | total += 1;
88 | }
89 | cerr << "Prediction Accuracy: " << correct / total << endl;
90 | return 1;
91 | }
92 |
--------------------------------------------------------------------------------
/src/eval-ensemble-lm-sep-morph.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "lm.h"
11 | #include "utils.h"
12 | #include "lm-sep-morph.h"
13 |
14 | #include
15 | #include
16 |
17 | using namespace std;
18 | using namespace cnn;
19 | using namespace cnn::expr;
20 |
21 | int main(int argc, char** argv) {
22 | cnn::Initialize(argc, argv);
23 |
24 | string vocab_filename = argv[1]; // vocabulary of words/characters
25 | string morph_filename = argv[2];
26 | string test_filename = argv[3];
27 | string lm_model_filename = argv[4];
28 |
29 | unordered_map char_to_id, morph_to_id;
30 | unordered_map id_to_char, id_to_morph;
31 |
32 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
33 | unsigned vocab_size = char_to_id.size();
34 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
35 | unsigned morph_size = morph_to_id.size();
36 |
37 | vector test_data; // Read the dev file in a vector
38 | ReadData(test_filename, &test_data);
39 |
40 | LM lm(lm_model_filename, char_to_id, id_to_char);
41 |
42 | vector > ensmb_m;
43 | vector ensmb_nn;
44 | for (unsigned i = 0; i < argc - 5; ++i) {
45 | vector m;
46 | LMSepMorph nn;
47 | string f = argv[i + 5];
48 | Read(f, &nn, &m);
49 | ensmb_m.push_back(m);
50 | ensmb_nn.push_back(nn);
51 | }
52 |
53 | // Read the test file and output predictions for the words.
54 | vector object_pointers;
55 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
56 | object_pointers.push_back(&ensmb_nn[i]);
57 | }
58 |
59 | string line;
60 | double correct = 0, total = 0;
61 | for (string& line : test_data) {
62 | vector items = split_line(line, '|');
63 | vector input_ids, target_ids, pred_target_ids;
64 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
65 |
66 | string input = items[0], output = items[1];
67 | for (const string& ch : split_line(input, ' ')) {
68 | input_ids.push_back(char_to_id[ch]);
69 | }
70 | for (const string& ch : split_line(output, ' ')) {
71 | target_ids.push_back(char_to_id[ch]);
72 | }
73 | unsigned morph_id = morph_to_id[items[2]];
74 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm,
75 | &object_pointers);
76 |
77 | string prediction = "";
78 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
79 | prediction += id_to_char[pred_target_ids[i]];
80 | if (i != pred_target_ids.size() - 1) {
81 | prediction += " ";
82 | }
83 | }
84 | if (prediction == output) {
85 | correct += 1;
86 | } else {
87 | cout << "GOLD: " << line << endl;
88 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl;
89 | }
90 | total += 1;
91 | }
92 | cerr << "Prediction Accuracy: " << correct / total << endl;
93 | return 1;
94 | }
95 |
--------------------------------------------------------------------------------
/src/eval-ensemble-no-enc.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "no-enc.h"
12 |
13 | #include
14 | #include
15 |
16 | using namespace std;
17 | using namespace cnn;
18 | using namespace cnn::expr;
19 |
20 | int main(int argc, char** argv) {
21 | cnn::Initialize(argc, argv);
22 |
23 | string vocab_filename = argv[1]; // vocabulary of words/characters
24 | string morph_filename = argv[2];
25 | string test_filename = argv[3];
26 | unsigned beam_size = 2;
27 |
28 | unordered_map char_to_id, morph_to_id;
29 | unordered_map id_to_char, id_to_morph;
30 |
31 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
32 | unsigned vocab_size = char_to_id.size();
33 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
34 | unsigned morph_size = morph_to_id.size();
35 |
36 | vector test_data; // Read the dev file in a vector
37 | ReadData(test_filename, &test_data);
38 |
39 | vector > ensmb_m;
40 | vector ensmb_nn;
41 | for (unsigned i = 0; i < argc - 4; ++i) {
42 | vector m;
43 | NoEnc nn;
44 | string f = argv[i + 4];
45 | Read(f, &nn, &m);
46 | ensmb_m.push_back(m);
47 | ensmb_nn.push_back(nn);
48 | }
49 |
50 | // Read the test file and output predictions for the words.
51 | string line;
52 | double correct = 0, total = 0;
53 | vector object_pointers;
54 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
55 | object_pointers.push_back(&ensmb_nn[i]);
56 | }
57 | for (string& line : test_data) {
58 | vector items = split_line(line, '|');
59 | vector input_ids, target_ids, pred_target_ids;
60 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
61 | for (const string& ch : split_line(items[0], ' ')) {
62 | input_ids.push_back(char_to_id[ch]);
63 | }
64 | for (const string& ch : split_line(items[1], ' ')) {
65 | target_ids.push_back(char_to_id[ch]);
66 | }
67 | unsigned morph_id = morph_to_id[items[2]];
68 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
69 | &object_pointers);
70 |
71 | string prediction = "";
72 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
73 | prediction += id_to_char[pred_target_ids[i]];
74 | if (i != pred_target_ids.size() - 1) {
75 | prediction += " ";
76 | }
77 | }
78 | if (prediction == items[1]) {
79 | correct += 1;
80 | } else {
81 | //cout << "GOLD: " << line << endl;
82 | //cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl;
83 | }
84 | total += 1;
85 | cout << items[0] << "|" << prediction << "|" << items[2] << endl;
86 | }
87 | cerr << "Prediction Accuracy: " << correct / total << endl;
88 | return 1;
89 | }
90 |
--------------------------------------------------------------------------------
/src/eval-ensemble-sep-morph-beam.cc:
--------------------------------------------------------------------------------
1 | /*
2 | This file outputs all the strings in the beam.
3 | */
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "utils.h"
14 | #include "sep-morph.h"
15 |
16 | #include
17 | #include
18 |
19 | using namespace std;
20 | using namespace cnn;
21 | using namespace cnn::expr;
22 |
23 | int main(int argc, char** argv) {
24 | cnn::Initialize(argc, argv);
25 |
26 | string vocab_filename = argv[1]; // vocabulary of words/characters
27 | string morph_filename = argv[2];
28 | string test_filename = argv[3];
29 | string lm_filename = argv[4];
30 | unsigned beam_size = atoi(argv[5]);
31 |
32 | unordered_map char_to_id, morph_to_id;
33 | unordered_map id_to_char, id_to_morph;
34 |
35 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
36 | unsigned vocab_size = char_to_id.size();
37 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
38 | unsigned morph_size = morph_to_id.size();
39 |
40 | vector test_data; // Read the dev file in a vector
41 | ReadData(test_filename, &test_data);
42 |
43 | vector > ensmb_m;
44 | vector ensmb_nn;
45 | for (unsigned i = 0; i < argc - 6; ++i) {
46 | vector m;
47 | SepMorph nn;
48 | string f = argv[i + 6];
49 | Read(f, &nn, &m);
50 | ensmb_m.push_back(m);
51 | ensmb_nn.push_back(nn);
52 | }
53 |
54 | // Read the test file and output predictions for the words.
55 | string line;
56 | double correct = 0, total = 0;
57 | vector object_pointers;
58 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
59 | object_pointers.push_back(&ensmb_nn[i]);
60 | }
61 |
62 | for (string& line : test_data) {
63 | vector items = split_line(line, '|');
64 | vector input_ids, target_ids, pred_target_ids;
65 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
66 | for (const string& ch : split_line(items[0], ' ')) {
67 | input_ids.push_back(char_to_id[ch]);
68 | }
69 | for (const string& ch : split_line(items[1], ' ')) {
70 | target_ids.push_back(char_to_id[ch]);
71 | }
72 |
73 | vector > pred_beams;
74 | vector beam_score;
75 | unsigned morph_id = morph_to_id[items[2]];
76 | EnsembleBeamDecode(morph_id, beam_size, char_to_id, input_ids, &pred_beams,
77 | &beam_score, &object_pointers);
78 |
79 | cout << "GOLD: " << line << endl;
80 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
81 | pred_target_ids = pred_beams[beam_id];
82 | string prediction = "";
83 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
84 | prediction += id_to_char[pred_target_ids[i]];
85 | if (i != pred_target_ids.size() - 1) {
86 | prediction += " ";
87 | }
88 | }
89 | cout << "PRED: " << prediction << " " << beam_score[beam_id] << endl;
90 | }
91 | }
92 | return 1;
93 | }
94 |
--------------------------------------------------------------------------------
/src/eval-ensemble-sep-morph.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "sep-morph.h"
12 |
13 | #include
14 | #include
15 |
16 | using namespace std;
17 | using namespace cnn;
18 | using namespace cnn::expr;
19 |
20 | int main(int argc, char** argv) {
21 | cnn::Initialize(argc, argv);
22 |
23 | string vocab_filename = argv[1]; // vocabulary of words/characters
24 | string morph_filename = argv[2];
25 | string test_filename = argv[3];
26 |
27 | unordered_map char_to_id, morph_to_id;
28 | unordered_map id_to_char, id_to_morph;
29 |
30 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
31 | unsigned vocab_size = char_to_id.size();
32 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
33 | unsigned morph_size = morph_to_id.size();
34 |
35 | vector test_data; // Read the dev file in a vector
36 | ReadData(test_filename, &test_data);
37 |
38 | vector > ensmb_m;
39 | vector ensmb_nn;
40 | for (unsigned i = 0; i < argc - 4; ++i) {
41 | vector m;
42 | SepMorph nn;
43 | string f = argv[i + 4];
44 | Read(f, &nn, &m);
45 | ensmb_m.push_back(m);
46 | ensmb_nn.push_back(nn);
47 | }
48 |
49 | // Read the test file and output predictions for the words.
50 | string line;
51 | double correct = 0, total = 0;
52 | vector object_pointers;
53 | for (unsigned i = 0; i < ensmb_nn.size(); ++i) {
54 | object_pointers.push_back(&ensmb_nn[i]);
55 | }
56 | for (string& line : test_data) {
57 | vector items = split_line(line, '|');
58 | vector input_ids, target_ids, pred_target_ids;
59 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
60 | for (const string& ch : split_line(items[0], ' ')) {
61 | input_ids.push_back(char_to_id[ch]);
62 | }
63 | for (const string& ch : split_line(items[1], ' ')) {
64 | target_ids.push_back(char_to_id[ch]);
65 | }
66 | unsigned morph_id = morph_to_id[items[2]];
67 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
68 | &object_pointers);
69 |
70 | string prediction = "";
71 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
72 | prediction += id_to_char[pred_target_ids[i]];
73 | if (i != pred_target_ids.size() - 1) {
74 | prediction += " ";
75 | }
76 | }
77 | if (prediction == items[1]) {
78 | correct += 1;
79 | } else {
80 | cout << "GOLD: " << line << endl;
81 | cout << "PRED: " << items[0] << "|" << prediction << "|" << items[2] << endl;
82 | }
83 | total += 1;
84 | }
85 | cerr << "Prediction Accuracy: " << correct / total << endl;
86 | return 1;
87 | }
88 |
--------------------------------------------------------------------------------
/src/joint-enc-dec-morph.cc:
--------------------------------------------------------------------------------
1 | #include "joint-enc-dec-morph.h"
2 |
3 | using namespace std;
4 | using namespace cnn;
5 | using namespace cnn::expr;
6 |
7 | string BOW = "", EOW = "";
8 | unsigned MAX_PRED_LEN = 100;
9 |
10 | JointEncDecMorph::JointEncDecMorph(
11 | const unsigned& char_length, const unsigned& hidden_length,
12 | const unsigned& vocab_length, const unsigned& num_layers,
13 | const unsigned& num_morph, vector* m,
14 | vector* optimizer) {
15 | char_len = char_length;
16 | hidden_len = hidden_length;
17 | vocab_len = vocab_length;
18 | layers = num_layers;
19 | morph_len = num_morph;
20 | InitParams(m);
21 | }
22 |
23 | void JointEncDecMorph::InitParams(vector* m) {
24 | // Have all the shared parameters in one model
25 | input_forward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]);
26 | input_backward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]);
27 | output_forward = LSTMBuilder(layers, 2 * char_len + hidden_len,
28 | hidden_len, (*m)[morph_len]);
29 |
30 | char_vecs = (*m)[morph_len]->add_lookup_parameters(vocab_len, {char_len});
31 |
32 | phidden_to_output = (*m)[morph_len]->add_parameters({vocab_len, hidden_len});
33 | phidden_to_output_bias = (*m)[morph_len]->add_parameters({vocab_len, 1});
34 |
35 | eps_vecs = (*m)[morph_len]->add_lookup_parameters(max_eps, {char_len});
36 |
37 | for (unsigned i = 0; i < morph_len; ++i) {
38 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len,
39 | 2 * hidden_len}));
40 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1}));
41 | }
42 | }
43 |
// Registers this model's parameters with the computation graph `cg`: fresh
// graph handles for the three LSTMs, the shared output projection, and the
// encoding transform specific to `morph_id`. Must be called once per graph
// before any forward computation.
void JointEncDecMorph::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) {
  input_forward.new_graph(*cg);
  input_backward.new_graph(*cg);
  output_forward.new_graph(*cg);

  // Shared across all morph types.
  hidden_to_output = parameter(*cg, phidden_to_output);
  hidden_to_output_bias = parameter(*cg, phidden_to_output_bias);

  // Selected per morph type.
  transform_encoded = parameter(*cg, ptransform_encoded[morph_id]);
  transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]);
}
55 |
56 | void JointEncDecMorph::RunFwdBwd(const vector& inputs,
57 | Expression* hidden, ComputationGraph *cg) {
58 | vector input_vecs;
59 | for (const unsigned& input_id : inputs) {
60 | input_vecs.push_back(lookup(*cg, char_vecs, input_id));
61 | }
62 |
63 | // Run forward LSTM
64 | Expression forward_unit;
65 | input_forward.start_new_sequence();
66 | for (unsigned i = 0; i < input_vecs.size(); ++i) {
67 | forward_unit = input_forward.add_input(input_vecs[i]);
68 | }
69 |
70 | // Run backward LSTM
71 | Expression backward_unit;
72 | input_backward.start_new_sequence();
73 | for (int i = input_vecs.size() - 1; i >= 0; --i) {
74 | backward_unit = input_backward.add_input(input_vecs[i]);
75 | }
76 |
77 | // Concatenate the forward and back hidden layers
78 | *hidden = concatenate({forward_unit, backward_unit});
79 | }
80 |
81 | void JointEncDecMorph::TransformEncodedInput(Expression* encoded_input) const {
82 | *encoded_input = affine_transform({transform_encoded_bias,
83 | transform_encoded, *encoded_input});
84 | }
85 |
86 | void JointEncDecMorph::ProjectToOutput(const Expression& hidden, Expression* out)
87 | const {
88 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden});
89 | }
90 |
91 |
92 | Expression JointEncDecMorph::ComputeLoss(const vector& hidden_units,
93 | const vector& targets) const {
94 | assert(hidden_units.size() == targets.size());
95 | vector losses;
96 | for (unsigned i = 0; i < hidden_units.size(); ++i) {
97 | Expression out;
98 | ProjectToOutput(hidden_units[i], &out);
99 | losses.push_back(pickneglogsoftmax(out, targets[i]));
100 | }
101 | return sum(losses);
102 | }
103 |
104 | float JointEncDecMorph::Train(const unsigned& morph_id, const vector& inputs,
105 | const vector& outputs, AdadeltaTrainer* opt,
106 | AdadeltaTrainer* shared_opt) {  // One example: build graph, compute loss, backprop, step both optimizers; returns the loss.
107 | ComputationGraph cg;
108 | AddParamsToCG(morph_id, &cg);
109 |
110 | // Encode and Transform to feed into decoder
111 | Expression encoded_input_vec;
112 | RunFwdBwd(inputs, &encoded_input_vec, &cg);
113 | TransformEncodedInput(&encoded_input_vec);
114 |
115 | // Use this encoded word vector to predict the transformed word
116 | vector input_vecs_for_dec;
117 | vector output_ids_for_pred;
118 | for (unsigned i = 0; i < outputs.size(); ++i) {
119 | if (i < outputs.size() - 1) {
120 | // The end-of-word marker is never fed as input -- it must be predicted.
121 | if (i < inputs.size() - 1) {
122 | input_vecs_for_dec.push_back(concatenate(
123 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]),
124 | lookup(cg, char_vecs, inputs[i + 1])}));  // [enc ; prev output char ; next input char].
125 | } else {
126 | input_vecs_for_dec.push_back(concatenate(
127 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]),
128 | lookup(cg, eps_vecs, min(unsigned(i - inputs.size()), max_eps - 1))}));  // NOTE(review): at i == inputs.size()-1, i - inputs.size() wraps to UINT_MAX and min() clamps to max_eps-1 -- likely intended i-(inputs.size()-1); verify.
129 | }
130 | }
131 | if (i > 0) { // The begin-of-word marker is never predicted -- it is only fed in.
132 | output_ids_for_pred.push_back(outputs[i]);
133 | }
134 | }
135 |
136 | vector decoder_hidden_units;
137 | output_forward.start_new_sequence();
138 | for (const auto& vec : input_vecs_for_dec) {
139 | decoder_hidden_units.push_back(output_forward.add_input(vec));
140 | }
141 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred);
142 |
143 | float return_loss = as_scalar(cg.forward());  // Forward evaluates `loss` (the last expression added to the graph).
144 | cg.backward();
145 | opt->update(1.0f); // Update the morph specific parameters
146 | shared_opt->update(1.0f); // Update the shared parameters
147 | return return_loss;
148 | }
149 |
150 | void Serialize(string& filename, JointEncDecMorph& model,
151 | vector* cnn_models) {  // Saves hyperparameters (model.serialize) then every cnn model's weights, in order.
152 | ofstream outfile(filename);
153 | if (!outfile.is_open()) {
154 | cerr << "File opening failed" << endl;  // NOTE(review): only warns -- execution continues and writes to a bad stream.
155 | }
156 |
157 | boost::archive::text_oarchive oa(outfile);
158 | oa & model;  // Hyperparameters only (see JointEncDecMorph::serialize).
159 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
160 | oa & *(*cnn_models)[i];
161 | }
162 |
163 | cerr << "Saved model to: " << filename << endl;
164 | outfile.close();
165 | }
166 |
167 | void Read(string& filename, JointEncDecMorph* model, vector* cnn_models) {  // Mirror of Serialize: read hyperparameters, rebuild parameter structure, then load weights.
168 | ifstream infile(filename);
169 | if (!infile.is_open()) {
170 | cerr << "File opening failed" << endl;  // NOTE(review): only warns; the archive constructor below will still run.
171 | }
172 |
173 | boost::archive::text_iarchive ia(infile);
174 | ia & *model;  // Restores the dimensions needed by InitParams.
175 | for (unsigned i = 0; i < model->morph_len + 1; ++i) {  // morph_len per-type models + 1 shared model. Never freed by this function -- caller owns them.
176 | Model *cnn_model = new Model();
177 | cnn_models->push_back(cnn_model);
178 | }
179 |
180 | model->InitParams(cnn_models);  // Must run before deserialising so parameter shapes match the archive.
181 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
182 | ia & *(*cnn_models)[i];
183 | }
184 |
185 | cerr << "Loaded model from: " << filename << endl;
186 | infile.close();
187 | }
188 |
189 | void
190 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
191 | const vector& input_ids, vector* pred_target_ids,
192 | vector* ensmb_model) {  // Greedy decoding, averaging log-softmax outputs over the ensemble.
193 | ComputationGraph cg;
194 |
195 | unsigned ensmb = ensmb_model->size();
196 | vector encoded_word_vecs;
197 | for (unsigned i = 0; i < ensmb; ++i) {  // Encode the input word once per ensemble member.
198 | Expression encoded_word_vec;
199 | auto model = (*ensmb_model)[i];
200 | model->AddParamsToCG(morph_id, &cg);
201 | model->RunFwdBwd(input_ids, &encoded_word_vec, &cg);
202 | model->TransformEncodedInput(&encoded_word_vec);
203 | encoded_word_vecs.push_back(encoded_word_vec);
204 | model->output_forward.start_new_sequence();
205 | }
206 |
207 | unsigned out_index = 1;  // Index of the next input char to feed (0 is the BOW marker).
208 | unsigned pred_index = char_to_id[BOW];  // Decoding starts from the begin-of-word marker.
209 | while (pred_target_ids->size() < MAX_PRED_LEN) {
210 | vector ensmb_out;
211 | pred_target_ids->push_back(pred_index);
212 | if (pred_index == char_to_id[EOW]) {
213 | return; // If the end is found, break from the loop and return
214 | }
215 |
216 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
217 | auto model = (*ensmb_model)[ensmb_id];
218 | Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index);
219 | Expression input, input_char_vec;
220 | if (out_index < input_ids.size()) {  // Still inside the input word: feed the aligned char.
221 | input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]);
222 | } else {  // Past the input's end: feed a (clamped) epsilon vector.
223 | input_char_vec = lookup(cg, model->eps_vecs,
224 | min(unsigned(out_index - input_ids.size()),
225 | model->max_eps - 1));
226 | }
227 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
228 | input_char_vec});
229 |
230 | Expression hidden = model->output_forward.add_input(input);
231 | Expression out;
232 | model->ProjectToOutput(hidden, &out);
233 | ensmb_out.push_back(log_softmax(out));
234 | }
235 |
236 | Expression out = sum(ensmb_out) / ensmb_out.size();  // Average of per-model log-probabilities.
237 | vector dist = as_vector(cg.incremental_forward());
238 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));  // Greedy argmax.
239 | out_index++;
240 | }
241 | }
242 |
--------------------------------------------------------------------------------
/src/joint-enc-dec-morph.h:
--------------------------------------------------------------------------------
1 | #ifndef JOINT_ENC_DEC_MORPH_H_
2 | #define JOINT_ENC_DEC_MORPH_H_
3 |
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "utils.h"
14 |
15 | #include
16 | #include
17 | #include
18 |
19 | using namespace std;
20 | using namespace cnn;
21 | using namespace cnn::expr;
22 |
23 | // The class has a shared encoder across all morphological types.
24 | // The decoder and other auxiliary units are shared as well; the only
25 | // per-morphological-type parameters are the encoded-input transformation.
26 | class JointEncDecMorph {
27 | public:
28 | LSTMBuilder input_forward, input_backward, output_forward; // Shared encoder
29 | LookupParameters* char_vecs; // Shared char vectors
30 |
31 | Expression hidden_to_output, hidden_to_output_bias;
32 | Parameters *phidden_to_output, *phidden_to_output_bias; // Shared
33 |
34 | // The only non-shared component is the transformation.
35 | Expression transform_encoded, transform_encoded_bias;
36 | vector ptransform_encoded, ptransform_encoded_bias;
37 |
38 | LookupParameters* eps_vecs;  // Shared epsilon vectors fed past the input's end.
39 |
40 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5;
41 |
42 | JointEncDecMorph() {}  // Default-construct before Read() repopulates the fields.
43 |
44 | JointEncDecMorph(const unsigned& char_length, const unsigned& hidden_length,
45 | const unsigned& vocab_length, const unsigned& layers,
46 | const unsigned& num_morph, vector* m,
47 | vector* optimizer);
48 |
49 | void InitParams(vector* m);  // Allocates all parameters into the given models.
50 |
51 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);  // Must be called once per new graph before use.
52 |
53 | void RunFwdBwd(const vector& inputs,
54 | Expression* hidden, ComputationGraph *cg);  // Bi-LSTM encoding of the input chars.
55 |
56 | void TransformEncodedInput(Expression* encoded_input) const;  // Morph-type-specific affine transform, in place.
57 |
58 | void ProjectToOutput(const Expression& hidden, Expression* out) const;  // Hidden state -> vocabulary scores.
59 |
60 | Expression ComputeLoss(const vector& hidden_units,
61 | const vector& targets) const;  // Summed per-char neg-log-likelihood.
62 |
63 | float Train(const unsigned& morph_id, const vector& inputs,
64 | const vector& outputs, AdadeltaTrainer* opt,
65 | AdadeltaTrainer* shared_opt);
66 |
67 | friend class boost::serialization::access;
68 | template void serialize(Archive& ar, const unsigned int) {  // Only hyperparameters; weights are archived separately via the cnn Models.
69 | ar & char_len;
70 | ar & hidden_len;
71 | ar & vocab_len;
72 | ar & layers;
73 | ar & morph_len;
74 | ar & max_eps;
75 | }
76 | };
77 |
78 | void Serialize(string& filename, JointEncDecMorph& model, vector* cnn_model);
79 |
80 | void Read(string& filename, JointEncDecMorph* model, vector* cnn_model);
81 |
82 | void
83 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
84 | const vector& input_ids, vector* pred_target_ids,
85 | vector* ensmb_model);
86 |
87 | #endif
88 |
--------------------------------------------------------------------------------
/src/joint-enc-morph.cc:
--------------------------------------------------------------------------------
1 | #include "joint-enc-morph.h"
2 |
3 | #include <limits>  // FIX(review): header name was lost in this dump; <limits> is required for numeric_limits below.
4 |
5 | using namespace std;
6 | using namespace cnn;
7 | using namespace cnn::expr;
8 |
9 | string BOW = "", EOW = "";  // NOTE(review): begin-/end-of-word marker strings look emptied by extraction -- confirm against the repo.
10 | unsigned MAX_PRED_LEN = 100;  // Hard cap on decoded-output length.
11 | float NEG_INF = -numeric_limits<float>::infinity();  // FIX: was numeric_limits::min(), which is the smallest POSITIVE float -- as a beam-search mask (see EnsembleBeamDecode) it failed to suppress inactive beams. A true -inf log-probability is intended.
12 |
13 | JointEncMorph::JointEncMorph(
14 | const unsigned& char_length, const unsigned& hidden_length,
15 | const unsigned& vocab_length, const unsigned& num_layers,
16 | const unsigned& num_morph, vector* m,
17 | vector* optimizer) {
18 | char_len = char_length;  // Character-embedding dimensionality.
19 | hidden_len = hidden_length;  // LSTM hidden-state size.
20 | vocab_len = vocab_length;  // Character-vocabulary size.
21 | layers = num_layers;  // LSTM depth.
22 | morph_len = num_morph;  // Number of morphological types (one decoder per type).
23 | InitParams(m);  // Builds all parameters; `optimizer` is accepted but unused here.
24 | }
25 |
26 | void JointEncMorph::InitParams(vector* m) {
27 |
28 | // Have all the shared parameters in one model
29 | input_forward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]);  // (*m)[morph_len] is the shared model; (*m)[0..morph_len-1] are per-morph-type.
30 | input_backward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]);
31 |
32 | char_vecs = (*m)[morph_len]->add_lookup_parameters(vocab_len, {char_len});  // Shared char embeddings.
33 |
34 | phidden_to_output = (*m)[morph_len]->add_parameters({vocab_len, hidden_len});  // Shared output projection.
35 | phidden_to_output_bias = (*m)[morph_len]->add_parameters({vocab_len, 1});
36 |
37 | for (unsigned i = 0; i < morph_len; ++i) {  // Unlike JointEncDecMorph, the decoder and epsilon vectors are per-morph-type here.
38 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len,
39 | hidden_len, (*m)[i]));
40 |
41 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len,
42 | 2 * hidden_len}));
43 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1}));
44 |
45 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len}));
46 | }
47 | }
48 |
49 | void JointEncMorph::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) {  // Registers builders/parameters with a fresh graph for the given morph type.
50 | input_forward.new_graph(*cg);
51 | input_backward.new_graph(*cg);
52 | output_forward[morph_id].new_graph(*cg);
53 |
54 | hidden_to_output = parameter(*cg, phidden_to_output);  // Shared.
55 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias);
56 |
57 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]);  // Per-morph-type.
58 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]);
59 | }
60 |
61 | void JointEncMorph::RunFwdBwd(const vector& inputs,
62 | Expression* hidden, ComputationGraph *cg) {  // Bidirectional encode: *hidden = [last fwd state ; last bwd state].
63 | vector input_vecs;
64 | for (const unsigned& input_id : inputs) {
65 | input_vecs.push_back(lookup(*cg, char_vecs, input_id));
66 | }
67 |
68 | // Run forward LSTM
69 | Expression forward_unit;
70 | input_forward.start_new_sequence();
71 | for (unsigned i = 0; i < input_vecs.size(); ++i) {
72 | forward_unit = input_forward.add_input(input_vecs[i]);  // Only the final state is kept.
73 | }
74 |
75 | // Run backward LSTM
76 | Expression backward_unit;
77 | input_backward.start_new_sequence();
78 | for (int i = input_vecs.size() - 1; i >= 0; --i) {  // Signed index so the countdown terminates at 0.
79 | backward_unit = input_backward.add_input(input_vecs[i]);
80 | }
81 |
82 | // Concatenate the forward and back hidden layers
83 | *hidden = concatenate({forward_unit, backward_unit});  // NOTE(review): assumes non-empty `inputs`.
84 | }
85 |
86 | void JointEncMorph::TransformEncodedInput(Expression* encoded_input) const {  // In place: enc <- W*enc + b, mapping 2*hidden_len -> hidden_len.
87 | *encoded_input = affine_transform({transform_encoded_bias,
88 | transform_encoded, *encoded_input});
89 | }
90 |
91 | void JointEncMorph::ProjectToOutput(const Expression& hidden, Expression* out)
92 | const {  // Unnormalised character scores: out = W_out*hidden + b_out.
93 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden});
94 | }
95 |
96 |
97 | Expression JointEncMorph::ComputeLoss(const vector& hidden_units,
98 | const vector& targets) const {  // Sum over time steps of negative log-likelihood of each target char.
99 | assert(hidden_units.size() == targets.size());
100 | vector losses;
101 | for (unsigned i = 0; i < hidden_units.size(); ++i) {
102 | Expression out;
103 | ProjectToOutput(hidden_units[i], &out);
104 | losses.push_back(pickneglogsoftmax(out, targets[i]));
105 | }
106 | return sum(losses);
107 | }
108 |
109 | float JointEncMorph::Train(const unsigned& morph_id, const vector& inputs,
110 | const vector& outputs, AdadeltaTrainer* opt,
111 | AdadeltaTrainer* shared_opt) {  // One example: build graph, compute loss, backprop, step both optimizers; returns the loss.
112 | ComputationGraph cg;
113 | AddParamsToCG(morph_id, &cg);
114 |
115 | // Encode and Transform to feed into decoder
116 | Expression encoded_input_vec;
117 | RunFwdBwd(inputs, &encoded_input_vec, &cg);
118 | TransformEncodedInput(&encoded_input_vec);
119 |
120 | // Use this encoded word vector to predict the transformed word
121 | vector input_vecs_for_dec;
122 | vector output_ids_for_pred;
123 | for (unsigned i = 0; i < outputs.size(); ++i) {
124 | if (i < outputs.size() - 1) {
125 | // The end-of-word marker is never fed as input -- it must be predicted.
126 | if (i < inputs.size() - 1) {
127 | input_vecs_for_dec.push_back(concatenate(
128 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]),
129 | lookup(cg, char_vecs, inputs[i + 1])}));  // [enc ; prev output char ; next input char].
130 | } else {
131 | input_vecs_for_dec.push_back(concatenate(
132 | {encoded_input_vec, lookup(cg, char_vecs, outputs[i]),
133 | lookup(cg, eps_vecs[morph_id],
134 | min(unsigned(i - inputs.size()), max_eps - 1))}));  // NOTE(review): at i == inputs.size()-1, i - inputs.size() wraps to UINT_MAX and min() clamps to max_eps-1 -- verify intent.
135 | }
136 | }
137 | if (i > 0) { // The begin-of-word marker is never predicted -- it is only fed in.
138 | output_ids_for_pred.push_back(outputs[i]);
139 | }
140 | }
141 |
142 | vector decoder_hidden_units;
143 | output_forward[morph_id].start_new_sequence();
144 | for (const auto& vec : input_vecs_for_dec) {
145 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec));
146 | }
147 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred);
148 |
149 | float return_loss = as_scalar(cg.forward());  // Forward evaluates `loss` (last expression added).
150 | cg.backward();
151 | opt->update(1.0f); // Update the morph specific parameters
152 | shared_opt->update(1.0f); // Update the shared parameters
153 | return return_loss;
154 | }
155 |
156 | void Serialize(string& filename, JointEncMorph& model,
157 | vector* cnn_models) {  // Saves hyperparameters (model.serialize) then every cnn model's weights, in order.
158 | ofstream outfile(filename);
159 | if (!outfile.is_open()) {
160 | cerr << "File opening failed: " << filename << endl;  // NOTE(review): only warns -- execution continues with a bad stream.
161 | }
162 |
163 | boost::archive::text_oarchive oa(outfile);
164 | oa & model;  // Hyperparameters only.
165 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
166 | oa & *(*cnn_models)[i];
167 | }
168 |
169 | cerr << "Saved model to: " << filename << endl;
170 | outfile.close();
171 | }
172 |
173 | void Read(string& filename, JointEncMorph* model, vector* cnn_models) {  // Mirror of Serialize: read hyperparameters, rebuild parameter structure, then load weights.
174 | ifstream infile(filename);
175 | if (!infile.is_open()) {
176 | cerr << "File opening failed: " << filename << endl;  // NOTE(review): only warns; the archive constructor below will still run.
177 | }
178 |
179 | boost::archive::text_iarchive ia(infile);
180 | ia & *model;  // Restores the dimensions needed by InitParams.
181 | for (unsigned i = 0; i < model->morph_len + 1; ++i) {  // morph_len per-type models + 1 shared model; ownership passes to the caller.
182 | Model *cnn_model = new Model();
183 | cnn_models->push_back(cnn_model);
184 | }
185 |
186 | model->InitParams(cnn_models);  // Must run before deserialising so parameter shapes match the archive.
187 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
188 | ia & *(*cnn_models)[i];
189 | }
190 |
191 | cerr << "Loaded model from: " << filename << endl;
192 | infile.close();
193 | }
194 |
195 | void
196 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
197 | const vector& input_ids, vector* pred_target_ids,
198 | vector* ensmb_model) {  // Greedy decoding, averaging log-softmax outputs over the ensemble.
199 | ComputationGraph cg;
200 |
201 | unsigned ensmb = ensmb_model->size();
202 | vector encoded_word_vecs;
203 | for (unsigned i = 0; i < ensmb; ++i) {  // Encode the input word once per ensemble member.
204 | Expression encoded_word_vec;
205 | auto model = (*ensmb_model)[i];
206 | model->AddParamsToCG(morph_id, &cg);
207 | model->RunFwdBwd(input_ids, &encoded_word_vec, &cg);
208 | model->TransformEncodedInput(&encoded_word_vec);
209 | encoded_word_vecs.push_back(encoded_word_vec);
210 | model->output_forward[morph_id].start_new_sequence();
211 | }
212 |
213 | unsigned out_index = 1;  // Index of the next input char to feed (0 is the BOW marker).
214 | unsigned pred_index = char_to_id[BOW];  // Decoding starts from the begin-of-word marker.
215 | while (pred_target_ids->size() < MAX_PRED_LEN) {
216 | vector ensmb_out;
217 | pred_target_ids->push_back(pred_index);
218 | if (pred_index == char_to_id[EOW]) {
219 | return; // If the end is found, break from the loop and return
220 | }
221 |
222 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
223 | auto model = (*ensmb_model)[ensmb_id];
224 | Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index);
225 | Expression input, input_char_vec;
226 | if (out_index < input_ids.size()) {  // Still inside the input word: feed the aligned char.
227 | input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]);
228 | } else {  // Past the input's end: feed a (clamped) per-morph epsilon vector.
229 | input_char_vec = lookup(cg, model->eps_vecs[morph_id],
230 | min(unsigned(out_index - input_ids.size()),
231 | model->max_eps - 1));
232 | }
233 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
234 | input_char_vec});
235 |
236 | Expression hidden = model->output_forward[morph_id].add_input(input);
237 | Expression out;
238 | model->ProjectToOutput(hidden, &out);
239 | ensmb_out.push_back(log_softmax(out));
240 | }
241 |
242 | Expression out = sum(ensmb_out) / ensmb_out.size();  // Average of per-model log-probabilities.
243 | vector dist = as_vector(cg.incremental_forward());
244 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));  // Greedy argmax.
245 | out_index++;
246 | }
247 | }
248 |
249 | void
250 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
251 | unordered_map& char_to_id,
252 | const vector& input_ids,
253 | vector >* sequences, vector* tm_scores,
254 | vector* ensmb_model) {  // Beam-search decoding over the ensemble average; fills `sequences` with beam_size hypotheses and `tm_scores` with their log-scores.
255 | unsigned out_index = 1;
256 | unsigned ensmb = ensmb_model->size();
257 | ComputationGraph cg;
258 |
259 | // Compute stuff for every model in the ensemble.
260 | vector encoded_word_vecs;
261 | vector ensmb_out;
262 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {  // Encode input and take the first decoder step (from BOW) per model.
263 | auto& model = *(*ensmb_model)[ensmb_id];
264 | model.AddParamsToCG(morph_id, &cg);
265 |
266 | Expression encoded_word_vec;
267 | model.RunFwdBwd(input_ids, &encoded_word_vec, &cg);
268 | model.TransformEncodedInput(&encoded_word_vec);
269 | encoded_word_vecs.push_back(encoded_word_vec);
270 | model.output_forward[morph_id].start_new_sequence();
271 |
272 | Expression prev_output_vec = lookup(cg, model.char_vecs,
273 | char_to_id[BOW]);
274 | Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
275 | lookup(cg, model.char_vecs,
276 | input_ids[out_index])});  // NOTE(review): assumes input_ids.size() > 1 at the first step.
277 | Expression hidden = model.output_forward[morph_id].add_input(input);
278 | Expression out;
279 | model.ProjectToOutput(hidden, &out);
280 | out = log_softmax(out);
281 | ensmb_out.push_back(out);
282 | }
283 |
284 | // Compute the average of the ensemble output.
285 | Expression out_dist = average(ensmb_out);
286 | vector log_dist = as_vector(cg.incremental_forward());
287 | priority_queue > init_queue;  // Max-heap of (log-prob, char-id) for the first output char.
288 | for (unsigned i = 0; i < log_dist.size(); ++i) {
289 | init_queue.push(make_pair(log_dist[i], i));
290 | }
291 | unsigned vocab_size = log_dist.size();
292 |
293 | // Initialise the beam_size sequences, scores, hidden states.
294 | vector log_scores;
295 | vector > prev_states;  // prev_states[beam][model] = decoder state pointer after that beam's last step.
296 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {  // Seed each beam with one of the top-beam_size first chars.
297 | vector seq;
298 | seq.push_back(char_to_id[BOW]);
299 | seq.push_back(init_queue.top().second);
300 | sequences->push_back(seq);
301 | log_scores.push_back(init_queue.top().first);
302 |
303 | vector ensmb_states;
304 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {  // All beams share the same initial decoder state.
305 | auto& model = *(*ensmb_model)[ensmb_id];
306 | ensmb_states.push_back(model.output_forward[morph_id].state());
307 | }
308 | prev_states.push_back(ensmb_states);
309 | init_queue.pop();
310 | }
311 |
312 | vector neg_inf(vocab_size, NEG_INF);  // Mask used to keep finished beams from being expanded; NEG_INF is intended to be a very negative log-prob.
313 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf);
314 |
315 | vector active_beams(beam_size, true);
316 | while (true) {  // One decoder step per iteration, across all active beams.
317 | out_index++;
318 | priority_queue > > probs_queue;  // (cumulative log-prob, (beam, char)).
319 | vector > curr_states;
320 | vector out_dist;
321 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
322 | if (active_beams[beam_id]) {
323 | unsigned prev_out_char = (*sequences)[beam_id].back();
324 | vector ensmb_out;
325 | vector ensmb_states;
326 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) {
327 | auto& model = *(*ensmb_model)[ensmb_id];
328 | Expression input_char_vec;
329 | if (out_index < input_ids.size()) {  // Still inside the input word.
330 | input_char_vec = lookup(cg, model.char_vecs, input_ids[out_index]);
331 | } else {  // Past the input's end: clamped epsilon vector.
332 | input_char_vec = lookup(cg, model.eps_vecs[morph_id],
333 | min(unsigned(out_index - input_ids.size()),
334 | model.max_eps - 1));
335 | }
336 |
337 | Expression prev_out_vec = lookup(cg, model.char_vecs, prev_out_char);
338 | Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_out_vec,
339 | input_char_vec});
340 | Expression hidden = model.output_forward[morph_id].add_input(
341 | prev_states[beam_id][ensmb_id], input);  // Continue from this beam's saved state, not the builder's latest.
342 | ensmb_states.push_back(model.output_forward[morph_id].state());
343 |
344 | Expression out;
345 | model.ProjectToOutput(hidden, &out);
346 | out = log_softmax(out);
347 | ensmb_out.push_back(out);
348 | }
349 | curr_states.push_back(ensmb_states);
350 | out_dist.push_back(average(ensmb_out));
351 | } else {
352 | vector dummy(ensmb, RNNPointer(0));  // Placeholder state; never followed because the beam stays masked.
353 | curr_states.push_back(dummy);
354 | out_dist.push_back(neg_inf_vec);  // Finished beams contribute only the mask.
55 | }
356 | }
357 |
358 | Expression all_scores = concatenate(out_dist);  // One concatenated vector: beam_id * vocab_size + char_id.
359 | vector log_dist = as_vector(cg.incremental_forward());
360 |
361 | for (unsigned index = 0; index < log_dist.size(); ++index) {
362 | unsigned beam_id = index / vocab_size;
363 | unsigned char_id = index % vocab_size;
364 | if (active_beams[beam_id]) {
365 | pair location = make_pair(beam_id, char_id);
366 | probs_queue.push(pair >(
367 | log_scores[beam_id] + log_dist[index], location));
368 | }
369 | }
370 |
371 | // Find the beam_size best now and update the variables.
372 | unordered_map > new_seq;  // Staged so one beam's update cannot clobber another's source sequence.
373 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
374 | if (active_beams[beam_id]) {
375 | float log_prob = probs_queue.top().first;
376 | pair location = probs_queue.top().second;
377 | unsigned old_beam_id = location.first, char_id = location.second;
378 |
379 | vector seq = (*sequences)[old_beam_id];
380 | seq.push_back(char_id);
381 | new_seq[beam_id] = seq;
382 | log_scores[beam_id] = log_prob; // Update the score
383 |
384 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state
385 | probs_queue.pop();
386 | }
387 | }
388 |
389 | // Update the sequences now.
390 | for (auto& it : new_seq) {
391 | (*sequences)[it.first] = it.second;
392 | }
393 |
394 | // Check if a sequence should be made inactive.
395 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
396 | if (active_beams[beam_id] &&
397 | ((*sequences)[beam_id].back() == char_to_id[EOW] ||
398 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) {
399 | active_beams[beam_id] = false;
400 | }
401 | }
402 |
403 | // Check if all sequences are inactive.
404 | bool all_inactive = true;
405 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
406 | if (active_beams[beam_id]) {
407 | all_inactive = false;
408 | break;
409 | }
410 | }
411 |
412 | if (all_inactive) {
413 | *tm_scores = log_scores;  // Hand the final per-beam log-scores back to the caller.
414 | return;
415 | }
416 | }
417 | }
418 |
418 |
419 |
--------------------------------------------------------------------------------
/src/joint-enc-morph.h:
--------------------------------------------------------------------------------
1 | #ifndef JOINT_ENC_MORPH_H_
2 | #define JOINT_ENC_MORPH_H_
3 |
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "utils.h"
14 |
15 | #include
16 | #include
17 | #include
18 |
19 | using namespace std;
20 | using namespace cnn;
21 | using namespace cnn::expr;
22 |
23 | // The class has a shared encoder across all morphological types.
24 | // Other auxiliary units are also shared, except for the decoder and
25 | // input transformation parameters.
26 | class JointEncMorph {
27 | public:
28 | LSTMBuilder input_forward, input_backward; // Shared encoder
29 | vector output_forward;  // One decoder LSTM per morphological type.
30 | LookupParameters* char_vecs; // Shared char vectors
31 |
32 | Expression hidden_to_output, hidden_to_output_bias;
33 | Parameters *phidden_to_output, *phidden_to_output_bias; // Shared
34 |
35 | Expression transform_encoded, transform_encoded_bias;  // Per-morph-type encoded-input transform (one entry per type below).
36 | vector ptransform_encoded, ptransform_encoded_bias;
37 |
38 | vector eps_vecs; // Not sharing the epsilon vectors
39 |
40 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5;
41 |
42 | JointEncMorph() {}  // Default-construct before Read() repopulates the fields.
43 |
44 | JointEncMorph(const unsigned& char_length, const unsigned& hidden_length,
45 | const unsigned& vocab_length, const unsigned& layers,
46 | const unsigned& num_morph, vector* m,
47 | vector* optimizer);
48 |
49 | void InitParams(vector* m);  // Allocates all parameters into the given models.
50 |
51 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);  // Must be called once per new graph before use.
52 |
53 | void RunFwdBwd(const vector& inputs,
54 | Expression* hidden, ComputationGraph *cg);  // Bi-LSTM encoding of the input chars.
55 |
56 | void TransformEncodedInput(Expression* encoded_input) const;  // Morph-type-specific affine transform, in place.
57 |
58 | void ProjectToOutput(const Expression& hidden, Expression* out) const;  // Hidden state -> vocabulary scores.
59 |
60 | Expression ComputeLoss(const vector& hidden_units,
61 | const vector& targets) const;  // Summed per-char neg-log-likelihood.
62 |
63 | float Train(const unsigned& morph_id, const vector& inputs,
64 | const vector& outputs, AdadeltaTrainer* opt,
65 | AdadeltaTrainer* shared_opt);
66 |
67 | friend class boost::serialization::access;
68 | template void serialize(Archive& ar, const unsigned int) {  // Only hyperparameters; weights are archived separately via the cnn Models.
69 | ar & char_len;
70 | ar & hidden_len;
71 | ar & vocab_len;
72 | ar & layers;
73 | ar & morph_len;
74 | ar & max_eps;
75 | }
76 | };
77 |
78 | void Serialize(string& filename, JointEncMorph& model, vector* cnn_model);
79 |
80 | void Read(string& filename, JointEncMorph* model, vector* cnn_model);
81 |
82 | void
83 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
84 | const vector& input_ids, vector* pred_target_ids,
85 | vector* ensmb_model);
86 |
87 | void
88 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,
89 | unordered_map& char_to_id,
90 | const vector& input_ids,
91 | vector >* sequences, vector* tm_scores,
92 | vector* ensmb_model);
93 |
94 | #endif
95 |
--------------------------------------------------------------------------------
/src/lm-joint-enc.cc:
--------------------------------------------------------------------------------
1 | #include "lm-joint-enc.h"
2 |
3 | using namespace std;
4 | using namespace cnn;
5 | using namespace cnn::expr;
6 |
7 | string BOW = "", EOW = "";  // NOTE(review): begin-/end-of-word marker strings look emptied by extraction -- confirm against the repo.
8 | unsigned MAX_PRED_LEN = 100;  // Hard cap on decoded-output length.
9 |
10 | LMJointEnc::LMJointEnc(
11 | const unsigned& char_length, const unsigned& hidden_length,
12 | const unsigned& vocab_length, const unsigned& num_layers,
13 | const unsigned& num_morph, vector* m,
14 | vector* optimizer) {
15 | char_len = char_length;  // Character-embedding dimensionality.
16 | hidden_len = hidden_length;  // LSTM hidden-state size.
17 | vocab_len = vocab_length;  // Character-vocabulary size.
18 | layers = num_layers;  // LSTM depth.
19 | morph_len = num_morph;  // Number of morphological types.
20 | InitParams(m);  // Builds all parameters; `optimizer` is accepted but unused here.
21 | }
22 |
23 | void LMJointEnc::InitParams(vector* m) {
24 | // Have all the shared parameters in one model
25 | input_forward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]);  // (*m)[morph_len] is the shared model; (*m)[0..morph_len-1] are per-morph-type.
26 | input_backward = LSTMBuilder(layers, char_len, hidden_len, (*m)[morph_len]);
27 |
28 | char_vecs = (*m)[morph_len]->add_lookup_parameters(vocab_len, {char_len});  // Shared char embeddings.
29 |
30 | phidden_to_output = (*m)[morph_len]->add_parameters({vocab_len, hidden_len});  // Shared output projection.
31 | phidden_to_output_bias = (*m)[morph_len]->add_parameters({vocab_len, 1});
32 |
33 | lm_pos_weights = (*m)[morph_len]->add_lookup_parameters(
34 | max_lm_pos_weights, {1});  // Learned per-position scalar weight for the LM interpolation (see ComputeLoss).
35 |
36 | for (unsigned i = 0; i < morph_len; ++i) {  // Decoder, transform and epsilon vectors are per-morph-type.
37 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len,
38 | hidden_len, (*m)[i]));
39 |
40 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len,
41 | 2 * hidden_len}));
42 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1}));
43 |
44 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len}));
45 | }
46 | }
47 |
48 | void LMJointEnc::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) {  // Registers builders/parameters with a fresh graph for the given morph type.
49 | input_forward.new_graph(*cg);
50 | input_backward.new_graph(*cg);
51 | output_forward[morph_id].new_graph(*cg);
52 |
53 | hidden_to_output = parameter(*cg, phidden_to_output);  // Shared.
54 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias);
55 |
56 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]);  // Per-morph-type.
57 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]);
58 | }
59 |
60 | void LMJointEnc::RunFwdBwd(const vector& inputs,
61 | Expression* hidden, ComputationGraph *cg) {  // Bidirectional encode: *hidden = [last fwd state ; last bwd state].
62 | vector input_vecs;
63 | for (const unsigned& input_id : inputs) {
64 | input_vecs.push_back(lookup(*cg, char_vecs, input_id));
65 | }
66 |
67 | // Run forward LSTM
68 | Expression forward_unit;
69 | input_forward.start_new_sequence();
70 | for (unsigned i = 0; i < input_vecs.size(); ++i) {
71 | forward_unit = input_forward.add_input(input_vecs[i]);  // Only the final state is kept.
72 | }
73 |
74 | // Run backward LSTM
75 | Expression backward_unit;
76 | input_backward.start_new_sequence();
77 | for (int i = input_vecs.size() - 1; i >= 0; --i) {  // Signed index so the countdown terminates at 0.
78 | backward_unit = input_backward.add_input(input_vecs[i]);
79 | }
80 |
81 | // Concatenate the forward and back hidden layers
82 | *hidden = concatenate({forward_unit, backward_unit});  // NOTE(review): assumes non-empty `inputs`.
83 | }
84 |
85 | void LMJointEnc::TransformEncodedInput(Expression* encoded_input) const {  // In place: enc <- W*enc + b, mapping 2*hidden_len -> hidden_len.
86 | *encoded_input = affine_transform({transform_encoded_bias,
87 | transform_encoded, *encoded_input});
88 | }
89 |
90 | void LMJointEnc::ProjectToOutput(const Expression& hidden, Expression* out)
91 | const {  // Unnormalised character scores: out = W_out*hidden + b_out.
92 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden});
93 | }
94 |
95 | Expression LMJointEnc::ComputeLoss(const vector& hidden_units,
96 | const vector& targets,
97 | LM *lm, ComputationGraph* cg) const {  // Per-step loss interpolating the translation model with a char LM, using a learned position-dependent weight. NOTE(review): unlike sibling classes, no assert that hidden_units and targets have equal sizes.
98 | vector losses;
99 | vector incremental_targets;  // Prefix of gold target chars consumed so far; conditions the LM at each step.
100 | incremental_targets.push_back(lm->char_to_id[BOW]);
101 | for (unsigned i = 0; i < hidden_units.size(); ++i) {
102 | Expression out;
103 | ProjectToOutput(hidden_units[i], &out);
104 | Expression trans_lp = log_softmax(out);  // Translation-model log-probs.
105 |
106 | // Calculate the LM probabilities of all possible outputs.
107 | Expression lm_lp = LogProbDist(incremental_targets, lm, cg);  // Presumably from utils.h -- LM log-prob over the vocab given the prefix; confirm.
108 |
109 | unsigned lm_index = min(i + 1, max_lm_pos_weights - 1);  // Clamp position into the learned-weight table.
110 | Expression lm_weight = lookup(*cg, lm_pos_weights, lm_index);
111 |
112 | //Expression total_lp = trans_lp + cwise_multiply(lm_lp, Softplus(lm_weight));
113 | Expression total_lp = trans_lp + lm_lp * Softplus(lm_weight);  // Softplus keeps the LM weight non-negative.
114 | losses.push_back(pickneglogsoftmax(total_lp, targets[i]));  // Renormalises the combined scores before picking the target.
115 | incremental_targets.push_back(targets[i]);  // Teacher forcing: extend the prefix with the gold char.
116 | }
117 | return sum(losses);
118 | }
119 |
// One training step on a single (input, output) character-id pair for the
// given morph type. Builds the graph, computes the LM-augmented loss, and
// applies both the morph-specific and the shared optimizer.
// Returns the scalar loss for this example.
float LMJointEnc::Train(const unsigned& morph_id, const vector& inputs,
                        const vector& outputs,
                        LM* lm, AdadeltaTrainer* opt,
                        AdadeltaTrainer* shared_opt) {
  ComputationGraph cg;
  AddParamsToCG(morph_id, &cg);

  // Encode and Transform to feed into decoder
  Expression encoded_input_vec;
  RunFwdBwd(inputs, &encoded_input_vec, &cg);
  TransformEncodedInput(&encoded_input_vec);

  // Use this encoded word vector to predict the transformed word.
  // Each decoder input is [encoded word; previous output char; aligned
  // input char] — when the output is longer than the input, a learned
  // epsilon vector stands in for the missing input character.
  vector input_vecs_for_dec;
  vector output_ids_for_pred;
  for (unsigned i = 0; i < outputs.size(); ++i) {
    if (i < outputs.size() - 1) {
      // The end-of-word marker will not be fed as input -- it needs to be
      // predicted. (NOTE(review): the marker literal in the original
      // comment looks stripped by extraction.)
      if (i < inputs.size() - 1) {
        input_vecs_for_dec.push_back(concatenate(
            {encoded_input_vec, lookup(cg, char_vecs, outputs[i]),
             lookup(cg, char_vecs, inputs[i + 1])}));
      } else {
        // Past the end of the input: use the epsilon vector, clamped to
        // the last available index.
        input_vecs_for_dec.push_back(concatenate(
            {encoded_input_vec, lookup(cg, char_vecs, outputs[i]),
             lookup(cg, eps_vecs[morph_id],
                    min(unsigned(i - inputs.size()), max_eps - 1))}));
      }
    }
    if (i > 0) { // The begin marker will not be predicted -- its fed in.
      output_ids_for_pred.push_back(outputs[i]);
    }
  }

  vector decoder_hidden_units;
  output_forward[morph_id].start_new_sequence();
  for (const auto& vec : input_vecs_for_dec) {
    decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec));
  }
  Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred, lm,
                                &cg);

  float return_loss = as_scalar(cg.forward());
  cg.backward();
  opt->update(1.0f);  // Update the morph specific parameters
  shared_opt->update(1.0f);  // Update the shared parameters
  return return_loss;
}
168 |
169 | void Serialize(string& filename, LMJointEnc& model,
170 | vector* cnn_models) {
171 | ofstream outfile(filename);
172 | if (!outfile.is_open()) {
173 | cerr << "File opening failed" << endl;
174 | }
175 |
176 | boost::archive::text_oarchive oa(outfile);
177 | oa & model;
178 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
179 | oa & *(*cnn_models)[i];
180 | }
181 |
182 | cerr << "Saved model to: " << filename << endl;
183 | outfile.close();
184 | }
185 |
186 | void Read(string& filename, LMJointEnc* model, vector* cnn_models) {
187 | ifstream infile(filename);
188 | if (!infile.is_open()) {
189 | cerr << "File opening failed" << endl;
190 | }
191 |
192 | boost::archive::text_iarchive ia(infile);
193 | ia & *model;
194 | for (unsigned i = 0; i < model->morph_len + 1; ++i) {
195 | Model *cnn_model = new Model();
196 | cnn_models->push_back(cnn_model);
197 | }
198 |
199 | model->InitParams(cnn_models);
200 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
201 | ia & *(*cnn_models)[i];
202 | }
203 |
204 | cerr << "Loaded model from: " << filename << endl;
205 | infile.close();
206 | }
207 |
// Computes the log-probability of every possible next character given the
// sequence so far, by querying the n-gram LM once per vocabulary entry, and
// returns the result as a dense input expression in *cg.
// The first element of `seq` (the begin marker) is dropped before scoring.
// NOTE(review): this does vocab-many full-sequence LM queries per call —
// O(V * |seq|) LM lookups; acceptable only because LM::LogProb memoizes.
Expression LogProbDist(const vector& seq, LM *lm,
                       ComputationGraph *cg) {
  vector lm_dist(lm->char_to_id.size(), 0.f);

  // Remove the first (begin-marker) character.
  vector seq_without_start(seq.begin() + 1, seq.end());
  for (const auto& it : lm->char_to_id) {
    vector possible_seq(seq_without_start);
    possible_seq.push_back(it.second);
    lm_dist[it.second] = lm->LogProbSeq(possible_seq);
  }
  return input(*cg, {(long) lm_dist.size()}, lm_dist);
}
223 |
// Scalar softplus: log(1 + e^x), a smooth approximation of max(0, x).
float Softplus(float x) {
  const float exp_x = exp(x);
  return log(1 + exp_x);
}
227 |
228 | Expression Softplus(Expression x) {
229 | return log(1 + exp(x));
230 | }
231 |
// Greedy LM-augmented decoding with an ensemble of models: each step, every
// model produces a log-distribution over the next character (decoder logits
// plus the weighted LM distribution); the ensemble's distributions are
// averaged and the argmax character is emitted. Stops at the end-of-word
// marker or after MAX_PRED_LEN characters.
void
EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
               const vector& input_ids, vector* pred_target_ids,
               LM* lm, vector* ensmb_model) {
  ComputationGraph cg;

  // Encode the input word once per ensemble member.
  unsigned ensmb = ensmb_model->size();
  vector encoded_word_vecs;
  for (unsigned i = 0; i < ensmb; ++i) {
    Expression encoded_word_vec;
    auto model = (*ensmb_model)[i];
    model->AddParamsToCG(morph_id, &cg);
    model->RunFwdBwd(input_ids, &encoded_word_vec, &cg);
    model->TransformEncodedInput(&encoded_word_vec);
    encoded_word_vecs.push_back(encoded_word_vec);
    model->output_forward[morph_id].start_new_sequence();
  }

  unsigned out_index = 1;
  unsigned pred_index = char_to_id[BOW];  // Start from the begin marker.
  while (pred_target_ids->size() < MAX_PRED_LEN) {
    pred_target_ids->push_back(pred_index);
    if (pred_index == char_to_id[EOW]) {
      return; // If the end is found, break from the loop and return
    }

    vector ensmb_out;
    // LM distribution over the next character given everything emitted so far.
    Expression lm_dist = LogProbDist(*pred_target_ids, lm, &cg);
    for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
      auto model = (*ensmb_model)[ensmb_id];
      Expression prev_output_vec = lookup(cg, model->char_vecs, pred_index);
      Expression input, input_char_vec;
      // Aligned input character, or a clamped epsilon vector once the
      // output has outrun the input.
      if (out_index < input_ids.size()) {
        input_char_vec = lookup(cg, model->char_vecs, input_ids[out_index]);
      } else {
        input_char_vec = lookup(cg, model->eps_vecs[morph_id],
                                min(unsigned(out_index - input_ids.size()),
                                    model->max_eps - 1));
      }
      input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
                           input_char_vec});

      Expression hidden = model->output_forward[morph_id].add_input(input);
      Expression tm_prob;
      model->ProjectToOutput(hidden, &tm_prob);
      tm_prob = log_softmax(tm_prob);

      unsigned lm_index = min(out_index, model->max_lm_pos_weights - 1);
      Expression lm_weight = lookup(cg, model->lm_pos_weights, lm_index);
      Expression total_lp = tm_prob + lm_dist * Softplus(lm_weight);

      ensmb_out.push_back(log_softmax(total_lp));
    }

    // `out` is the last node added, so incremental_forward() evaluates it.
    Expression out = average(ensmb_out);
    vector dist = as_vector(cg.incremental_forward());
    pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));
    out_index++;
  }
}
292 |
--------------------------------------------------------------------------------
/src/lm-joint-enc.h:
--------------------------------------------------------------------------------
#ifndef LM_JOINT_ENC_H_
#define LM_JOINT_ENC_H_

// NOTE(review): the include targets below look stripped by extraction
// (bare "#include" lines) — restore from the original repository.
#include "cnn/nodes.h"
#include "cnn/cnn.h"
#include "cnn/rnn.h"
#include "cnn/gru.h"
#include "cnn/lstm.h"
#include "cnn/training.h"
#include "cnn/gpu-ops.h"
#include "cnn/expr.h"

#include "lm.h"
#include "utils.h"

#include
#include
#include

using namespace std;
using namespace cnn;
using namespace cnn::expr;

// The class has a shared encoder across all morphological types.
// Other auxiliary units are also shared, except for the decoder and
// input transformation parameters, which are per-morph.
class LMJointEnc {
 public:
  LSTMBuilder input_forward, input_backward; // Shared encoder
  vector output_forward;  // One decoder LSTM per morph type
  LookupParameters* char_vecs; // Shared char vectors

  // Output projection (shared): expressions are per-graph views of the
  // underlying parameters below.
  Expression hidden_to_output, hidden_to_output_bias;
  Parameters *phidden_to_output, *phidden_to_output_bias; // Shared

  // Per-morph affine transform applied to the encoded input.
  Expression transform_encoded, transform_encoded_bias;
  vector ptransform_encoded, ptransform_encoded_bias;

  vector eps_vecs; // Not sharing the epsilon vectors

  // Learned, position-indexed weights for mixing in the LM distribution.
  LookupParameters* lm_pos_weights;

  unsigned char_len, hidden_len, vocab_len, layers, morph_len;
  unsigned max_lm_pos_weights = 20, max_eps = 5;

  LMJointEnc() {}

  LMJointEnc(const unsigned& char_length, const unsigned& hidden_length,
             const unsigned& vocab_length, const unsigned& layers,
             const unsigned& num_morph, vector* m,
             vector* optimizer);

  // Registers all parameters with the cnn models (one per morph + shared).
  void InitParams(vector* m);

  // Binds this morph type's parameters into the given computation graph.
  void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);

  // Bidirectional encoding of the input character sequence.
  void RunFwdBwd(const vector& inputs,
                 Expression* hidden, ComputationGraph *cg);

  void TransformEncodedInput(Expression* encoded_input) const;

  void ProjectToOutput(const Expression& hidden, Expression* out) const;

  // LM-augmented negative log-likelihood of the targets.
  Expression ComputeLoss(const vector& hidden_units,
                         const vector& targets, LM *lm,
                         ComputationGraph* cg) const;

  // One SGD step; returns the example loss.
  float Train(const unsigned& morph_id, const vector& inputs,
              const vector& outputs, LM *lm, AdadeltaTrainer* opt,
              AdadeltaTrainer* shared_opt);

  friend class boost::serialization::access;
  // Only hyper-parameters are serialized here; the cnn Models carry the
  // actual weights and are archived separately (see Serialize/Read).
  template void serialize(Archive& ar, const unsigned int) {
    ar & char_len;
    ar & hidden_len;
    ar & vocab_len;
    ar & layers;
    ar & morph_len;
    ar & max_eps;
    ar & max_lm_pos_weights;
  }
};

// Log-prob distribution over the next character under the n-gram LM.
Expression LogProbDist(const vector& seq,
                       LM *lm, ComputationGraph *cg);

Expression Softplus(Expression x);

float Softplus(float x);


void Serialize(string& filename, LMJointEnc& model, vector* cnn_model);

void Read(string& filename, LMJointEnc* model, vector* cnn_model);

// Greedy ensemble decoding with LM interpolation.
void
EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
               const vector& input_ids, vector* pred_target_ids,
               LM *lm, vector* ensmb_model);

#endif
102 |
--------------------------------------------------------------------------------
/src/lm-sep-morph.cc:
--------------------------------------------------------------------------------
#include "lm-sep-morph.h"

using namespace std;
using namespace cnn;
using namespace cnn::expr;
using cnn::expr::input;

// Word-boundary markers. NOTE(review): the literals look stripped by
// extraction (likely "<w>" / "</w>") — TODO confirm against char_vocab.txt.
string BOW = "", EOW = "";
int MAX_PRED_LEN = 100;  // Hard cap on decoded output length.
10 |
// Stores the network hyper-parameters and registers all parameters with
// the per-morph cnn models. The `optimizer` argument is unused here —
// trainers are driven externally in Train().
LMSepMorph::LMSepMorph(const unsigned& char_length, const unsigned& hidden_length,
                       const unsigned& vocab_length, const unsigned& num_layers,
                       const unsigned& num_morph, vector* m,
                       vector* optimizer) {
  char_len = char_length;
  hidden_len = hidden_length;
  vocab_len = vocab_length;
  layers = num_layers;
  morph_len = num_morph;
  InitParams(m);
}
22 |
// Creates one full, independent set of parameters per morph type: encoder
// LSTMs, decoder LSTM, output projection, char embeddings, the encoded-input
// transform, epsilon vectors, and per-position LM mixing weights.
void LMSepMorph::InitParams(vector* m) {
  for (unsigned i = 0; i < morph_len; ++i) {
    input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));
    input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));
    // Decoder input = [encoded word (hidden); prev output char; input char].
    output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len,
                                         hidden_len, (*m)[i]));

    phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len}));
    phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1}));

    char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len}));

    // Projects the 2*hidden bidirectional encoding down to hidden.
    ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len,
                                                          2 * hidden_len}));
    ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1}));

    eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len}));
    lm_pos_weights.push_back((*m)[i]->add_lookup_parameters(
                             max_lm_pos_weights, {1}));
  }
}
44 |
// Binds this morph type's builders and parameters into the given graph.
// Must be called once per ComputationGraph before any other graph op.
void LMSepMorph::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) {
  input_forward[morph_id].new_graph(*cg);
  input_backward[morph_id].new_graph(*cg);
  output_forward[morph_id].new_graph(*cg);

  hidden_to_output = parameter(*cg, phidden_to_output[morph_id]);
  hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]);

  transform_encoded = parameter(*cg, ptransform_encoded[morph_id]);
  transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]);
}
56 |
// Bidirectional encoder for the per-morph model: embeds the input ids with
// this morph's lookup table, runs its forward/backward LSTMs, and writes
// the concatenated final states into *hidden.
void LMSepMorph::RunFwdBwd(const unsigned& morph_id,
                           const vector& inputs,
                           Expression* hidden, ComputationGraph *cg) {
  vector input_vecs;
  for (const unsigned& input_id : inputs) {
    input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id));
  }

  // Run forward LSTM
  Expression forward_unit;
  input_forward[morph_id].start_new_sequence();
  for (unsigned i = 0; i < input_vecs.size(); ++i) {
    forward_unit = input_forward[morph_id].add_input(input_vecs[i]);
  }

  // Run backward LSTM (signed index so the loop terminates at -1)
  Expression backward_unit;
  input_backward[morph_id].start_new_sequence();
  for (int i = input_vecs.size() - 1; i >= 0; --i) {
    backward_unit = input_backward[morph_id].add_input(input_vecs[i]);
  }

  // Concatenate the forward and back hidden layers
  *hidden = concatenate({forward_unit, backward_unit});
}
82 |
// In-place affine transform of the encoded input: x <- W*x + b.
void LMSepMorph::TransformEncodedInput(Expression* encoded_input) const {
  *encoded_input = affine_transform({transform_encoded_bias,
                                     transform_encoded, *encoded_input});
}
87 |
// Projects a decoder hidden state to vocabulary-sized logits.
void LMSepMorph::ProjectToOutput(const Expression& hidden, Expression* out) const {
  *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden});
}
91 |
92 |
// Per-position loss mixing the decoder distribution with the external LM
// distribution, weighted by a learned positive (Softplus) per-position
// weight taken from this morph's weight table. Returns summed NLL.
Expression LMSepMorph::ComputeLoss(const unsigned& morph_id,
                                   const vector& hidden_units,
                                   const vector& targets,
                                   LM *lm, ComputationGraph* cg) const {
  vector losses;
  vector incremental_targets;
  // Seed the LM context with the begin-of-word marker.
  incremental_targets.push_back(lm->char_to_id[BOW]);
  for (unsigned i = 0; i < hidden_units.size(); ++i) {
    Expression out;
    ProjectToOutput(hidden_units[i], &out);
    Expression trans_lp = log_softmax(out);

    // Calculate the LM probabilities of all possible outputs.
    Expression lm_lp = LogProbDist(incremental_targets, lm, cg);

    // Clamp the position index into the weight table.
    unsigned lm_index = min(i + 1, max_lm_pos_weights - 1);
    Expression lm_weight = lookup(*cg, lm_pos_weights[morph_id], lm_index);

    //Expression total_lp = trans_lp + cwise_multiply(lm_lp, Softplus(lm_weight));
    Expression total_lp = trans_lp + lm_lp * Softplus(lm_weight);
    losses.push_back(pickneglogsoftmax(total_lp, targets[i]));
    incremental_targets.push_back(targets[i]);  // Grow the LM context.
  }
  return sum(losses);
}
118 |
// One training step on a single (input, output) pair for the given morph
// type, updating this morph's parameters via `ada_gd`. Returns the loss.
float LMSepMorph::Train(const unsigned& morph_id,
                        const vector& inputs,
                        const vector& outputs, LM *lm,
                        AdadeltaTrainer* ada_gd) {
  ComputationGraph cg;
  AddParamsToCG(morph_id, &cg);

  // Encode and Transform to feed into decoder
  Expression encoded_input_vec;
  RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg);
  TransformEncodedInput(&encoded_input_vec);

  // Use this encoded word vector to predict the transformed word.
  // Decoder input at each step: [encoded word; previous output char;
  // aligned input char or epsilon when the input is exhausted].
  vector input_vecs_for_dec;
  vector output_ids_for_pred;
  for (unsigned i = 0; i < outputs.size(); ++i) {
    if (i < outputs.size() - 1) {
      // The end-of-word marker will not be fed as input -- it is predicted.
      if (i < inputs.size() - 1) {
        input_vecs_for_dec.push_back(concatenate(
            {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]),
             lookup(cg, char_vecs[morph_id], inputs[i + 1])}));
      } else {
        input_vecs_for_dec.push_back(concatenate(
            {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]),
             lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))}));
      }
    }
    if (i > 0) { // The begin marker will not be predicted -- its fed in.
      output_ids_for_pred.push_back(outputs[i]);
    }
  }

  vector decoder_hidden_units;
  output_forward[morph_id].start_new_sequence();
  for (const auto& vec : input_vecs_for_dec) {
    decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec));
  }

  // If its the first iteration, do not use language model.
  Expression loss = ComputeLoss(morph_id, decoder_hidden_units,
                                output_ids_for_pred, lm, &cg);

  float return_loss = as_scalar(cg.incremental_forward());
  cg.backward();
  ada_gd->update(1.0f);

  return return_loss;
}
168 |
// Computes the log-probability of every possible next character given the
// sequence so far (begin marker dropped), one LM query per vocab entry,
// and returns the distribution as a dense input expression in *cg.
Expression LogProbDist(const vector& seq, LM *lm,
                       ComputationGraph *cg) {
  vector lm_dist(lm->char_to_id.size(), 0.f);

  // Remove the first (begin-marker) character.
  vector seq_without_start(seq.begin() + 1, seq.end());
  for (const auto& it : lm->char_to_id) {
    vector possible_seq(seq_without_start);
    possible_seq.push_back(it.second);
    lm_dist[it.second] = lm->LogProbSeq(possible_seq);
  }
  return input(*cg, {(long) lm_dist.size()}, lm_dist);
}
184 |
// Greedy ensemble decoding for the per-morph model: each step averages the
// LM-augmented log-distributions of all ensemble members and emits the
// argmax character, until the end marker or MAX_PRED_LEN.
void
EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
               const vector& input_ids, vector* pred_target_ids,
               LM* lm, vector* ensmb_model) {
  ComputationGraph cg;

  // Encode the input once per ensemble member.
  unsigned ensmb = ensmb_model->size();
  vector encoded_word_vecs;
  for (unsigned i = 0; i < ensmb; ++i) {
    Expression encoded_word_vec;
    auto model = (*ensmb_model)[i];
    model->AddParamsToCG(morph_id, &cg);
    model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg);
    model->TransformEncodedInput(&encoded_word_vec);
    encoded_word_vecs.push_back(encoded_word_vec);
    model->output_forward[morph_id].start_new_sequence();
  }

  unsigned out_index = 1;
  unsigned pred_index = char_to_id[BOW];  // Start from the begin marker.
  while (pred_target_ids->size() < MAX_PRED_LEN) {
    pred_target_ids->push_back(pred_index);
    if (pred_index == char_to_id[EOW]) {
      // Print the lm weights.
      /*for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
        auto model = (*ensmb_model)[ensmb_id];
        for (unsigned i = 1; i < model->max_lm_pos_weights; ++i) {
          float p = model->lm_pos_weights[morph_id]->values[i].v[0];
          cerr << Softplus(p) << " ";
        }
        cerr << endl;
      }*/

      return; // If the end is found, break from the loop and return
    }

    vector ensmb_out;
    // LM distribution over the next character given the emitted prefix.
    Expression lm_dist = LogProbDist(*pred_target_ids, lm, &cg);
    for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
      auto model = (*ensmb_model)[ensmb_id];
      Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index);
      Expression input, input_char_vec;
      // Aligned input character, or clamped epsilon once input is exhausted.
      if (out_index < input_ids.size()) {
        input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]);
      } else {
        input_char_vec = lookup(cg, model->eps_vecs[morph_id],
                                min(unsigned(out_index - input_ids.size()),
                                    model->max_eps - 1));
      }
      input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
                           input_char_vec});

      Expression hidden = model->output_forward[morph_id].add_input(input);
      Expression tm_prob;
      model->ProjectToOutput(hidden, &tm_prob);
      tm_prob = log_softmax(tm_prob);

      unsigned lm_index = min(out_index, model->max_lm_pos_weights - 1);
      Expression lm_weight = lookup(cg, model->lm_pos_weights[morph_id], lm_index);
      Expression total_lp = tm_prob + lm_dist * Softplus(lm_weight);

      ensmb_out.push_back(log_softmax(total_lp));
    }
    // Last node added, so incremental_forward() evaluates the average.
    Expression total_dist = average(ensmb_out);

    vector dist = as_vector(cg.incremental_forward());
    pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));
    out_index++;
  }
}
255 |
// Scalar softplus, log(1 + e^x); always non-negative.
float Softplus(float x) {
  float e = exp(x);
  float result = log(1 + e);
  return result;
}
259 |
260 | Expression Softplus(Expression x) {
261 | return log(1 + exp(x));
262 | }
263 |
264 | void Serialize(string& filename, LMSepMorph& model, vector* cnn_models) {
265 | ofstream outfile(filename);
266 | if (!outfile.is_open()) {
267 | cerr << "File opening failed" << endl;
268 | }
269 |
270 | boost::archive::text_oarchive oa(outfile);
271 | oa & model;
272 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
273 | oa & *(*cnn_models)[i];
274 | }
275 |
276 | cerr << "Saved model to: " << filename << endl;
277 | outfile.close();
278 | }
279 |
280 | void Read(string& filename, LMSepMorph* model, vector* cnn_models) {
281 | ifstream infile(filename);
282 | if (!infile.is_open()) {
283 | cerr << "File opening failed" << endl;
284 | }
285 |
286 | boost::archive::text_iarchive ia(infile);
287 | ia & *model;
288 | for (unsigned i = 0; i < model->morph_len; ++i) {
289 | Model *cnn_model = new Model();
290 | cnn_models->push_back(cnn_model);
291 | }
292 |
293 | model->InitParams(cnn_models);
294 | for (unsigned i = 0; i < model->morph_len; ++i) {
295 | ia & *(*cnn_models)[i];
296 | }
297 |
298 | cerr << "Loaded model from: " << filename << endl;
299 | infile.close();
300 | }
301 |
302 |
--------------------------------------------------------------------------------
/src/lm-sep-morph.h:
--------------------------------------------------------------------------------
1 | #ifndef SEP_MORPH_H_
2 | #define SEP_MORPH_H_
3 |
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "lm.h"
14 | #include "utils.h"
15 |
16 | #include
17 | #include
18 | #include
19 |
20 | using namespace std;
21 | using namespace cnn;
22 | using namespace cnn::expr;
23 |
24 | class LMSepMorph {
25 | public:
26 | vector input_forward, input_backward, output_forward;
27 | vector char_vecs;
28 |
29 | Expression hidden_to_output, hidden_to_output_bias;
30 | vector phidden_to_output, phidden_to_output_bias;
31 |
32 | Expression transform_encoded, transform_encoded_bias;
33 | vector ptransform_encoded, ptransform_encoded_bias;
34 |
35 | unsigned char_len, hidden_len, vocab_len, layers, morph_len;
36 | unsigned max_eps = 5, max_lm_pos_weights = 20;
37 | vector eps_vecs;
38 | vector lm_pos_weights;
39 |
40 | LMSepMorph() {}
41 |
42 | LMSepMorph(const unsigned& char_length, const unsigned& hidden_length,
43 | const unsigned& vocab_length, const unsigned& layers,
44 | const unsigned& num_morph, vector* m,
45 | vector* optimizer);
46 |
47 | void InitParams(vector* m);
48 |
49 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);
50 |
51 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs,
52 | Expression* hidden, ComputationGraph *cg);
53 |
54 | void TransformEncodedInput(Expression* encoded_input) const;
55 |
56 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const;
57 |
58 | void ProjectToOutput(const Expression& hidden, Expression* out) const;
59 |
60 | Expression ComputeLoss(const unsigned& morph_id,
61 | const vector& hidden_units,
62 | const vector& targets,
63 | LM *lm, ComputationGraph* cg) const;
64 |
65 | float Train(const unsigned& morph_id, const vector& inputs,
66 | const vector& outputs, LM* lm, AdadeltaTrainer* ada_gd);
67 |
68 | friend class boost::serialization::access;
69 | template void serialize(Archive& ar, const unsigned int) {
70 | ar & char_len;
71 | ar & hidden_len;
72 | ar & vocab_len;
73 | ar & layers;
74 | ar & morph_len;
75 | ar & max_eps;
76 | ar & max_lm_pos_weights;
77 | }
78 | };
79 |
80 | Expression LogProbDist(const vector& seq,
81 | LM *lm, ComputationGraph *cg);
82 |
83 | Expression Softplus(Expression x);
84 |
85 | float Softplus(float x);
86 |
87 | void
88 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,
89 | const vector& input_ids,
90 | vector* pred_target_ids, LM* lm,
91 | vector* ensmb_model);
92 |
93 | void Serialize(string& filename, LMSepMorph& model, vector* cnn_model);
94 |
95 | void Read(string& filename, LMSepMorph* model, vector* cnn_model);
96 |
97 | #endif
98 |
--------------------------------------------------------------------------------
/src/lm.cc:
--------------------------------------------------------------------------------
1 | #include "lm.h"
2 |
// Loads an ARPA-style character n-gram LM: each line is either
// "logprob \t n-gram" or "logprob \t n-gram \t backoff", with the n-gram's
// characters space-separated. N-grams are stored keyed by a hash of their
// character-id sequence. Exits the process if the file cannot be opened.
LM::LM(string& lm_model_file, unordered_map& char_id,
       unordered_map& id_char) {
  ifstream model_file(lm_model_file);
  if (model_file.is_open()) {
    string line;
    char_to_id = char_id;
    id_to_char = id_char;
    while(getline(model_file, line)) {
      vector items = split_line(line, '\t');
      if (items.size() == 2) {
        // n-gram with no backoff weight.
        vector seq;
        for (string& ch : split_line(items[1], ' ')) {
          seq.push_back(char_to_id[ch]);
        }
        lp[HashSeq(seq)] = atof(items[0].c_str());
      } else if (items.size() == 3) {
        // n-gram with a backoff weight.
        vector seq;
        for (string& ch : split_line(items[1], ' ')) {
          seq.push_back(char_to_id[ch]);
        }
        lp[HashSeq(seq)] = atof(items[0].c_str());
        b[HashSeq(seq)] = atof(items[2].c_str());
      }
    }
    model_file.close();
    cerr << "LM loaded from: " << lm_model_file << endl;
    cerr << "LM size: " << lp.size() << endl;
  } else {
    cerr << "File opening failed" << endl;
    // NOTE(review): exit(0) signals success to the shell despite the error;
    // a nonzero status would be more conventional.
    exit(0);
  }
}
35 |
// Hashes a character-id sequence into the key used by the lp/b tables.
// Relies on boost::hash_range being order-sensitive over the sequence.
size_t LM::HashSeq(vector& seq) {
  return boost::hash_range(seq.begin(), seq.end());
}
39 |
// Debug helper: prints a character-id sequence to stderr as space-separated
// characters, followed by a newline.
void
PrintSeq(vector& seq, unordered_map& id_to_char) {
  for (unsigned i = 0; i < seq.size(); ++i) {
    cerr << id_to_char[seq[i]] << " ";
  }
  cerr << endl;
}
47 |
/* LogProb(w1, w2, ..., wn) = LogProb(w2, w3, ..., wn)
                              + backoff(w1, w2, ..., wn-1);
   http://cmusphinx.sourceforge.net/wiki/sphinx4:standardgrammarformats

   Standard back-off recursion: if the full n-gram is unknown, back off to
   the (n-1)-gram suffix, adding the backoff weight of the (n-1)-gram
   prefix (0 if absent). The result is memoized into lp[].

   Assumes that unigrams are always present in lp[].
   NOTE(review): if a unigram is missing, the recursion reaches an empty
   sequence and recurses on it indefinitely — TODO confirm the vocab/LM
   files guarantee unigram coverage.
*/
float LM::LogProb(vector& seq) {
  size_t seq_hash = HashSeq(seq);
  auto it_seq = lp.find(seq_hash);
  if (it_seq == lp.end()) {
    // Backoff weight of the context (all but the last character).
    vector backoff_seq(seq.begin(), seq.end() - 1);
    size_t backoff_hash = HashSeq(backoff_seq);
    auto it_backoff = b.find(backoff_hash);

    float backoff;
    if (it_backoff == b.end()) {
      backoff = 0.0;
    } else {
      backoff = it_backoff->second;
    }

    // Recurse on the shorter suffix and memoize the combined score.
    vector small(seq.begin() + 1, seq.end());
    lp[seq_hash] = backoff + LogProb(small);
    return lp[seq_hash];
  } else {
    return it_seq->second; // Return the log prob
  }
}
76 |
// Chain-rule score of a whole sequence: sums LogProb over every prefix
// seq[0..i), i = 1..n, so each character is scored given its history.
float LM::LogProbSeq(vector& seq) {
  float score = 0.;
  for (unsigned i = 1; i <= seq.size(); ++i) {
    vector temp(seq.begin(), seq.begin()+ i);
    score += LogProb(temp);
  }
  return score;
}
85 |
--------------------------------------------------------------------------------
/src/lm.h:
--------------------------------------------------------------------------------
#ifndef LM_H_
#define LM_H_

#include "utils.h"

// NOTE(review): include targets below look stripped by extraction.
#include
#include

using namespace std;

// Back-off character n-gram language model loaded from an ARPA-style file.
// Scores are looked up by hashing character-id sequences.
class LM {
 public:
  unordered_map char_to_id;  // character string -> id
  unordered_map id_to_char;  // id -> character string

  LM (string& lm_model_file, unordered_map& char_id,
      unordered_map& id_char);
  // Log-prob of a whole sequence via the chain rule over prefixes.
  float LogProbSeq(vector& seq);
  // Log-prob of the last character given its history (with back-off);
  // memoizes computed scores.
  float LogProb(vector& seq);
  size_t HashSeq(vector& seq);

 private:
  unordered_map lp, b;  // log-probs and backoff weights by hash
};

// Debug helper: print a character-id sequence to stderr.
void
PrintSeq(vector& seq, unordered_map& id_to_char);

#endif
30 |
--------------------------------------------------------------------------------
/src/no-enc.cc:
--------------------------------------------------------------------------------
#include "no-enc.h"

using namespace std;
using namespace cnn;
using namespace cnn::expr;

// Word-boundary markers. NOTE(review): the literals look stripped by
// extraction (likely "<w>" / "</w>") — TODO confirm.
string BOW = "", EOW = "";
int MAX_PRED_LEN = 100;  // Hard cap on decoded output length.
// NOTE(review): numeric_limits<float>::min() is the smallest POSITIVE
// normal value, not negative infinity — if NEG_INF is meant as a -inf
// sentinel this is almost certainly a bug; -numeric_limits<float>::infinity()
// (or ::lowest()) would be correct. Template argument also stripped here.
float NEG_INF = numeric_limits::min();
10 |
// Stores hyper-parameters and registers parameters with the per-morph cnn
// models. The `optimizer` argument is unused here.
NoEnc::NoEnc(const unsigned& char_length, const unsigned& hidden_length,
             const unsigned& vocab_length, const unsigned& num_layers,
             const unsigned& num_morph, vector* m,
             vector* optimizer) {
  char_len = char_length;
  hidden_len = hidden_length;
  vocab_len = vocab_length;
  layers = num_layers;
  morph_len = num_morph;
  InitParams(m);
}
22 |
// Encoder-free variant: only a decoder LSTM per morph type, whose input is
// [previous output char; aligned input char] (2 * char_len) — no encoded
// word vector. The commented-out lines are the removed encoder machinery.
void NoEnc::InitParams(vector* m) {
  for (unsigned i = 0; i < morph_len; ++i) {
    //input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));
    //input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));
    output_forward.push_back(LSTMBuilder(layers, 2 * char_len, // + hidden_len,
                                         hidden_len, (*m)[i]));

    phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len}));
    phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1}));

    char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len}));

    //ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len,
    //                                                    2 * hidden_len}));
    //ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1}));

    eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len}));
  }
}
42 |
// Binds this morph type's decoder and output projection into the graph.
// Encoder-related bindings are commented out in this encoder-free variant.
void NoEnc::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) {
  //input_forward[morph_id].new_graph(*cg);
  //input_backward[morph_id].new_graph(*cg);
  output_forward[morph_id].new_graph(*cg);

  hidden_to_output = parameter(*cg, phidden_to_output[morph_id]);
  hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]);

  //transform_encoded = parameter(*cg, ptransform_encoded[morph_id]);
  //transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]);
}
54 |
55 | /*void NoEnc::RunFwdBwd(const unsigned& morph_id,
56 | const vector& inputs,
57 | Expression* hidden, ComputationGraph *cg) {
58 | vector input_vecs;
59 | for (const unsigned& input_id : inputs) {
60 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id));
61 | }
62 |
63 | // Run forward LSTM
64 | Expression forward_unit;
65 | input_forward[morph_id].start_new_sequence();
66 | for (unsigned i = 0; i < input_vecs.size(); ++i) {
67 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]);
68 | }
69 |
70 | // Run backward LSTM
71 | Expression backward_unit;
72 | input_backward[morph_id].start_new_sequence();
73 | for (int i = input_vecs.size() - 1; i >= 0; --i) {
74 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]);
75 | }
76 |
77 | // Concatenate the forward and back hidden layers
78 | *hidden = concatenate({forward_unit, backward_unit});
79 | }
80 |
81 | void NoEnc::TransformEncodedInput(Expression* encoded_input) const {
82 | *encoded_input = affine_transform({transform_encoded_bias,
83 | transform_encoded, *encoded_input});
84 | }*/
85 |
// Projects a decoder hidden state to vocabulary-sized logits.
void NoEnc::ProjectToOutput(const Expression& hidden, Expression* out) const {
  *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden});
}
89 |
// Plain per-position NLL over the decoder hidden states — no LM mixing in
// this variant. Requires one hidden state per target character.
Expression NoEnc::ComputeLoss(const vector& hidden_units,
                              const vector& targets) const {
  assert(hidden_units.size() == targets.size());
  vector losses;
  for (unsigned i = 0; i < hidden_units.size(); ++i) {
    Expression out;
    ProjectToOutput(hidden_units[i], &out);
    losses.push_back(pickneglogsoftmax(out, targets[i]));
  }
  return sum(losses);
}
101 |
// One training step for the encoder-free model. Decoder input at each step
// is just [previous output char; aligned input char or epsilon] — there is
// no encoded word vector. Returns the example loss.
float NoEnc::Train(const unsigned& morph_id, const vector& inputs,
                   const vector& outputs, AdadeltaTrainer* ada_gd) {
  ComputationGraph cg;
  AddParamsToCG(morph_id, &cg);

  // Encode and Transform to feed into decoder — disabled in this variant.
  //Expression encoded_input_vec;
  //RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg);
  //TransformEncodedInput(&encoded_input_vec);

  // Use the character streams directly to predict the transformed word.
  vector input_vecs_for_dec;
  vector output_ids_for_pred;
  for (unsigned i = 0; i < outputs.size(); ++i) {
    if (i < outputs.size() - 1) {
      // The end-of-word marker will not be fed as input -- it is predicted.
      if (i < inputs.size() - 1) {
        input_vecs_for_dec.push_back(concatenate(
            {lookup(cg, char_vecs[morph_id], outputs[i]),
             lookup(cg, char_vecs[morph_id], inputs[i + 1])}));
      } else {
        // Input exhausted: substitute the clamped epsilon vector.
        input_vecs_for_dec.push_back(concatenate(
            {lookup(cg, char_vecs[morph_id], outputs[i]),
             lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))}));
      }
    }
    if (i > 0) { // The begin marker will not be predicted -- its fed in.
      output_ids_for_pred.push_back(outputs[i]);
    }
  }

  vector decoder_hidden_units;
  output_forward[morph_id].start_new_sequence();
  for (const auto& vec : input_vecs_for_dec) {
    decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec));
  }
  Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred);

  float return_loss = as_scalar(cg.forward());
  cg.backward();
  ada_gd->update(1.0f);
  return return_loss;
}
145 |
146 | void
147 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,  // greedy decoding, averaging log-probs over an ensemble of NoEnc models
148 | const vector& input_ids,
149 | vector* pred_target_ids, vector* ensmb_model) {  // pred_target_ids receives BOW, predicted chars, and (if reached) EOW
150 | ComputationGraph cg;
151 |
152 | /*auto temp_model = *(*ensmb_model)[0];
153 | for (unsigned char_id = 0; char_id < temp_model.vocab_len; ++char_id) {
154 | for (unsigned index = 0; index < temp_model.char_len; ++index) {
155 | cout << temp_model.char_vecs[0]->values[char_id].v[index] << " ";
156 | }
157 | cout << endl;
158 | }
159 | exit(0);*/
160 |
161 | unsigned ensmb = ensmb_model->size();
162 | vector encoded_word_vecs;  // unused here: NoEnc has no encoder (cf. the commented-out RunFwdBwd below)
163 | for (unsigned i = 0; i < ensmb; ++i) {
164 | Expression encoded_word_vec;
165 | auto model = (*ensmb_model)[i];
166 | model->AddParamsToCG(morph_id, &cg);  // all ensemble members share one computation graph
167 | /*model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg);
168 | model->TransformEncodedInput(&encoded_word_vec);
169 | encoded_word_vecs.push_back(encoded_word_vec);*/
170 | model->output_forward[morph_id].start_new_sequence();
171 | }
172 |
173 | unsigned out_index = 1;  // next input character to align with the decoder (0 is BOW)
174 | unsigned pred_index = char_to_id[BOW];  // decoding starts from the begin-of-word symbol
175 | while (pred_target_ids->size() < MAX_PRED_LEN) {  // NOTE(review): signed/unsigned comparison (MAX_PRED_LEN is int)
176 | vector ensmb_out;
177 | pred_target_ids->push_back(pred_index);
178 | if (pred_index == char_to_id[EOW]) {
179 | return; // If the end is found, break from the loop and return
180 | }
181 |
182 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
183 | auto model = (*ensmb_model)[ensmb_id];
184 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index);  // embedding of the char predicted last step
185 | Expression input, input_char_vec;
186 | if (out_index < input_ids.size()) {  // still have input chars: feed the aligned one
187 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]);
188 | } else {  // input exhausted: feed a learned epsilon vector, capped at max_eps - 1
189 | input_char_vec = lookup(cg, model->eps_vecs[morph_id],
190 | min(unsigned(out_index - input_ids.size()),
191 | model->max_eps - 1));
192 | }
193 | input = concatenate({//encoded_word_vecs[ensmb_id],
194 | prev_output_vec,
195 | input_char_vec});
196 |
197 | Expression hidden = model->output_forward[morph_id].add_input(input);  // advance this member's decoder LSTM
198 | Expression out;
199 | model->ProjectToOutput(hidden, &out);
200 | ensmb_out.push_back(log_softmax(out));  // member's log-distribution over chars
201 | }
202 |
203 | Expression out = sum(ensmb_out) / ensmb_out.size();  // average the ensemble log-probs
204 | vector dist = as_vector(cg.incremental_forward());  // evaluate only the newly added nodes
205 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));  // greedy argmax
206 | out_index++;
207 | }
208 | }
209 |
210 | void
211 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,  // beam-search decoding over an ensemble of NoEnc models
212 | unordered_map& char_to_id,
213 | const vector& input_ids,
214 | vector >* sequences, vector* tm_scores,  // outputs: beam_size char-id sequences and their log-prob scores
215 | vector* ensmb_model) {
216 | unsigned out_index = 1;  // next input character to align with the decoder (0 is BOW)
217 | unsigned ensmb = ensmb_model->size();
218 | ComputationGraph cg;
219 |
220 | // Compute stuff for every model in the ensemble.
221 | vector encoded_word_vecs;  // unused here: NoEnc has no encoder (cf. the commented-out RunFwdBwd below)
222 | vector ensmb_out;
223 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
224 | auto& model = *(*ensmb_model)[ensmb_id];
225 | model.AddParamsToCG(morph_id, &cg);
226 |
227 | /*Expression encoded_word_vec;
228 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg);
229 | model.TransformEncodedInput(&encoded_word_vec);
230 | encoded_word_vecs.push_back(encoded_word_vec);*/
231 | model.output_forward[morph_id].start_new_sequence();
232 |
233 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id],  // first decoder step is fed the BOW embedding
234 | char_to_id[BOW]);
235 | Expression input = concatenate({//encoded_word_vecs[ensmb_id],
236 | prev_output_vec,
237 | lookup(cg, model.char_vecs[morph_id],
238 | input_ids[out_index])});
239 | Expression hidden = model.output_forward[morph_id].add_input(input);
240 | Expression out;
241 | model.ProjectToOutput(hidden, &out);
242 | out = log_softmax(out);
243 | ensmb_out.push_back(out);
244 | }
245 |
246 | // Compute the average of the ensemble output.
247 | Expression out_dist = average(ensmb_out);
248 | vector log_dist = as_vector(cg.incremental_forward());
249 | priority_queue > init_queue;  // max-heap of (log-prob, char id) for the first prediction
250 | for (unsigned i = 0; i < log_dist.size(); ++i) {
251 | init_queue.push(make_pair(log_dist[i], i));
252 | }
253 | unsigned vocab_size = log_dist.size();
254 |
255 | // Initialise the beam_size sequences, scores, hidden states.
256 | vector log_scores;
257 | vector > prev_states;  // per beam, one RNNPointer per ensemble member
258 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
259 | vector seq;
260 | seq.push_back(char_to_id[BOW]);
261 | seq.push_back(init_queue.top().second);  // beam_id-th best first character
262 | sequences->push_back(seq);
263 | log_scores.push_back(init_queue.top().first);
264 |
265 | vector ensmb_states;  // all beams share the same post-BOW decoder state initially
266 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
267 | auto& model = *(*ensmb_model)[ensmb_id];
268 | ensmb_states.push_back(model.output_forward[morph_id].state());
269 | }
270 | prev_states.push_back(ensmb_states);
271 | init_queue.pop();
272 | }
273 |
274 | vector neg_inf(vocab_size, NEG_INF);  // score mask so dead beams never contribute candidates
275 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf);
276 |
277 | vector active_beams(beam_size, true);
278 | while (true) {  // one iteration per output position until every beam is finished
279 | out_index++;
280 | priority_queue > > probs_queue;  // (cumulative log-prob, (beam, char)) candidates
281 | vector > curr_states;
282 | vector out_dist;
283 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
284 | if (active_beams[beam_id]) {
285 | unsigned prev_out_char = (*sequences)[beam_id].back();
286 | vector ensmb_out;
287 | vector ensmb_states;
288 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) {
289 | auto& model = *(*ensmb_model)[ensmb_id];
290 | Expression input_char_vec;
291 | if (out_index < input_ids.size()) {  // aligned input char, or epsilon once input is exhausted
292 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]);
293 | } else {
294 | input_char_vec = lookup(cg, model.eps_vecs[morph_id],
295 | min(unsigned(out_index - input_ids.size()),
296 | model.max_eps - 1));
297 | }
298 |
299 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char);
300 | Expression input = concatenate({//encoded_word_vecs[ensmb_id],
301 | prev_out_vec,
302 | input_char_vec});
303 | Expression hidden = model.output_forward[morph_id].add_input(
304 | prev_states[beam_id][ensmb_id], input);  // resume this beam's decoder from its saved RNN state
305 | ensmb_states.push_back(model.output_forward[morph_id].state());
306 |
307 | Expression out;
308 | model.ProjectToOutput(hidden, &out);
309 | out = log_softmax(out);
310 | ensmb_out.push_back(out);
311 | }
312 | curr_states.push_back(ensmb_states);
313 | out_dist.push_back(average(ensmb_out));  // ensemble-averaged log-probs for this beam
314 | } else {
315 | vector dummy(ensmb, RNNPointer(0));  // placeholder state; never resumed because the beam stays inactive
316 | curr_states.push_back(dummy);
317 | out_dist.push_back(neg_inf_vec);
318 | }
319 | }
320 |
321 | Expression all_scores = concatenate(out_dist);  // one flat [beam_size * vocab_size] score vector
322 | vector log_dist = as_vector(cg.incremental_forward());
323 |
324 | for (unsigned index = 0; index < log_dist.size(); ++index) {
325 | unsigned beam_id = index / vocab_size;  // recover (beam, char) from the flat index
326 | unsigned char_id = index % vocab_size;
327 | if (active_beams[beam_id]) {
328 | pair location = make_pair(beam_id, char_id);
329 | probs_queue.push(pair >(
330 | log_scores[beam_id] + log_dist[index], location));  // cumulative sequence log-prob
331 | }
332 | }
333 |
334 | // Find the beam_size best now and update the variables.
335 | unordered_map > new_seq;  // staged updates so extensions read the OLD sequences
336 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
337 | if (active_beams[beam_id]) {
338 | float log_prob = probs_queue.top().first;
339 | pair location = probs_queue.top().second;
340 | unsigned old_beam_id = location.first, char_id = location.second;
341 |
342 | vector seq = (*sequences)[old_beam_id];  // a slot may be refilled from a different parent beam
343 | seq.push_back(char_id);
344 | new_seq[beam_id] = seq;
345 | log_scores[beam_id] = log_prob; // Update the score
346 |
347 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state
348 | probs_queue.pop();
349 | }
350 | }
351 |
352 | // Update the sequences now.
353 | for (auto& it : new_seq) {
354 | (*sequences)[it.first] = it.second;
355 | }
356 |
357 | // Check if a sequence should be made inactive.
358 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
359 | if (active_beams[beam_id] &&
360 | ((*sequences)[beam_id].back() == char_to_id[EOW] ||
361 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) {  // finished (EOW) or over the length cap
362 | active_beams[beam_id] = false;
363 | }
364 | }
365 |
366 | // Check if all sequences are inactive.
367 | bool all_inactive = true;
368 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
369 | if (active_beams[beam_id]) {
370 | all_inactive = false;
371 | break;
372 | }
373 | }
374 |
375 | if (all_inactive) {  // done: report final scores and return the beams in *sequences
376 | *tm_scores = log_scores;
377 | return;
378 | }
379 | }
380 | }
381 |
382 | void Serialize(string& filename, NoEnc& model, vector* cnn_models) {  // write hyperparameters, then every per-morph cnn::Model, as a Boost text archive
383 | ofstream outfile(filename);
384 | if (!outfile.is_open()) {
385 | cerr << "File opening failed" << endl;  // NOTE(review): no early return -- serialization below still runs on a bad stream
386 | }
387 |
388 | boost::archive::text_oarchive oa(outfile);
389 | oa & model;  // writes only the scalar hyperparameters (see NoEnc::serialize)
390 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
391 | oa & *(*cnn_models)[i];  // one parameter collection per morph class, in index order
392 | }
393 |
394 | cerr << "Saved model to: " << filename << endl;
395 | outfile.close();
396 | }
397 |
398 | void Read(string& filename, NoEnc* model, vector* cnn_models) {  // inverse of Serialize: must mirror its archive order exactly
399 | ifstream infile(filename);
400 | if (!infile.is_open()) {
401 | cerr << "File opening failed" << endl;  // NOTE(review): no early return -- deserialization below still runs on a bad stream
402 | }
403 |
404 | boost::archive::text_iarchive ia(infile);
405 | ia & *model;  // restores hyperparameters, including morph_len used below
406 | for (unsigned i = 0; i < model->morph_len; ++i) {
407 | Model *cnn_model = new Model();  // ownership passes to the caller via cnn_models; never freed here
408 | cnn_models->push_back(cnn_model);
409 | }
410 |
411 | model->InitParams(cnn_models);  // parameters must be registered before their values are read back
412 | for (unsigned i = 0; i < model->morph_len; ++i) {
413 | ia & *(*cnn_models)[i];
414 | }
415 |
416 | cerr << "Loaded model from: " << filename << endl;
417 | infile.close();
418 | }
419 |
420 |
--------------------------------------------------------------------------------
/src/no-enc.h:
--------------------------------------------------------------------------------
1 | #ifndef NO_ENC_H_
2 | #define NO_ENC_H_
3 |
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "utils.h"
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 |
21 | using namespace std;
22 | using namespace cnn;
23 | using namespace cnn::expr;
24 |
25 | class NoEnc {  // Decoder-only inflection model: SepMorph with the bidirectional input encoder removed (hence "no enc")
26 | public:
27 | vector output_forward;  // one decoder LSTM per morph class
28 | vector char_vecs;  // per-morph character embedding tables
29 |
30 | Expression hidden_to_output, hidden_to_output_bias;  // graph-bound copies of the output projection (set by AddParamsToCG)
31 | vector phidden_to_output, phidden_to_output_bias;  // the underlying per-morph parameters
32 |
33 | //Expression transform_encoded, transform_encoded_bias;
34 | //vector ptransform_encoded, ptransform_encoded_bias;
35 |
36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5;  // embedding dim, LSTM dim, vocab size, LSTM layers, #morph classes, #epsilon vectors
37 | vector eps_vecs;  // learned fillers fed once the input word is exhausted
38 |
39 | NoEnc() {}  // default-construct then Read() to load a saved model
40 |
41 | NoEnc(const unsigned& char_length, const unsigned& hidden_length,
42 | const unsigned& vocab_length, const unsigned& layers,
43 | const unsigned& num_morph, vector* m,
44 | vector* optimizer);
45 |
46 | void InitParams(vector* m);  // register all parameters with the per-morph cnn Models
47 |
48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);  // bind one morph class's parameters into a graph
49 |
50 | //void RunFwdBwd(const unsigned& morph_id, const vector& inputs,
51 | // Expression* hidden, ComputationGraph *cg);
52 |
53 | //void TransformEncodedInput(Expression* encoded_input) const;
54 |
55 | //void TransformEncodedInputDuringDecoding(Expression* encoded_input) const;
56 |
57 | void ProjectToOutput(const Expression& hidden, Expression* out) const;  // hidden state -> vocab logits
58 |
59 | Expression ComputeLoss(const vector& hidden_units,
60 | const vector& targets) const;  // summed per-step neg-log-softmax loss
61 |
62 | float Train(const unsigned& morph_id, const vector& inputs,
63 | const vector& outputs, AdadeltaTrainer* ada_gd);  // one training step; returns the loss
64 |
65 | friend class boost::serialization::access;
66 | template void serialize(Archive& ar, const unsigned int) {  // only scalar hyperparameters; cnn Models are archived separately by Serialize/Read
67 | ar & char_len;
68 | ar & hidden_len;
69 | ar & vocab_len;
70 | ar & layers;
71 | ar & morph_len;
72 | ar & max_eps;
73 | }
74 | };
75 |
76 | void
77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,  // greedy ensemble decoding (defined in no-enc.cc)
78 | const vector& input_ids,
79 | vector* pred_target_ids, vector* ensmb_model);
80 |
81 | void
82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,  // beam-search ensemble decoding
83 | unordered_map& char_to_id,
84 | const vector& input_ids,
85 | vector >* sequences, vector* tm_scores,
86 | vector* ensmb_model);
87 |
88 | void Serialize(string& filename, NoEnc& model, vector* cnn_model);  // save hyperparameters + per-morph parameters
89 |
90 | void Read(string& filename, NoEnc* model, vector* cnn_model);  // load a model saved by Serialize
91 |
92 |
93 | #endif
94 |
--------------------------------------------------------------------------------
/src/sep-morph.cc:
--------------------------------------------------------------------------------
1 | #include "sep-morph.h"
2 |
3 | using namespace std;
4 | using namespace cnn;
5 | using namespace cnn::expr;
6 |
7 | string BOW = "", EOW = "";
8 | int MAX_PRED_LEN = 100;
9 | float NEG_INF = numeric_limits::min();
10 |
11 | SepMorph::SepMorph(const unsigned& char_length, const unsigned& hidden_length,  // store hyperparameters, then register all parameters with the per-morph Models
12 |                    const unsigned& vocab_length, const unsigned& num_layers,
13 |                    const unsigned& num_morph, vector* m,
14 |                    vector* optimizer) {  // NOTE(review): optimizer is accepted but never used here
15 | char_len = char_length;
16 | hidden_len = hidden_length;
17 | vocab_len = vocab_length;
18 | layers = num_layers;
19 | morph_len = num_morph;
20 | InitParams(m);  // m must already hold morph_len cnn::Model instances
21 | }
22 |
23 | void SepMorph::InitParams(vector* m) {  // register one full parameter set per morph class; (*m)[i] owns class i's parameters
24 | for (unsigned i = 0; i < morph_len; ++i) {
25 | input_forward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));  // encoder, left-to-right
26 | input_backward.push_back(LSTMBuilder(layers, char_len, hidden_len, (*m)[i]));  // encoder, right-to-left
27 | output_forward.push_back(LSTMBuilder(layers, 2 * char_len + hidden_len,  // decoder input: [prev output char ; aligned input char ; encoded word]
28 | hidden_len, (*m)[i]));
29 |
30 | phidden_to_output.push_back((*m)[i]->add_parameters({vocab_len, hidden_len}));  // output projection W
31 | phidden_to_output_bias.push_back((*m)[i]->add_parameters({vocab_len, 1}));  // output projection bias
32 |
33 | char_vecs.push_back((*m)[i]->add_lookup_parameters(vocab_len, {char_len}));  // character embeddings
34 |
35 | ptransform_encoded.push_back((*m)[i]->add_parameters({hidden_len,  // maps the 2*hidden biLSTM summary down to hidden_len
36 | 2 * hidden_len}));
37 | ptransform_encoded_bias.push_back((*m)[i]->add_parameters({hidden_len, 1}));
38 |
39 | eps_vecs.push_back((*m)[i]->add_lookup_parameters(max_eps, {char_len}));  // epsilon fillers used after the input word runs out
40 | }
41 | }
42 |
43 | void SepMorph::AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg) {  // bind morph class morph_id's parameters into the given graph; must precede any lookup/add_input
44 | input_forward[morph_id].new_graph(*cg);
45 | input_backward[morph_id].new_graph(*cg);
46 | output_forward[morph_id].new_graph(*cg);
47 |
48 | hidden_to_output = parameter(*cg, phidden_to_output[morph_id]);  // refresh the graph-bound Expression handles
49 | hidden_to_output_bias = parameter(*cg, phidden_to_output_bias[morph_id]);
50 |
51 | transform_encoded = parameter(*cg, ptransform_encoded[morph_id]);
52 | transform_encoded_bias = parameter(*cg, ptransform_encoded_bias[morph_id]);
53 | }
54 |
55 | void SepMorph::RunFwdBwd(const unsigned& morph_id,  // bidirectional LSTM encoding of the input word
56 |                          const vector& inputs,
57 |                          Expression* hidden, ComputationGraph *cg) {  // *hidden receives a 2*hidden_len summary vector
58 | vector input_vecs;
59 | for (const unsigned& input_id : inputs) {  // embed every input character
60 | input_vecs.push_back(lookup(*cg, char_vecs[morph_id], input_id));
61 | }
62 |
63 | // Run forward LSTM
64 | Expression forward_unit;
65 | input_forward[morph_id].start_new_sequence();
66 | for (unsigned i = 0; i < input_vecs.size(); ++i) {
67 | forward_unit = input_forward[morph_id].add_input(input_vecs[i]);  // keep only the final state
68 | }
69 |
70 | // Run backward LSTM
71 | Expression backward_unit;
72 | input_backward[morph_id].start_new_sequence();
73 | for (int i = input_vecs.size() - 1; i >= 0; --i) {  // signed index so the >= 0 termination works
74 | backward_unit = input_backward[morph_id].add_input(input_vecs[i]);
75 | }
76 |
77 | // Concatenate the forward and back hidden layers
78 | *hidden = concatenate({forward_unit, backward_unit});
79 | }
80 |
81 | void SepMorph::TransformEncodedInput(Expression* encoded_input) const {  // project the 2*hidden biLSTM summary down to hidden_len, in place
82 | *encoded_input = affine_transform({transform_encoded_bias,
83 | transform_encoded, *encoded_input});  // enc' = b + W * enc
84 | }
85 |
86 | void SepMorph::ProjectToOutput(const Expression& hidden, Expression* out) const {  // Map a decoder hidden state to vocabulary-sized logits.
87 | *out = affine_transform({hidden_to_output_bias, hidden_to_output, hidden});  // out = bias + W_out * hidden (softmax is applied by callers)
88 | }
89 |
90 | Expression SepMorph::ComputeLoss(const vector& hidden_units,  // NOTE(review): template args stripped by the dump; presumably vector<Expression>
91 |                                  const vector& targets) const {  // per-step target character ids
92 | assert(hidden_units.size() == targets.size());  // exactly one target per decoder time step
93 | vector losses;
94 | for (unsigned i = 0; i < hidden_units.size(); ++i) {
95 | Expression out;
96 | ProjectToOutput(hidden_units[i], &out);  // logits for step i
97 | losses.push_back(pickneglogsoftmax(out, targets[i]));  // -log softmax(out)[targets[i]]
98 | }
99 | return sum(losses);  // total negative log-likelihood of the sequence
100 | }
101 |
102 | float SepMorph::Train(const unsigned& morph_id, const vector& inputs,  // one SGD step for one (input word, output word) pair of this morph class
103 |                       const vector& outputs, AdadeltaTrainer* ada_gd) {
104 | ComputationGraph cg;  // fresh graph per example
105 | AddParamsToCG(morph_id, &cg);
106 |
107 | // Encode and Transform to feed into decoder
108 | Expression encoded_input_vec;
109 | RunFwdBwd(morph_id, inputs, &encoded_input_vec, &cg);  // biLSTM summary of the input word
110 | TransformEncodedInput(&encoded_input_vec);  // reduce to hidden_len
111 |
112 | // Use this encoded word vector to predict the transformed word
113 | vector input_vecs_for_dec;  // decoder input per step: [encoded word ; prev output char ; aligned input char]
114 | vector output_ids_for_pred;  // gold char id to predict at each step
115 | for (unsigned i = 0; i < outputs.size(); ++i) {
116 | if (i < outputs.size() - 1) {
117 | // The end-of-word marker is not fed as input -- it needs to be predicted.
118 | if (i < inputs.size() - 1) {  // while input chars remain, feed the next input char
119 | input_vecs_for_dec.push_back(concatenate(
120 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]),
121 | lookup(cg, char_vecs[morph_id], inputs[i + 1])}));
122 | } else {  // input exhausted: feed a learned epsilon vector, indexed by overshoot (capped at max_eps - 1)
123 | input_vecs_for_dec.push_back(concatenate(
124 | {encoded_input_vec, lookup(cg, char_vecs[morph_id], outputs[i]),
125 | lookup(cg, eps_vecs[morph_id], min(unsigned(i - inputs.size()), max_eps - 1))}));
126 | }
127 | }
128 | if (i > 0) { // The begin-of-word marker is not predicted in the output -- it is fed in.
129 | output_ids_for_pred.push_back(outputs[i]);
130 | }
131 | }
132 |
133 | vector decoder_hidden_units;
134 | output_forward[morph_id].start_new_sequence();
135 | for (const auto& vec : input_vecs_for_dec) {  // teacher-forced decoder pass
136 | decoder_hidden_units.push_back(output_forward[morph_id].add_input(vec));
137 | }
138 | Expression loss = ComputeLoss(decoder_hidden_units, output_ids_for_pred);
139 |
140 | float return_loss = as_scalar(cg.forward());  // forward() evaluates the loss expression built above
141 | cg.backward();  // backprop gradients
142 | ada_gd->update(1.0f);  // Adadelta parameter update
143 | return return_loss;
144 | }
145 |
146 | void
147 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,  // greedy decoding, averaging log-probs over an ensemble of SepMorph models
148 |                const vector& input_ids,
149 |                vector* pred_target_ids, vector* ensmb_model) {  // pred_target_ids receives BOW, predicted chars, and (if reached) EOW
150 | ComputationGraph cg;
151 |
152 | unsigned ensmb = ensmb_model->size();
153 | vector encoded_word_vecs;  // one transformed biLSTM encoding per ensemble member
154 | for (unsigned i = 0; i < ensmb; ++i) {
155 | Expression encoded_word_vec;
156 | auto model = (*ensmb_model)[i];
157 | model->AddParamsToCG(morph_id, &cg);  // all ensemble members share one computation graph
158 | model->RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg);
159 | model->TransformEncodedInput(&encoded_word_vec);
160 | encoded_word_vecs.push_back(encoded_word_vec);
161 | model->output_forward[morph_id].start_new_sequence();
162 | }
163 |
164 | unsigned out_index = 1;  // next input character to align with the decoder (0 is BOW)
165 | unsigned pred_index = char_to_id[BOW];  // decoding starts from the begin-of-word symbol
166 | while (pred_target_ids->size() < MAX_PRED_LEN) {  // NOTE(review): signed/unsigned comparison (MAX_PRED_LEN is int)
167 | vector ensmb_out;
168 | pred_target_ids->push_back(pred_index);
169 | if (pred_index == char_to_id[EOW]) {
170 | return; // If the end is found, break from the loop and return
171 | }
172 |
173 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
174 | auto model = (*ensmb_model)[ensmb_id];
175 | Expression prev_output_vec = lookup(cg, model->char_vecs[morph_id], pred_index);  // embedding of the char predicted last step
176 | Expression input, input_char_vec;
177 | if (out_index < input_ids.size()) {  // still have input chars: feed the aligned one
178 | input_char_vec = lookup(cg, model->char_vecs[morph_id], input_ids[out_index]);
179 | } else {  // input exhausted: feed a learned epsilon vector, capped at max_eps - 1
180 | input_char_vec = lookup(cg, model->eps_vecs[morph_id],
181 | min(unsigned(out_index - input_ids.size()),
182 | model->max_eps - 1));
183 | }
184 | input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
185 | input_char_vec});
186 |
187 | Expression hidden = model->output_forward[morph_id].add_input(input);  // advance this member's decoder LSTM
188 | Expression out;
189 | model->ProjectToOutput(hidden, &out);
190 | ensmb_out.push_back(log_softmax(out));  // member's log-distribution over chars
191 | }
192 |
193 | Expression out = sum(ensmb_out) / ensmb_out.size();  // average the ensemble log-probs
194 | vector dist = as_vector(cg.incremental_forward());  // evaluate only the newly added nodes
195 | pred_index = distance(dist.begin(), max_element(dist.begin(), dist.end()));  // greedy argmax
196 | out_index++;
197 | }
198 | }
199 |
200 | void
201 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,  // beam-search decoding over an ensemble of SepMorph models
202 |                    unordered_map& char_to_id,
203 |                    const vector& input_ids,
204 |                    vector >* sequences, vector* tm_scores,  // outputs: beam_size char-id sequences and their log-prob scores
205 |                    vector* ensmb_model) {
206 | unsigned out_index = 1;  // next input character to align with the decoder (0 is BOW)
207 | unsigned ensmb = ensmb_model->size();
208 | ComputationGraph cg;
209 |
210 | // Compute stuff for every model in the ensemble.
211 | vector encoded_word_vecs;  // one transformed biLSTM encoding per ensemble member
212 | vector ensmb_out;
213 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
214 | auto& model = *(*ensmb_model)[ensmb_id];
215 | model.AddParamsToCG(morph_id, &cg);
216 |
217 | Expression encoded_word_vec;
218 | model.RunFwdBwd(morph_id, input_ids, &encoded_word_vec, &cg);
219 | model.TransformEncodedInput(&encoded_word_vec);
220 | encoded_word_vecs.push_back(encoded_word_vec);
221 | model.output_forward[morph_id].start_new_sequence();
222 |
223 | Expression prev_output_vec = lookup(cg, model.char_vecs[morph_id],  // first decoder step is fed the BOW embedding
224 |                                     char_to_id[BOW]);
225 | Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_output_vec,
226 |                                 lookup(cg, model.char_vecs[morph_id],
227 |                                        input_ids[out_index])});
228 | Expression hidden = model.output_forward[morph_id].add_input(input);
229 | Expression out;
230 | model.ProjectToOutput(hidden, &out);
231 | out = log_softmax(out);
232 | ensmb_out.push_back(out);
233 | }
234 |
235 | // Compute the average of the ensemble output.
236 | Expression out_dist = average(ensmb_out);
237 | vector log_dist = as_vector(cg.incremental_forward());
238 | priority_queue > init_queue;  // max-heap of (log-prob, char id) for the first prediction
239 | for (unsigned i = 0; i < log_dist.size(); ++i) {
240 | init_queue.push(make_pair(log_dist[i], i));
241 | }
242 | unsigned vocab_size = log_dist.size();
243 |
244 | // Initialise the beam_size sequences, scores, hidden states.
245 | vector log_scores;
246 | vector > prev_states;  // per beam, one RNNPointer per ensemble member
247 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
248 | vector seq;
249 | seq.push_back(char_to_id[BOW]);
250 | seq.push_back(init_queue.top().second);  // beam_id-th best first character
251 | sequences->push_back(seq);
252 | log_scores.push_back(init_queue.top().first);
253 |
254 | vector ensmb_states;  // all beams share the same post-BOW decoder state initially
255 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ++ensmb_id) {
256 | auto& model = *(*ensmb_model)[ensmb_id];
257 | ensmb_states.push_back(model.output_forward[morph_id].state());
258 | }
259 | prev_states.push_back(ensmb_states);
260 | init_queue.pop();
261 | }
262 |
263 | vector neg_inf(vocab_size, NEG_INF);  // score mask so dead beams never contribute candidates
264 | Expression neg_inf_vec = cnn::expr::input(cg, {vocab_size}, &neg_inf);
265 |
266 | vector active_beams(beam_size, true);
267 | while (true) {  // one iteration per output position until every beam is finished
268 | out_index++;
269 | priority_queue > > probs_queue;  // (cumulative log-prob, (beam, char)) candidates
270 | vector > curr_states;
271 | vector out_dist;
272 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
273 | if (active_beams[beam_id]) {
274 | unsigned prev_out_char = (*sequences)[beam_id].back();
275 | vector ensmb_out;
276 | vector ensmb_states;
277 | for (unsigned ensmb_id = 0; ensmb_id < ensmb; ensmb_id++) {
278 | auto& model = *(*ensmb_model)[ensmb_id];
279 | Expression input_char_vec;
280 | if (out_index < input_ids.size()) {  // aligned input char, or epsilon once input is exhausted
281 | input_char_vec = lookup(cg, model.char_vecs[morph_id], input_ids[out_index]);
282 | } else {
283 | input_char_vec = lookup(cg, model.eps_vecs[morph_id],
284 |                         min(unsigned(out_index - input_ids.size()),
285 |                             model.max_eps - 1));
286 | }
287 |
288 | Expression prev_out_vec = lookup(cg, model.char_vecs[morph_id], prev_out_char);
289 | Expression input = concatenate({encoded_word_vecs[ensmb_id], prev_out_vec,
290 |                                 input_char_vec});
291 | Expression hidden = model.output_forward[morph_id].add_input(
292 |                     prev_states[beam_id][ensmb_id], input);  // resume this beam's decoder from its saved RNN state
293 | ensmb_states.push_back(model.output_forward[morph_id].state());
294 |
295 | Expression out;
296 | model.ProjectToOutput(hidden, &out);
297 | out = log_softmax(out);
298 | ensmb_out.push_back(out);
299 | }
300 | curr_states.push_back(ensmb_states);
301 | out_dist.push_back(average(ensmb_out));  // ensemble-averaged log-probs for this beam
302 | } else {
303 | vector dummy(ensmb, RNNPointer(0));  // placeholder state; never resumed because the beam stays inactive
304 | curr_states.push_back(dummy);
305 | out_dist.push_back(neg_inf_vec);
306 | }
307 | }
308 |
309 | Expression all_scores = concatenate(out_dist);  // one flat [beam_size * vocab_size] score vector
310 | vector log_dist = as_vector(cg.incremental_forward());
311 |
312 | for (unsigned index = 0; index < log_dist.size(); ++index) {
313 | unsigned beam_id = index / vocab_size;  // recover (beam, char) from the flat index
314 | unsigned char_id = index % vocab_size;
315 | if (active_beams[beam_id]) {
316 | pair location = make_pair(beam_id, char_id);
317 | probs_queue.push(pair >(
318 |                  log_scores[beam_id] + log_dist[index], location));  // cumulative sequence log-prob
319 | }
320 | }
321 |
322 | // Find the beam_size best now and update the variables.
323 | unordered_map > new_seq;  // staged updates so extensions read the OLD sequences
324 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
325 | if (active_beams[beam_id]) {
326 | float log_prob = probs_queue.top().first;
327 | pair location = probs_queue.top().second;
328 | unsigned old_beam_id = location.first, char_id = location.second;
329 |
330 | vector seq = (*sequences)[old_beam_id];  // a slot may be refilled from a different parent beam
331 | seq.push_back(char_id);
332 | new_seq[beam_id] = seq;
333 | log_scores[beam_id] = log_prob; // Update the score
334 |
335 | prev_states[beam_id] = curr_states[old_beam_id]; // Update hidden state
336 | probs_queue.pop();
337 | }
338 | }
339 |
340 | // Update the sequences now.
341 | for (auto& it : new_seq) {
342 | (*sequences)[it.first] = it.second;
343 | }
344 |
345 | // Check if a sequence should be made inactive.
346 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
347 | if (active_beams[beam_id] &&
348 | ((*sequences)[beam_id].back() == char_to_id[EOW] ||
349 | (*sequences)[beam_id].size() > MAX_PRED_LEN)) {  // finished (EOW) or over the length cap
350 | active_beams[beam_id] = false;
351 | }
352 | }
353 |
354 | // Check if all sequences are inactive.
355 | bool all_inactive = true;
356 | for (unsigned beam_id = 0; beam_id < beam_size; ++beam_id) {
357 | if (active_beams[beam_id]) {
358 | all_inactive = false;
359 | break;
360 | }
361 | }
362 |
363 | if (all_inactive) {  // done: report final scores and return the beams in *sequences
364 | *tm_scores = log_scores;
365 | return;
366 | }
367 | }
368 | }
369 |
370 | void Serialize(string& filename, SepMorph& model, vector* cnn_models) {  // write hyperparameters, then every per-morph cnn::Model, as a Boost text archive
371 | ofstream outfile(filename);
372 | if (!outfile.is_open()) {
373 | cerr << "File opening failed" << endl;  // NOTE(review): no early return -- serialization below still runs on a bad stream
374 | }
375 |
376 | boost::archive::text_oarchive oa(outfile);
377 | oa & model;  // writes only the scalar hyperparameters (see SepMorph::serialize)
378 | for (unsigned i = 0; i < cnn_models->size(); ++i) {
379 | oa & *(*cnn_models)[i];  // one parameter collection per morph class, in index order
380 | }
381 |
382 | cerr << "Saved model to: " << filename << endl;
383 | outfile.close();
384 | }
385 |
386 | void Read(string& filename, SepMorph* model, vector* cnn_models) {  // inverse of Serialize: must mirror its archive order exactly
387 | ifstream infile(filename);
388 | if (!infile.is_open()) {
389 | cerr << "File opening failed" << endl;  // NOTE(review): no early return -- deserialization below still runs on a bad stream
390 | }
391 |
392 | boost::archive::text_iarchive ia(infile);
393 | ia & *model;  // restores hyperparameters, including morph_len used below
394 | for (unsigned i = 0; i < model->morph_len; ++i) {
395 | Model *cnn_model = new Model();  // ownership passes to the caller via cnn_models; never freed here
396 | cnn_models->push_back(cnn_model);
397 | }
398 |
399 | model->InitParams(cnn_models);  // parameters must be registered before their values are read back
400 | for (unsigned i = 0; i < model->morph_len; ++i) {
401 | ia & *(*cnn_models)[i];
402 | }
403 |
404 | cerr << "Loaded model from: " << filename << endl;
405 | infile.close();
406 | }
407 |
408 |
--------------------------------------------------------------------------------
/src/sep-morph.h:
--------------------------------------------------------------------------------
1 | #ifndef SEP_MORPH_H_
2 | #define SEP_MORPH_H_
3 |
4 | #include "cnn/nodes.h"
5 | #include "cnn/cnn.h"
6 | #include "cnn/rnn.h"
7 | #include "cnn/gru.h"
8 | #include "cnn/lstm.h"
9 | #include "cnn/training.h"
10 | #include "cnn/gpu-ops.h"
11 | #include "cnn/expr.h"
12 |
13 | #include "utils.h"
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 |
21 | using namespace std;
22 | using namespace cnn;
23 | using namespace cnn::expr;
24 |
25 | class SepMorph {  // Inflection model with a separate parameter set per morphological class: biLSTM encoder + LSTM decoder
26 |  public:
27 | vector input_forward, input_backward, output_forward;  // per-morph encoder (fwd/bwd) and decoder LSTMs
28 | vector char_vecs;  // per-morph character embedding tables
29 |
30 | Expression hidden_to_output, hidden_to_output_bias;  // graph-bound copies of the output projection (set by AddParamsToCG)
31 | vector phidden_to_output, phidden_to_output_bias;  // the underlying per-morph parameters
32 |
33 | Expression transform_encoded, transform_encoded_bias;  // graph-bound copies of the encoder-output transform
34 | vector ptransform_encoded, ptransform_encoded_bias;
35 |
36 | unsigned char_len, hidden_len, vocab_len, layers, morph_len, max_eps = 5;  // embedding dim, LSTM dim, vocab size, LSTM layers, #morph classes, #epsilon vectors
37 | vector eps_vecs;  // learned fillers fed once the input word is exhausted
38 |
39 | SepMorph() {}  // default-construct then Read() to load a saved model
40 |
41 | SepMorph(const unsigned& char_length, const unsigned& hidden_length,
42 |          const unsigned& vocab_length, const unsigned& layers,
43 |          const unsigned& num_morph, vector* m,
44 |          vector* optimizer);
45 |
46 | void InitParams(vector* m);  // register all parameters with the per-morph cnn Models
47 |
48 | void AddParamsToCG(const unsigned& morph_id, ComputationGraph* cg);  // bind one morph class's parameters into a graph
49 |
50 | void RunFwdBwd(const unsigned& morph_id, const vector& inputs,
51 |                Expression* hidden, ComputationGraph *cg);  // biLSTM-encode the input word into *hidden
52 |
53 | void TransformEncodedInput(Expression* encoded_input) const;  // affine 2*hidden -> hidden reduction, in place
54 |
55 | void TransformEncodedInputDuringDecoding(Expression* encoded_input) const;  // NOTE(review): declared but no definition visible in sep-morph.cc
56 |
57 | void ProjectToOutput(const Expression& hidden, Expression* out) const;  // hidden state -> vocab logits
58 |
59 | Expression ComputeLoss(const vector& hidden_units,
60 |                        const vector& targets) const;  // summed per-step neg-log-softmax loss
61 |
62 | float Train(const unsigned& morph_id, const vector& inputs,
63 |             const vector& outputs, AdadeltaTrainer* ada_gd);  // one training step; returns the loss
64 |
65 | friend class boost::serialization::access;
66 | template void serialize(Archive& ar, const unsigned int) {  // only scalar hyperparameters; cnn Models are archived separately by Serialize/Read
67 | ar & char_len;
68 | ar & hidden_len;
69 | ar & vocab_len;
70 | ar & layers;
71 | ar & morph_len;
72 | ar & max_eps;
73 | }
74 | };
75 |
76 | void
77 | EnsembleDecode(const unsigned& morph_id, unordered_map& char_to_id,  // greedy ensemble decoding (defined in sep-morph.cc)
78 |                const vector& input_ids,
79 |                vector* pred_target_ids, vector* ensmb_model);
80 |
81 | void
82 | EnsembleBeamDecode(const unsigned& morph_id, const unsigned& beam_size,  // beam-search ensemble decoding
83 |                    unordered_map& char_to_id,
84 |                    const vector& input_ids,
85 |                    vector >* sequences, vector* tm_scores,
86 |                    vector* ensmb_model);
87 |
88 | void Serialize(string& filename, SepMorph& model, vector* cnn_model);  // save hyperparameters + per-morph parameters
89 |
90 | void Read(string& filename, SepMorph* model, vector* cnn_model);  // load a model saved by Serialize
91 |
92 |
93 | #endif
94 |
94 |
--------------------------------------------------------------------------------
/src/train-enc-dec-attn.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "enc-dec-attn.h"
12 |
13 | #include
14 | #include
15 |
16 | #include
17 | #include
18 | #include
19 |
20 | using namespace std;
21 | using namespace cnn;
22 | using namespace cnn::expr;
23 |
24 | int main(int argc, char** argv) {
25 | cnn::Initialize(argc, argv);
26 |
27 | string vocab_filename = argv[1]; // vocabulary of words/characters
28 | string morph_filename = argv[2];
29 | string train_filename = argv[3];
30 | string test_filename = argv[4];
31 | unsigned hidden_size = atoi(argv[5]);
32 | unsigned num_iter = atoi(argv[6]);
33 | float reg_strength = atof(argv[7]);
34 | unsigned layers = atoi(argv[8]);
35 | string model_outputfilename = argv[9];
36 |
37 | unordered_map char_to_id, morph_to_id;
38 | unordered_map id_to_char, id_to_morph;
39 |
40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
41 | unsigned vocab_size = char_to_id.size();
42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
43 | unsigned morph_size = morph_to_id.size();
44 |
45 | vector train_data; // Read the training file in a vector
46 | ReadData(train_filename, &train_data);
47 |
48 | vector test_data; // Read the dev file in a vector
49 | ReadData(test_filename, &test_data);
50 |
51 | vector m;
52 | vector optimizer;
53 |
54 | for (unsigned i = 0; i < morph_size; ++i) {
55 | m.push_back(new Model());
56 | AdadeltaTrainer ada(m[i], reg_strength);
57 | optimizer.push_back(ada);
58 | }
59 |
60 | unsigned char_size = vocab_size;
61 | EncDecAttn nn(char_size, hidden_size, vocab_size, layers, morph_size,
62 | &m, &optimizer);
63 |
64 | // Pre-train all models on all datasets.
65 | /*cerr << "Pre-training... " << endl;
66 | for (unsigned morph_id = 0; morph_id < morph_size; ++morph_id) {
67 | cerr << "Morph class: " << morph_id + 1 << endl;
68 | for (string& line : train_data) {
69 | vector items = split_line(line, '|');
70 | vector input_ids, target_ids;
71 | input_ids.clear(); target_ids.clear();
72 | for (const string& ch : split_line(items[0], ' ')) {
73 | input_ids.push_back(char_to_id[ch]);
74 | }
75 | for (const string& ch : split_line(items[1], ' ')) {
76 | target_ids.push_back(char_to_id[ch]);
77 | }
78 | //unsigned morph_id = morph_to_id[items[2]];
79 | double temp_loss = nn.Train(morph_id, input_ids, target_ids,
80 | &optimizer[morph_id]);
81 | }
82 | }
83 | cerr << "complete." << endl;*/
84 |
85 | // Read the training file and train the model
86 | double best_score = -1;
87 | vector object_list;
88 | object_list.push_back(&nn);
89 | for (unsigned iter = 0; iter < num_iter; ++iter) {
90 | unsigned line_id = 0;
91 | random_shuffle(train_data.begin(), train_data.end());
92 | vector loss(morph_size, 0.0f);
93 | for (string& line : train_data) {
94 | vector items = split_line(line, '|');
95 | vector input_ids, target_ids;
96 | input_ids.clear(); target_ids.clear();
97 | for (const string& ch : split_line(items[0], ' ')) {
98 | input_ids.push_back(char_to_id[ch]);
99 | }
100 | for (const string& ch : split_line(items[1], ' ')) {
101 | target_ids.push_back(char_to_id[ch]);
102 | }
103 | unsigned morph_id = morph_to_id[items[2]];
104 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids,
105 | &optimizer[morph_id]);
106 | cerr << ++line_id << "\r";
107 | }
108 |
109 | // Read the test file and output predictions for the words.
110 | string line;
111 | double correct = 0, total = 0;
112 | for (string& line : test_data) {
113 | vector items = split_line(line, '|');
114 | vector input_ids, target_ids, pred_target_ids;
115 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
116 | for (const string& ch : split_line(items[0], ' ')) {
117 | input_ids.push_back(char_to_id[ch]);
118 | }
119 | for (const string& ch : split_line(items[1], ' ')) {
120 | target_ids.push_back(char_to_id[ch]);
121 | }
122 | unsigned morph_id = morph_to_id[items[2]];
123 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
124 | &object_list);
125 |
126 | string prediction = "";
127 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
128 | prediction += id_to_char[pred_target_ids[i]];
129 | if (i != pred_target_ids.size() - 1) {
130 | prediction += " ";
131 | }
132 | }
133 | if (prediction == items[1]) {
134 | correct += 1;
135 | }
136 | total += 1;
137 | }
138 | double curr_score = correct / total;
139 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.);
140 | cerr << "Prediction Accuracy: " << curr_score << endl;
141 | if (curr_score > best_score) {
142 | best_score = curr_score;
143 | Serialize(model_outputfilename, nn, &m);
144 | }
145 | }
146 | return 1;
147 | }
148 |
--------------------------------------------------------------------------------
/src/train-enc-dec.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "enc-dec.h"
12 |
13 | #include
14 | #include
15 |
16 | #include
17 | #include
18 | #include
19 |
20 | using namespace std;
21 | using namespace cnn;
22 | using namespace cnn::expr;
23 |
24 | int main(int argc, char** argv) {
25 | cnn::Initialize(argc, argv);
26 |
27 | string vocab_filename = argv[1]; // vocabulary of words/characters
28 | string morph_filename = argv[2];
29 | string train_filename = argv[3];
30 | string test_filename = argv[4];
31 | unsigned hidden_size = atoi(argv[5]);
32 | unsigned num_iter = atoi(argv[6]);
33 | float reg_strength = atof(argv[7]);
34 | unsigned layers = atoi(argv[8]);
35 | string model_outputfilename = argv[9];
36 |
37 | unordered_map char_to_id, morph_to_id;
38 | unordered_map id_to_char, id_to_morph;
39 |
40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
41 | unsigned vocab_size = char_to_id.size();
42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
43 | unsigned morph_size = morph_to_id.size();
44 |
45 | vector train_data; // Read the training file in a vector
46 | ReadData(train_filename, &train_data);
47 |
48 | vector test_data; // Read the dev file in a vector
49 | ReadData(test_filename, &test_data);
50 |
51 | vector m;
52 | vector optimizer;
53 |
54 | for (unsigned i = 0; i < morph_size; ++i) {
55 | m.push_back(new Model());
56 | AdadeltaTrainer ada(m[i], reg_strength);
57 | optimizer.push_back(ada);
58 | }
59 |
60 | unsigned char_size = vocab_size;
61 | EncDec nn(char_size, hidden_size, vocab_size, layers, morph_size,
62 | &m, &optimizer);
63 |
64 | // Read the training file and train the model
65 | double best_score = -1;
66 | vector object_list;
67 | object_list.push_back(&nn);
68 | for (unsigned iter = 0; iter < num_iter; ++iter) {
69 | unsigned line_id = 0;
70 | random_shuffle(train_data.begin(), train_data.end());
71 | vector loss(morph_size, 0.0f);
72 | for (string& line : train_data) {
73 | vector items = split_line(line, '|');
74 | vector input_ids, target_ids;
75 | input_ids.clear(); target_ids.clear();
76 | for (const string& ch : split_line(items[0], ' ')) {
77 | input_ids.push_back(char_to_id[ch]);
78 | }
79 | for (const string& ch : split_line(items[1], ' ')) {
80 | target_ids.push_back(char_to_id[ch]);
81 | }
82 | unsigned morph_id = morph_to_id[items[2]];
83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids,
84 | &optimizer[morph_id]);
85 | cerr << ++line_id << "\r";
86 | }
87 |
88 | // Read the test file and output predictions for the words.
89 | string line;
90 | double correct = 0, total = 0;
91 | for (string& line : test_data) {
92 | vector items = split_line(line, '|');
93 | vector input_ids, target_ids, pred_target_ids;
94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
95 | for (const string& ch : split_line(items[0], ' ')) {
96 | input_ids.push_back(char_to_id[ch]);
97 | }
98 | for (const string& ch : split_line(items[1], ' ')) {
99 | target_ids.push_back(char_to_id[ch]);
100 | }
101 | unsigned morph_id = morph_to_id[items[2]];
102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
103 | &object_list);
104 |
105 | string prediction = "";
106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
107 | prediction += id_to_char[pred_target_ids[i]];
108 | if (i != pred_target_ids.size() - 1) {
109 | prediction += " ";
110 | }
111 | }
112 | if (prediction == items[1]) {
113 | correct += 1;
114 | }
115 | total += 1;
116 | }
117 | double curr_score = correct / total;
118 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.);
119 | cerr << "Prediction Accuracy: " << curr_score << endl;
120 | if (curr_score > best_score) {
121 | best_score = curr_score;
122 | Serialize(model_outputfilename, nn, &m);
123 | }
124 | }
125 | return 1;
126 | }
127 |
--------------------------------------------------------------------------------
/src/train-joint-enc-dec-morph.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "joint-enc-dec-morph.h"
12 |
13 | #include
14 | #include
15 |
16 | using namespace std;
17 | using namespace cnn;
18 | using namespace cnn::expr;
19 |
20 | int main(int argc, char** argv) {
21 | cnn::Initialize(argc, argv);
22 |
23 | string vocab_filename = argv[1]; // vocabulary of words/characters
24 | string morph_filename = argv[2];
25 | string train_filename = argv[3];
26 | string test_filename = argv[4];
27 | unsigned hidden_size = atoi(argv[5]);
28 | unsigned num_iter = atoi(argv[6]);
29 | float reg_strength = atof(argv[7]);
30 | unsigned layers = atoi(argv[8]);
31 | string model_outputfilename = argv[9];
32 |
33 | unordered_map char_to_id, morph_to_id;
34 | unordered_map id_to_char, id_to_morph;
35 |
36 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
37 | unsigned vocab_size = char_to_id.size();
38 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
39 | unsigned morph_size = morph_to_id.size();
40 |
41 | vector train_data; // Read the training file in a vector
42 | ReadData(train_filename, &train_data);
43 |
44 | vector test_data; // Read the dev file in a vector
45 | ReadData(test_filename, &test_data);
46 |
47 | vector m;
48 | vector optimizer;
49 |
50 | for (unsigned i = 0; i < morph_size + 1; ++i) {
51 | m.push_back(new Model());
52 | AdadeltaTrainer ada(m[i], reg_strength);
53 | optimizer.push_back(ada);
54 | }
55 |
56 | unsigned char_size = vocab_size;
57 | JointEncDecMorph nn(char_size, hidden_size, vocab_size, layers, morph_size,
58 | &m, &optimizer);
59 |
60 | // Read the training file and train the model
61 | double best_score = -1;
62 | vector model_pointers;
63 | model_pointers.push_back(&nn);
64 | for (unsigned iter = 0; iter < num_iter; ++iter) {
65 | unsigned line_id = 0;
66 | random_shuffle(train_data.begin(), train_data.end());
67 | vector loss(morph_size, 0.0f);
68 | for (string& line : train_data) {
69 | vector items = split_line(line, '|');
70 | vector input_ids, target_ids;
71 | input_ids.clear(); target_ids.clear();
72 | for (const string& ch : split_line(items[0], ' ')) {
73 | input_ids.push_back(char_to_id[ch]);
74 | }
75 | for (const string& ch : split_line(items[1], ' ')) {
76 | target_ids.push_back(char_to_id[ch]);
77 | }
78 | unsigned morph_id = morph_to_id[items[2]];
79 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids,
80 | &optimizer[morph_id], &optimizer[morph_size]);
81 | cerr << ++line_id << "\r";
82 | }
83 |
84 | // Read the test file and output predictions for the words.
85 | string line;
86 | double correct = 0, total = 0;
87 | for (string& line : test_data) {
88 | vector items = split_line(line, '|');
89 | vector input_ids, target_ids, pred_target_ids;
90 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
91 | for (const string& ch : split_line(items[0], ' ')) {
92 | input_ids.push_back(char_to_id[ch]);
93 | }
94 | for (const string& ch : split_line(items[1], ' ')) {
95 | target_ids.push_back(char_to_id[ch]);
96 | }
97 | unsigned morph_id = morph_to_id[items[2]];
98 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
99 | &model_pointers);
100 |
101 | string prediction = "";
102 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
103 | prediction += id_to_char[pred_target_ids[i]];
104 | if (i != pred_target_ids.size() - 1) {
105 | prediction += " ";
106 | }
107 | }
108 | if (prediction == items[1]) {
109 | correct += 1;
110 | }
111 | total += 1;
112 | }
113 | double curr_score = correct / total;
114 | cerr << "Iter " << iter + 1 << " ";
115 | cerr << "Prediction Accuracy: " << curr_score << endl;
116 | if (curr_score > best_score) {
117 | best_score = curr_score;
118 | Serialize(model_outputfilename, nn, &m);
119 | }
120 | }
121 | return 1;
122 | }
123 |
--------------------------------------------------------------------------------
/src/train-joint-enc-morph.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "joint-enc-morph.h"
12 |
13 | #include
14 | #include
15 |
16 | using namespace std;
17 | using namespace cnn;
18 | using namespace cnn::expr;
19 |
20 | int main(int argc, char** argv) {
21 | cnn::Initialize(argc, argv);
22 |
23 | string vocab_filename = argv[1]; // vocabulary of words/characters
24 | string morph_filename = argv[2];
25 | string train_filename = argv[3];
26 | string test_filename = argv[4];
27 | unsigned hidden_size = atoi(argv[5]);
28 | unsigned num_iter = atoi(argv[6]);
29 | float reg_strength = atof(argv[7]);
30 | unsigned layers = atoi(argv[8]);
31 | string model_outputfilename = argv[9];
32 |
33 | unordered_map char_to_id, morph_to_id;
34 | unordered_map id_to_char, id_to_morph;
35 |
36 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
37 | unsigned vocab_size = char_to_id.size();
38 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
39 | unsigned morph_size = morph_to_id.size();
40 |
41 | vector train_data; // Read the training file in a vector
42 | ReadData(train_filename, &train_data);
43 |
44 | vector test_data; // Read the dev file in a vector
45 | ReadData(test_filename, &test_data);
46 |
47 | vector m;
48 | vector optimizer;
49 |
50 | for (unsigned i = 0; i < morph_size + 1; ++i) {
51 | m.push_back(new Model());
52 | AdadeltaTrainer ada(m[i], reg_strength);
53 | optimizer.push_back(ada);
54 | }
55 |
56 | unsigned char_size = vocab_size;
57 | JointEncMorph nn(char_size, hidden_size, vocab_size, layers, morph_size,
58 | &m, &optimizer);
59 |
60 | // Read the training file and train the model
61 | double best_score = -1;
62 | vector model_pointers;
63 | model_pointers.push_back(&nn);
64 | for (unsigned iter = 0; iter < num_iter; ++iter) {
65 | unsigned line_id = 0;
66 | random_shuffle(train_data.begin(), train_data.end());
67 | vector loss(morph_size, 0.0f);
68 | for (string& line : train_data) {
69 | vector items = split_line(line, '|');
70 | vector input_ids, target_ids;
71 | input_ids.clear(); target_ids.clear();
72 | for (const string& ch : split_line(items[0], ' ')) {
73 | input_ids.push_back(char_to_id[ch]);
74 | }
75 | for (const string& ch : split_line(items[1], ' ')) {
76 | target_ids.push_back(char_to_id[ch]);
77 | }
78 | unsigned morph_id = morph_to_id[items[2]];
79 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids,
80 | &optimizer[morph_id], &optimizer[morph_size]);
81 | cerr << ++line_id << "\r";
82 | }
83 |
84 | // Read the test file and output predictions for the words.
85 | string line;
86 | double correct = 0, total = 0;
87 | for (string& line : test_data) {
88 | vector items = split_line(line, '|');
89 | vector input_ids, target_ids, pred_target_ids;
90 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
91 | for (const string& ch : split_line(items[0], ' ')) {
92 | input_ids.push_back(char_to_id[ch]);
93 | }
94 | for (const string& ch : split_line(items[1], ' ')) {
95 | target_ids.push_back(char_to_id[ch]);
96 | }
97 | unsigned morph_id = morph_to_id[items[2]];
98 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
99 | &model_pointers);
100 |
101 | string prediction = "";
102 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
103 | prediction += id_to_char[pred_target_ids[i]];
104 | if (i != pred_target_ids.size() - 1) {
105 | prediction += " ";
106 | }
107 | }
108 | if (prediction == items[1]) {
109 | correct += 1;
110 | }
111 | total += 1;
112 | }
113 | double curr_score = correct / total;
114 | cerr << "Iter " << iter + 1 << " ";
115 | cerr << "Prediction Accuracy: " << curr_score << endl;
116 | if (curr_score > best_score) {
117 | best_score = curr_score;
118 | Serialize(model_outputfilename, nn, &m);
119 | }
120 | }
121 | return 1;
122 | }
123 |
--------------------------------------------------------------------------------
/src/train-lm-joint-enc.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "lm.h"
11 | #include "utils.h"
12 | #include "lm-joint-enc.h"
13 |
14 | #include
15 | #include
16 |
17 | using namespace std;
18 | using namespace cnn;
19 | using namespace cnn::expr;
20 |
21 | int main(int argc, char** argv) {
22 | cnn::Initialize(argc, argv);
23 |
24 | string vocab_filename = argv[1]; // vocabulary of words/characters
25 | string morph_filename = argv[2];
26 | string train_filename = argv[3];
27 | string test_filename = argv[4];
28 | unsigned hidden_size = atoi(argv[5]);
29 | unsigned num_iter = atoi(argv[6]);
30 | float reg_strength = atof(argv[7]);
31 | unsigned layers = atoi(argv[8]);
32 | string lm_model_filename = argv[9];
33 | string model_outputfilename = argv[10];
34 |
35 | unordered_map char_to_id, morph_to_id;
36 | unordered_map id_to_char, id_to_morph;
37 |
38 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
39 | unsigned vocab_size = char_to_id.size();
40 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
41 | unsigned morph_size = morph_to_id.size();
42 |
43 | vector train_data; // Read the training file in a vector
44 | ReadData(train_filename, &train_data);
45 |
46 | vector test_data; // Read the dev file in a vector
47 | ReadData(test_filename, &test_data);
48 |
49 | LM lm(lm_model_filename, char_to_id, id_to_char);
50 |
51 | vector m;
52 | vector optimizer;
53 |
54 | for (unsigned i = 0; i < morph_size + 1; ++i) {
55 | m.push_back(new Model());
56 | AdadeltaTrainer ada(m[i], reg_strength);
57 | optimizer.push_back(ada);
58 | }
59 |
60 | unsigned char_size = vocab_size;
61 | LMJointEnc nn(char_size, hidden_size, vocab_size, layers, morph_size,
62 | &m, &optimizer);
63 |
64 | // Read the training file and train the model
65 | double best_score = -1;
66 | vector model_pointers;
67 | model_pointers.push_back(&nn);
68 | for (unsigned iter = 0; iter < num_iter; ++iter) {
69 | unsigned line_id = 0;
70 | random_shuffle(train_data.begin(), train_data.end());
71 | vector loss(morph_size, 0.0f);
72 | for (string& line : train_data) {
73 | vector items = split_line(line, '|');
74 | vector input_ids, target_ids;
75 | input_ids.clear(); target_ids.clear();
76 | for (const string& ch : split_line(items[0], ' ')) {
77 | input_ids.push_back(char_to_id[ch]);
78 | }
79 | for (const string& ch : split_line(items[1], ' ')) {
80 | target_ids.push_back(char_to_id[ch]);
81 | }
82 | unsigned morph_id = morph_to_id[items[2]];
83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids, &lm,
84 | &optimizer[morph_id], &optimizer[morph_size]);
85 | cerr << ++line_id << "\r";
86 | }
87 |
88 | // Read the test file and output predictions for the words.
89 | string line;
90 | double correct = 0, total = 0;
91 | for (string& line : test_data) {
92 | vector items = split_line(line, '|');
93 | vector input_ids, target_ids, pred_target_ids;
94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
95 | for (const string& ch : split_line(items[0], ' ')) {
96 | input_ids.push_back(char_to_id[ch]);
97 | }
98 | for (const string& ch : split_line(items[1], ' ')) {
99 | target_ids.push_back(char_to_id[ch]);
100 | }
101 | unsigned morph_id = morph_to_id[items[2]];
102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
103 | &lm, &model_pointers);
104 |
105 | string prediction = "";
106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
107 | prediction += id_to_char[pred_target_ids[i]];
108 | if (i != pred_target_ids.size() - 1) {
109 | prediction += " ";
110 | }
111 | }
112 | if (prediction == items[1]) {
113 | correct += 1;
114 | }
115 | total += 1;
116 | }
117 | double curr_score = correct / total;
118 | cerr << "Iter " << iter + 1 << " ";
119 | cerr << "Prediction Accuracy: " << curr_score << endl;
120 | if (curr_score > best_score) {
121 | best_score = curr_score;
122 | Serialize(model_outputfilename, nn, &m);
123 | }
124 | }
125 | return 1;
126 | }
127 |
--------------------------------------------------------------------------------
/src/train-lm-sep-morph.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "lm.h"
11 | #include "utils.h"
12 | #include "lm-sep-morph.h"
13 |
14 | #include
15 | #include
16 |
17 | #include
18 | #include
19 | #include
20 |
21 | #include
22 |
23 | using namespace std;
24 | using namespace cnn;
25 | using namespace cnn::expr;
26 |
27 | int main(int argc, char** argv) {
28 | cnn::Initialize(argc, argv);
29 | feenableexcept(FE_INVALID | FE_OVERFLOW | FE_DIVBYZERO);
30 |
31 | string vocab_filename = argv[1]; // vocabulary of words/characters
32 | string morph_filename = argv[2];
33 | string train_filename = argv[3];
34 | string test_filename = argv[4];
35 | unsigned hidden_size = atoi(argv[5]);
36 | unsigned num_iter = atoi(argv[6]);
37 | float reg_strength = atof(argv[7]);
38 | unsigned layers = atoi(argv[8]);
39 | string lm_model_filename = argv[9];
40 | string model_outputfilename = argv[10];
41 |
42 | unordered_map char_to_id, morph_to_id;
43 | unordered_map id_to_char, id_to_morph;
44 |
45 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
46 | unsigned vocab_size = char_to_id.size();
47 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
48 | unsigned morph_size = morph_to_id.size();
49 |
50 | vector train_data; // Read the training file in a vector
51 | ReadData(train_filename, &train_data);
52 |
53 | vector test_data; // Read the dev file in a vector
54 | ReadData(test_filename, &test_data);
55 |
56 | LM lm(lm_model_filename, char_to_id, id_to_char);
57 |
58 | vector m;
59 | vector optimizer;
60 |
61 | for (unsigned i = 0; i < morph_size; ++i) {
62 | m.push_back(new Model());
63 | AdadeltaTrainer ada(m[i], reg_strength);
64 | optimizer.push_back(ada);
65 | }
66 |
67 | unsigned char_size = vocab_size;
68 | LMSepMorph nn(char_size, hidden_size, vocab_size, layers, morph_size,
69 | &m, &optimizer);
70 |
71 | // Read the training file and train the model
72 | double best_score = -1;
73 | vector object_list;
74 | object_list.push_back(&nn);
75 | for (unsigned iter = 0; iter < num_iter; ++iter) {
76 | unsigned line_id = 0;
77 | random_shuffle(train_data.begin(), train_data.end());
78 | vector loss(morph_size, 0.0f);
79 | for (string& line : train_data) {
80 | vector items = split_line(line, '|');
81 | vector input_ids, target_ids;
82 | input_ids.clear(); target_ids.clear();
83 |
84 | string input = items[0], output = items[1];
85 | for (const string& ch : split_line(input, ' ')) {
86 | input_ids.push_back(char_to_id[ch]);
87 | }
88 | for (const string& ch : split_line(output, ' ')) {
89 | target_ids.push_back(char_to_id[ch]);
90 | }
91 | unsigned morph_id = morph_to_id[items[2]];
92 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids,
93 | &lm, &optimizer[morph_id]);
94 | cerr << ++line_id << "\r";
95 | }
96 |
97 | // Read the test file and output predictions for the words.
98 | string line;
99 | double correct = 0, total = 0;
100 | for (string& line : test_data) {
101 | vector items = split_line(line, '|');
102 | vector input_ids, target_ids, pred_target_ids;
103 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
104 |
105 | string input = items[0], output = items[1];
106 | for (const string& ch : split_line(input, ' ')) {
107 | input_ids.push_back(char_to_id[ch]);
108 | }
109 | for (const string& ch : split_line(output, ' ')) {
110 | target_ids.push_back(char_to_id[ch]);
111 | }
112 | unsigned morph_id = morph_to_id[items[2]];
113 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids, &lm,
114 | &object_list);
115 |
116 | string prediction = "";
117 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
118 | prediction += id_to_char[pred_target_ids[i]];
119 | if (i != pred_target_ids.size() - 1) {
120 | prediction += " ";
121 | }
122 | }
123 | if (prediction == output) {
124 | correct += 1;
125 | }
126 | total += 1;
127 | }
128 | double curr_score = correct / total;
129 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.);
130 | cerr << " Prediction Accuracy: " << curr_score << endl;
131 | if (curr_score > best_score) {
132 | best_score = curr_score;
133 | Serialize(model_outputfilename, nn, &m);
134 | }
135 | }
136 | return 1;
137 | }
138 |
--------------------------------------------------------------------------------
/src/train-no-enc.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "no-enc.h"
12 |
13 | #include
14 | #include
15 |
16 | #include
17 | #include
18 | #include
19 |
20 | using namespace std;
21 | using namespace cnn;
22 | using namespace cnn::expr;
23 |
24 | int main(int argc, char** argv) {
25 | cnn::Initialize(argc, argv);
26 |
27 | string vocab_filename = argv[1]; // vocabulary of words/characters
28 | string morph_filename = argv[2];
29 | string train_filename = argv[3];
30 | string test_filename = argv[4];
31 | unsigned hidden_size = atoi(argv[5]);
32 | unsigned num_iter = atoi(argv[6]);
33 | float reg_strength = atof(argv[7]);
34 | unsigned layers = atoi(argv[8]);
35 | string model_outputfilename = argv[9];
36 |
37 | unordered_map char_to_id, morph_to_id;
38 | unordered_map id_to_char, id_to_morph;
39 |
40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
41 | unsigned vocab_size = char_to_id.size();
42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
43 | unsigned morph_size = morph_to_id.size();
44 |
45 | vector train_data; // Read the training file in a vector
46 | ReadData(train_filename, &train_data);
47 |
48 | vector test_data; // Read the dev file in a vector
49 | ReadData(test_filename, &test_data);
50 |
51 | vector m;
52 | vector optimizer;
53 |
54 | for (unsigned i = 0; i < morph_size; ++i) {
55 | m.push_back(new Model());
56 | AdadeltaTrainer ada(m[i], reg_strength);
57 | optimizer.push_back(ada);
58 | }
59 |
60 | unsigned char_size = vocab_size;
61 | NoEnc nn(char_size, hidden_size, vocab_size, layers, morph_size,
62 | &m, &optimizer);
63 |
64 | // Pre-train all models on all datasets.
65 | /*cerr << "Pre-training... " << endl;
66 | for (unsigned morph_id = 0; morph_id < morph_size; ++morph_id) {
67 | cerr << "Morph class: " << morph_id + 1 << endl;
68 | for (string& line : train_data) {
69 | vector items = split_line(line, '|');
70 | vector input_ids, target_ids;
71 | input_ids.clear(); target_ids.clear();
72 | for (const string& ch : split_line(items[0], ' ')) {
73 | input_ids.push_back(char_to_id[ch]);
74 | }
75 | for (const string& ch : split_line(items[1], ' ')) {
76 | target_ids.push_back(char_to_id[ch]);
77 | }
78 | //unsigned morph_id = morph_to_id[items[2]];
79 | double temp_loss = nn.Train(morph_id, input_ids, target_ids,
80 | &optimizer[morph_id]);
81 | }
82 | }
83 | cerr << "complete." << endl;*/
84 |
85 | // Read the training file and train the model
86 | double best_score = -1;
87 | vector object_list;
88 | object_list.push_back(&nn);
89 | for (unsigned iter = 0; iter < num_iter; ++iter) {
90 | unsigned line_id = 0;
91 | random_shuffle(train_data.begin(), train_data.end());
92 | vector loss(morph_size, 0.0f);
93 | for (string& line : train_data) {
94 | vector items = split_line(line, '|');
95 | vector input_ids, target_ids;
96 | input_ids.clear(); target_ids.clear();
97 | for (const string& ch : split_line(items[0], ' ')) {
98 | input_ids.push_back(char_to_id[ch]);
99 | }
100 | for (const string& ch : split_line(items[1], ' ')) {
101 | target_ids.push_back(char_to_id[ch]);
102 | }
103 | unsigned morph_id = morph_to_id[items[2]];
104 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids,
105 | &optimizer[morph_id]);
106 | cerr << ++line_id << "\r";
107 | }
108 |
109 | // Read the test file and output predictions for the words.
110 | string line;
111 | double correct = 0, total = 0;
112 | for (string& line : test_data) {
113 | vector items = split_line(line, '|');
114 | vector input_ids, target_ids, pred_target_ids;
115 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
116 | for (const string& ch : split_line(items[0], ' ')) {
117 | input_ids.push_back(char_to_id[ch]);
118 | }
119 | for (const string& ch : split_line(items[1], ' ')) {
120 | target_ids.push_back(char_to_id[ch]);
121 | }
122 | unsigned morph_id = morph_to_id[items[2]];
123 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
124 | &object_list);
125 |
126 | string prediction = "";
127 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
128 | prediction += id_to_char[pred_target_ids[i]];
129 | if (i != pred_target_ids.size() - 1) {
130 | prediction += " ";
131 | }
132 | }
133 | if (prediction == items[1]) {
134 | correct += 1;
135 | }
136 | total += 1;
137 | }
138 | double curr_score = correct / total;
139 | cerr << "Iter " << iter + 1 << " " << "Loss: " << accumulate(loss.begin(), loss.end(), 0.);
140 | cerr << "Prediction Accuracy: " << curr_score << endl;
141 | if (curr_score > best_score) {
142 | best_score = curr_score;
143 | Serialize(model_outputfilename, nn, &m);
144 | }
145 | }
146 | return 1;
147 | }
148 |
--------------------------------------------------------------------------------
/src/train-sep-morph.cc:
--------------------------------------------------------------------------------
1 | #include "cnn/nodes.h"
2 | #include "cnn/cnn.h"
3 | #include "cnn/rnn.h"
4 | #include "cnn/gru.h"
5 | #include "cnn/lstm.h"
6 | #include "cnn/training.h"
7 | #include "cnn/gpu-ops.h"
8 | #include "cnn/expr.h"
9 |
10 | #include "utils.h"
11 | #include "sep-morph.h"
12 |
13 | #include
14 | #include
15 |
16 | #include
17 | #include
18 | #include
19 |
20 | using namespace std;
21 | using namespace cnn;
22 | using namespace cnn::expr;
23 |
24 | int main(int argc, char** argv) {
25 | cnn::Initialize(argc, argv);
26 |
27 | string vocab_filename = argv[1]; // vocabulary of words/characters
28 | string morph_filename = argv[2];
29 | string train_filename = argv[3];
30 | string test_filename = argv[4];
31 | unsigned hidden_size = atoi(argv[5]);
32 | unsigned num_iter = atoi(argv[6]);
33 | float reg_strength = atof(argv[7]);
34 | unsigned layers = atoi(argv[8]);
35 | string model_outputfilename = argv[9];
36 |
37 | unordered_map char_to_id, morph_to_id;
38 | unordered_map id_to_char, id_to_morph;
39 |
40 | ReadVocab(vocab_filename, &char_to_id, &id_to_char);
41 | unsigned vocab_size = char_to_id.size();
42 | ReadVocab(morph_filename, &morph_to_id, &id_to_morph);
43 | unsigned morph_size = morph_to_id.size();
44 |
45 | vector train_data; // Read the training file in a vector
46 | ReadData(train_filename, &train_data);
47 |
48 | vector test_data; // Read the dev file in a vector
49 | ReadData(test_filename, &test_data);
50 |
51 | vector m;
52 | vector optimizer;
53 |
54 | for (unsigned i = 0; i < morph_size; ++i) {
55 | m.push_back(new Model());
56 | AdadeltaTrainer ada(m[i], reg_strength);
57 | optimizer.push_back(ada);
58 | }
59 |
60 | unsigned char_size = vocab_size;
61 | SepMorph nn(char_size, hidden_size, vocab_size, layers, morph_size,
62 | &m, &optimizer);
63 |
64 | // Read the training file and train the model
65 | double best_score = -1;
66 | vector object_list;
67 | object_list.push_back(&nn);
68 | for (unsigned iter = 0; iter < num_iter; ++iter) {
69 | unsigned line_id = 0;
70 | random_shuffle(train_data.begin(), train_data.end());
71 | vector loss(morph_size, 0.0f);
72 | for (string& line : train_data) {
73 | vector items = split_line(line, '|');
74 | vector input_ids, target_ids;
75 | input_ids.clear(); target_ids.clear();
76 | for (const string& ch : split_line(items[0], ' ')) {
77 | input_ids.push_back(char_to_id[ch]);
78 | }
79 | for (const string& ch : split_line(items[1], ' ')) {
80 | target_ids.push_back(char_to_id[ch]);
81 | }
82 | unsigned morph_id = morph_to_id[items[2]];
83 | loss[morph_id] += nn.Train(morph_id, input_ids, target_ids,
84 | &optimizer[morph_id]);
85 | cerr << ++line_id << "\r";
86 | }
87 |
88 | // Read the test file and output predictions for the words.
89 | string line;
90 | double correct = 0, total = 0;
91 | for (string& line : test_data) {
92 | vector items = split_line(line, '|');
93 | vector input_ids, target_ids, pred_target_ids;
94 | input_ids.clear(); target_ids.clear(); pred_target_ids.clear();
95 | for (const string& ch : split_line(items[0], ' ')) {
96 | input_ids.push_back(char_to_id[ch]);
97 | }
98 | for (const string& ch : split_line(items[1], ' ')) {
99 | target_ids.push_back(char_to_id[ch]);
100 | }
101 | unsigned morph_id = morph_to_id[items[2]];
102 | EnsembleDecode(morph_id, char_to_id, input_ids, &pred_target_ids,
103 | &object_list);
104 |
105 | string prediction = "";
106 | for (unsigned i = 0; i < pred_target_ids.size(); ++i) {
107 | prediction += id_to_char[pred_target_ids[i]];
108 | if (i != pred_target_ids.size() - 1) {
109 | prediction += " ";
110 | }
111 | }
112 | if (prediction == items[1]) {
113 | correct += 1;
114 | }
115 | total += 1;
116 | }
117 | double curr_score = correct / total;
118 | cerr << "Iter " << iter + 1 << " ";
119 | cerr << "Prediction Accuracy: " << curr_score << endl;
120 | if (curr_score > best_score) {
121 | best_score = curr_score;
122 | Serialize(model_outputfilename, nn, &m);
123 | }
124 | }
125 | return 1;
126 | }
127 |
--------------------------------------------------------------------------------
/src/utils.cc:
--------------------------------------------------------------------------------
1 | #include "utils.h"
2 |
// Split `line` on `delim`, discarding empty fields — consecutive,
// leading, or trailing delimiters therefore produce no empty strings.
vector<string> split_line(const string& line, char delim) {
  vector<string> words;
  stringstream ss(line);
  string item;
  while (std::getline(ss, item, delim)) {
    if (!item.empty())
      words.push_back(item);
  }
  return words;
}
13 |
14 | void ReadVocab(string& filename, unordered_map* item_to_id,
15 | unordered_map* id_to_item) {
16 | ifstream vocab_file(filename);
17 | vector chars;
18 | if (vocab_file.is_open()) { // Reading the vocab file
19 | string line;
20 | getline(vocab_file, line);
21 | chars = split_line(line, ' ');
22 | } else {
23 | cerr << "File opening failed" << endl;
24 | }
25 | unsigned char_id = 0;
26 | for (const string& ch : chars) {
27 | (*item_to_id)[ch] = char_id;
28 | (*id_to_item)[char_id] = ch;
29 | char_id++;
30 | }
31 | vocab_file.close();
32 | }
33 |
// Append every line of `filename` to `data` (existing contents are
// kept).  On open failure an error is printed to stderr — matching
// ReadVocab — and `data` is left unchanged.
void ReadData(string& filename, vector<string>* data) {
  // Read the training file in a vector
  ifstream train_file(filename);
  if (train_file.is_open()) {
    string line;
    while (getline(train_file, line)) {
      data->push_back(line);
    }
  } else {
    cerr << "File opening failed" << endl;
  }
  train_file.close();
}
45 |
--------------------------------------------------------------------------------
/src/utils.h:
--------------------------------------------------------------------------------
#ifndef UTILS_H_
#define UTILS_H_

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

using namespace std;

// Split `line` on `delim`; empty fields are discarded.
vector<string> split_line(const string& line, char delim);

// Read a one-line, space-separated vocabulary file into forward and
// reverse item <-> id maps (ids assigned in order of appearance).
void ReadVocab(string& filename, unordered_map<string, unsigned>* item_to_id,
               unordered_map<unsigned, string>* id_to_item);

// Read `filename` line by line, appending each line to `data`.
void ReadData(string& filename, vector<string>* data);

#endif
21 |
--------------------------------------------------------------------------------