├── .gitignore ├── 01-ngramlm ├── README.md ├── grid_search.sh ├── ngram_lm.py └── visualize_probs.py ├── 02-neurallm ├── README.md ├── replace_unk.py ├── replace_unk_given_vocab.py ├── rnnlm-batch.py ├── rnnlm-word.py ├── rnnlm.py └── util.py ├── 03-encdec ├── README.md ├── asturian-dev.txt ├── asturian-test.txt ├── asturian-train.txt ├── encdec.py └── util.py ├── 04-attention ├── Attention-Visualization │ ├── LICENSE │ ├── README.md │ ├── exec │ │ ├── plot_heatmap.jar │ │ └── plot_heatmap.py │ ├── multihead-att-java │ │ ├── ActionLabel.java │ │ ├── DataObject.java │ │ ├── HeatmapPanel.java │ │ ├── MainFrame.java │ │ ├── MainPanel.java │ │ └── Utils.java │ └── toydata │ │ ├── figures │ │ ├── java-heatmap1.png │ │ ├── java-heatmap2.png │ │ ├── java-heatmap3.png │ │ ├── java-heatmap4.png │ │ ├── py-heatmap.png │ │ ├── py-heatmap1.png │ │ └── py-heatmap2.png │ │ └── toy.attention ├── README.md ├── attention-dynet.py └── attention.py ├── 05-selfattention ├── sample.vec └── transformer_modules.py ├── 20-ibmmodels ├── .gitignore ├── README.md ├── model1.py └── visualize.pl ├── 21-wfst ├── .gitignore ├── README.md ├── bigram.py ├── bigram.ssym ├── corpuse.txt ├── corpusf.txt ├── example.isym ├── example.txt ├── input.txt ├── onetoone.py ├── process.sh ├── symbols.py └── t1.txt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | en-de 2 | iwslt-en-de-preprocessed.tar.gz 3 | .idea 4 | *.swp 5 | *.DS_Store 6 | result* 7 | -------------------------------------------------------------------------------- /01-ngramlm/README.md: -------------------------------------------------------------------------------- 1 | # Interpolated bi-gram model code example 2 | by Graham Neubig 3 | 4 | This is an example of an interpolated bi-gram language model. 
5 | 6 | ## Basic Usage 7 | 8 | Usage: 9 | 10 | python ngram_lm.py train_data.txt test_data.txt 11 | 12 | For example, on the example data in the top directory: 13 | 14 | python ngram_lm.py ../en-de/train.en-de.low.en ../en-de/valid.en-de.low.en 15 | 16 | You can also set some hyper-parameters: 17 | 18 | python ngram_lm.py --uni_alpha 0.3 --unk_alpha 0.005 ../en-de/train.en-de.low.en ../en-de/valid.en-de.low.en 19 | 20 | ## Advanced Examples 21 | 22 | You can perform grid search to find the best interpolation coefficients; an example is shown in `grid_search.sh` 23 | 24 | bash grid_search.sh 25 | 26 | You can also print out the probabilities of each word: 27 | 28 | python ngram_lm.py --print_probs ../en-de/train.en-de.low.en ../en-de/valid.en-de.low.en > result-probs.txt 29 | 30 | You can also visualize the probabilities: 31 | 32 | python visualize_probs.py < result-probs.txt > result-probs.html 33 | -------------------------------------------------------------------------------- /01-ngramlm/grid_search.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for uni_alpha in $(seq 0.01 0.01 0.5); do 4 | echo "--- uni_alpha=$uni_alpha" 5 | python ngram_lm.py --uni_alpha $uni_alpha ../en-de/train.en-de.low.en ../en-de/valid.en-de.low.en 6 | done 7 | -------------------------------------------------------------------------------- /01-ngramlm/ngram_lm.py: -------------------------------------------------------------------------------- 1 | # --- Interpolated bi-gram model code example 2 | # by Graham Neubig 3 | 4 | import sys 5 | import math 6 | import argparse 7 | from collections import defaultdict 8 | 9 | p = argparse.ArgumentParser(description="""Code to train an extremely simple linearlly interpolated bigram language model""") 10 | p.add_argument("train_file", help="The file to train on") 11 | p.add_argument("test_file", help="The file to test on") 12 | p.add_argument("--print_probs", action="store_true", 
help="Whether to print probabilities for each word") 13 | p.add_argument("--skip_unk", action="store_true", help="Skip unknown words in calculating probabilities") 14 | p.add_argument("--vocab_size", type=int, help="Total size of the vocabulary", default=1e7) 15 | p.add_argument("--unk_alpha", type=float, help="The amount of probability to assign to unknown words", default=0.01) 16 | p.add_argument("--uni_alpha", type=float, help="The amount of probability to assign to unigrams", default=0.20) 17 | args = p.parse_args() 18 | 19 | N = 2 20 | VOCAB_SIZE = args.vocab_size 21 | UNK_ALPHA = args.unk_alpha 22 | UNI_ALPHA = args.uni_alpha 23 | 24 | # If we're skipping unknown words, set the interpolation probability of unknowns to zero 25 | if args.skip_unk: 26 | BI_ALPHA = 1.0 - UNI_ALPHA 27 | UNK_ALPHA = 0.0 28 | else: 29 | assert(UNK_ALPHA >= 0 and UNK_ALPHA <= 1) 30 | assert(UNI_ALPHA >= 0 and UNI_ALPHA <= 1) 31 | assert(UNK_ALPHA + UNI_ALPHA <= 1) 32 | BI_ALPHA = 1.0 - UNK_ALPHA - UNI_ALPHA 33 | 34 | # Read in the training data 35 | train_counts = defaultdict(lambda: 0) 36 | train_ctxts = defaultdict(lambda: 0) 37 | with open(args.train_file, "r") as f: 38 | for line in f: 39 | sent = line.strip().split(" ") + [""] 40 | ngram = [""] * N 41 | for word in sent: 42 | ctxt = ngram[1:] 43 | ngram = ctxt + [word] 44 | for i in range(N): 45 | train_ctxts[tuple(ctxt[i:])] += 1 46 | train_counts[tuple(ngram[i:])] += 1 47 | 48 | for k, v in train_counts.items(): 49 | if k[0] == 'pittsburgh': 50 | print(k, v) 51 | sys.exit(0) 52 | 53 | # Calculate on test 54 | alpha = [UNK_ALPHA, UNI_ALPHA, BI_ALPHA] 55 | lls = 0 56 | words = 0 57 | with open(args.test_file, "r") as f: 58 | for line in f: 59 | sent = line.strip().split(" ") + [""] 60 | ngram = [""] * N 61 | for word in sent: 62 | ctxt = ngram[1:] 63 | ngram = ctxt + [word] 64 | all_probs = [1.0 / VOCAB_SIZE] 65 | for i in range(N)[::-1]: 66 | if tuple(ngram[i:]) in train_counts: 67 | 
all_probs.append(train_counts[tuple(ngram[i:])] / train_ctxts[tuple(ctxt[i:])]) 68 | else: 69 | all_probs.append(0.0) 70 | total = 0.0 71 | for prob, alph in zip(all_probs, alpha): 72 | total += prob * alph 73 | if args.print_probs: 74 | print(' '.join([str(x) for x in [word]+all_probs+[total]])) 75 | if not (args.skip_unk and total == 0.0): 76 | lls += math.log(total) 77 | if args.print_probs: 78 | print() 79 | words += len(sent)-1 80 | 81 | # Print out the results 82 | my_score = math.exp(-lls/words) 83 | print ("perplexity at alpha=%r: %f" % (alpha, my_score), file=sys.stderr) 84 | 85 | -------------------------------------------------------------------------------- /01-ngramlm/visualize_probs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | print('Probability Visualization') 5 | 6 | def num_to_cell(prob): 7 | num = max(-20, np.log(prob) if prob else -1e10) / -20.0 8 | bgcolor = '{:02x}{:02x}{:02x}'.format(int(255-num*255), int(255-num*255), int(255-num*126)) 9 | fcolor = 'FFFFFF' if num > 0.5 else '000000' 10 | return f'{prob:.2e}' 11 | 12 | 13 | curr_lines = [] 14 | for line in sys.stdin: 15 | line = line.strip() 16 | if line: 17 | curr_lines.append(line.split(' ')) 18 | else: 19 | assert(len(curr_lines) > 0) 20 | depth = len(curr_lines[0]) 21 | print('') 22 | for i in range(1, depth): 23 | print('' + ''.join([num_to_cell(float(x[i])) for x in curr_lines]) + '') 24 | print('
'+''.join([x[0] for x in curr_lines])+'


') 25 | curr_lines = [] 26 | 27 | print('') 28 | -------------------------------------------------------------------------------- /02-neurallm/README.md: -------------------------------------------------------------------------------- 1 | # Neural language model code example 2 | by Antonis Anastasopoulos 3 | 4 | This is an example of a simple LSTM neural language model, based on the DyNet examples. 5 | 6 | ## Preparing the Data 7 | 8 | First we find the words with frequency larger than 1 9 | 10 | py replace_unk.py ../en-de/train.en-de.low.en ../en-de/train.en-de.low.unk.en 1 ../en-de/vocab.en 11 | 12 | And make sure that UNKs are also replaced with `<unk>` in the dev and test sets 13 | 14 | py replace_unk_given_vocab.py ../en-de/valid.en-de.low.en ../en-de/valid.en-de.low.unk.en ../en-de/vocab.en 15 | py replace_unk_given_vocab.py ../en-de/test.en-de.low.en ../en-de/test.en-de.low.unk.en ../en-de/vocab.en 16 | 17 | ## Basic Usage 18 | 19 | py rnnlm.py train dev test --perform_train 20 | 21 | Use `rnnlm.py` for a character-level LM on the example data in the top directory: 22 | 23 | py rnnlm.py ../en-de/train.en-de.low.unk.en ../en-de/valid.en-de.low.unk.en ../en-de/test.en-de.low.unk.en --perform_train 24 | 25 | ## Advanced Examples 26 | 27 | You can also print out the probabilities of each word: 28 | 29 | python rnnlm-batch.py ../en-de/train.en-de.low.en ../en-de/valid.en-de.low.en ../en-de/test.en-de.low.unk.en --print_probs > result-probs.txt 30 | -------------------------------------------------------------------------------- /02-neurallm/replace_unk.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import defaultdict 3 | 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('input', help='Path to the input file.') 6 | parser.add_argument('output', help='Path to the output file.') 7 | parser.add_argument('cutoff', help='Cutoff', type=int) 8 | parser.add_argument('vocab', help='Path to 
write the vocabulary file') 9 | args, unknown = parser.parse_known_args() 10 | 11 | # Read in the input data 12 | with open(args.input, "r") as f: 13 | lines = f.readlines() 14 | 15 | word_counts = defaultdict(lambda: 0) 16 | for line in lines: 17 | sent = line.strip().split(" ") + [""] 18 | for word in sent: 19 | word_counts[word] += 1 20 | 21 | # Write the output 22 | with open(args.output, "w") as f: 23 | for line in lines: 24 | sent = line.strip().split(" ") 25 | out = ' '.join([w if word_counts[w]>args.cutoff else "" for w in sent]) 26 | f.write(f"{out}\n") 27 | 28 | # Write the output vocab 29 | with open(args.vocab, "w") as f: 30 | for word in word_counts: 31 | if word_counts[word] > args.cutoff: 32 | f.write(f"{word}\t{word_counts[word]}\n") 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /02-neurallm/replace_unk_given_vocab.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import defaultdict 3 | 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('input', help='Path to the input file.') 6 | parser.add_argument('output', help='Path to the output file.') 7 | parser.add_argument('vocab', help='Path to write the vocabulary file') 8 | args, unknown = parser.parse_known_args() 9 | 10 | # Read in the vocabulary 11 | vocab = defaultdict(lambda:0) 12 | with open(args.vocab, "r") as f: 13 | for line in f: 14 | l = line.split('\t') 15 | vocab[l[0]] = int(l[1]) 16 | 17 | 18 | # Read in the input data 19 | with open(args.input, "r") as f: 20 | lines = f.readlines() 21 | 22 | 23 | # Write the output 24 | with open(args.output, "w") as f: 25 | for line in lines: 26 | sent = line.strip().split(" ") 27 | out = ' '.join([w if w in vocab else "" for w in sent]) 28 | f.write(out + '\n') 29 | 30 | -------------------------------------------------------------------------------- /02-neurallm/rnnlm-batch.py: 
-------------------------------------------------------------------------------- 1 | import dynet as dy 2 | import time 3 | import random 4 | 5 | LAYERS = 2 6 | INPUT_DIM = 128 #50 #256 7 | HIDDEN_DIM = 256 # 50 #1024 8 | VOCAB_SIZE = 0 9 | MB_SIZE = 50 # mini batch size 10 | 11 | import argparse 12 | from collections import defaultdict 13 | from itertools import count 14 | import sys 15 | import util 16 | import math 17 | 18 | class RNNLanguageModel: 19 | def __init__(self, model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.SimpleRNNBuilder): 20 | # Char-level LSTM (layers=2, input=256, hidden=128, model) 21 | self.builder = builder(LAYERS, INPUT_DIM, HIDDEN_DIM, model) 22 | # Lookup parameters for word embeddings 23 | self.lookup = model.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM)) 24 | # Softmax weights/biases on top of LSTM outputs 25 | self.R = model.add_parameters((VOCAB_SIZE, HIDDEN_DIM)) 26 | self.bias = model.add_parameters((VOCAB_SIZE)) 27 | 28 | def save_to_disk(self, filename): 29 | dy.save(filename, [self.builder, self.lookup, self.R, self.bias]) 30 | 31 | def load_from_disk(self, filename): 32 | (self.builder, self.lookup, self.R, self.bias) = dy.load(filename, model) 33 | 34 | # Build the language model graph 35 | def BuildLMGraph(self, sents): 36 | dy.renew_cg() 37 | # initialize the RNN 38 | init_state = self.builder.initial_state() 39 | # parameters -> expressions 40 | R = dy.parameter(self.R) 41 | bias = dy.parameter(self.bias) 42 | 43 | S = vocab.w2i[""] 44 | # get the cids and masks for each step 45 | tot_words = 0 46 | cids = [] 47 | masks = [] 48 | 49 | for i in range(len(sents[0])): 50 | cids.append([(vocab.w2i[sent[i]] if len(sent) > i else S) for sent in sents]) 51 | mask = [(1 if len(sent)>i else 0) for sent in sents] 52 | masks.append(mask) 53 | tot_words += sum(mask) 54 | 55 | # start the rnn with "" 56 | init_ids = cids[0] 57 | s = init_state.add_input(dy.lookup_batch(self.lookup, init_ids)) 58 | 59 | losses = [] 60 | 61 | 
# feed char vectors into the RNN and predict the next char 62 | for cid, mask in zip(cids[1:], masks[1:]): 63 | score = dy.affine_transform([bias, R, s.output()]) 64 | loss = dy.pickneglogsoftmax_batch(score, cid) 65 | # mask the loss if at least one sentence is shorter 66 | if mask[-1] != 1: 67 | mask_expr = dy.inputVector(mask) 68 | mask_expr = dy.reshape(mask_expr, (1,), len(sents)) 69 | loss = loss * mask_expr 70 | 71 | losses.append(loss) 72 | # update the state of the RNN 73 | cemb = dy.lookup_batch(self.lookup, cid) 74 | s = s.add_input(cemb) 75 | 76 | return dy.sum_batches(dy.esum(losses)), tot_words 77 | 78 | 79 | def sample(self, first=1, nchars=0, stop=-1): 80 | res = [first] 81 | dy.renew_cg() 82 | state = self.builder.initial_state() 83 | 84 | R = dy.parameter(self.R) 85 | bias = dy.parameter(self.bias) 86 | cw = first 87 | while True: 88 | x_t = dy.lookup(self.lookup, cw) 89 | state = state.add_input(x_t) 90 | y_t = state.output() 91 | r_t = bias + (R * y_t) 92 | ydist = dy.softmax(r_t) 93 | dist = ydist.vec_value() 94 | rnd = random.random() 95 | for i,p in enumerate(dist): 96 | rnd -= p 97 | if rnd <= 0: break 98 | res.append(i) 99 | cw = i 100 | if cw == stop: break 101 | if nchars and len(res) > nchars: break 102 | return res 103 | 104 | def print_probs(self, sent): 105 | dy.renew_cg() 106 | # initialize the RNN 107 | init_state = self.builder.initial_state() 108 | # parameters -> expressions 109 | R = dy.parameter(self.R) 110 | bias = dy.parameter(self.bias) 111 | 112 | # get the cids for each step 113 | tot_chars = 0 114 | cids = [] 115 | for w in sent: 116 | cids.append(vocab.w2i[w]) 117 | # start the rnn with "" 118 | init_ids = cids[0] 119 | s = init_state.add_input(dy.lookup(self.lookup, init_ids)) 120 | 121 | for cid in cids[1:]: 122 | score = dy.affine_transform([bias, R, s.output()]) 123 | loss = dy.pickneglogsoftmax(score, cid) 124 | print(f"{vocab.i2w[cid]} {math.exp(-loss.value())}") 125 | cemb = dy.lookup(self.lookup, cid) 126 | s = 
s.add_input(cemb) 127 | 128 | 129 | def get_ppl(self, sents): 130 | loss = 0.0 131 | chars = 0.0 132 | for sent in sents: 133 | errs, _ = lm.BuildLMGraph([sent]) 134 | loss += errs.scalar_value() 135 | chars += len(sent)-1 136 | return math.exp(loss/chars) 137 | 138 | 139 | if __name__ == '__main__': 140 | parser = argparse.ArgumentParser() 141 | parser.add_argument('train', help='Path to the corpus file.') 142 | parser.add_argument('dev', help='Path to the validation corpus file.') 143 | parser.add_argument('test', help='Path to the test corpus file.') 144 | parser.add_argument('--print_probs', action="store_true", help='whether to print the probabilities per word over the validation set') 145 | parser.add_argument('--perform_train', action="store_true", help='whether to perform training') 146 | args, unknown = parser.parse_known_args() 147 | 148 | train = util.CorpusReader(args.train, begin="", end="") 149 | dev = util.CorpusReader(args.dev, begin="", end="") 150 | test = util.CorpusReader(args.test, begin="", end="") 151 | 152 | vocab = util.Vocab.from_corpus(train) 153 | 154 | VOCAB_SIZE = vocab.size() 155 | print(f"VOCAB SIZE: {VOCAB_SIZE}") 156 | 157 | model = dy.Model() 158 | trainer = dy.SimpleSGDTrainer(model) 159 | 160 | #lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder) 161 | lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.LSTMBuilder) 162 | 163 | if args.perform_train: 164 | train = list(train) 165 | # Sort training sentences in descending order and count minibatches 166 | train.sort(key=lambda x: -len(x)) 167 | train_order = [x*MB_SIZE for x in range(int((len(train)-1)/MB_SIZE + 1))] 168 | print(f"Created {len(train_order)} minibatches.") 169 | 170 | 171 | prev_dev_ppl = 100000 172 | # Perform training 173 | i = 0 174 | chars = loss = 0.0 175 | for ITER in range(100): 176 | random.shuffle(train_order) 177 | #_start = time.time() 178 | for sid in train_order: 179 | # 
train on the minibatch 180 | errs, mb_words = lm.BuildLMGraph(train[sid: sid + MB_SIZE]) 181 | loss += errs.scalar_value() 182 | chars += mb_words 183 | errs.backward() 184 | trainer.update() 185 | 186 | print("ITER",ITER,loss) 187 | trainer.status() 188 | # Eval on the development set 189 | devppl = lm.get_ppl(dev) 190 | print(f"DEV ppl: {devppl}") 191 | if devppl < prev_dev_ppl: 192 | lm.save_to_disk("models/LSTMLanguageModel-word-batch.model") 193 | prev_dev_ppl = devppl 194 | loss = 0.0 195 | chars = 0.0 196 | 197 | print("loading the saved model...") 198 | lm.load_from_disk("models/LSTMLanguageModel-word-batch.model") 199 | samp = lm.sample(first=vocab.w2i[""],stop=vocab.w2i[""]) 200 | print(" ".join([vocab.i2w[c] for c in samp]).strip()) 201 | 202 | test_ppl = lm.get_ppl(test) 203 | print(f"Test perplexity: {test_ppl}") 204 | 205 | if args.print_probs: 206 | for sent in list(dev): 207 | lm.print_probs(sent) 208 | print() 209 | 210 | -------------------------------------------------------------------------------- /02-neurallm/rnnlm-word.py: -------------------------------------------------------------------------------- 1 | import dynet as dy 2 | import time 3 | import random 4 | 5 | LAYERS = 2 6 | INPUT_DIM = 256 #50 #256 7 | HIDDEN_DIM = 256 # 50 #1024 8 | VOCAB_SIZE = 0 9 | 10 | from collections import defaultdict 11 | from itertools import count 12 | import argparse 13 | import sys 14 | import util 15 | import math 16 | 17 | class RNNLanguageModel: 18 | def __init__(self, model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.SimpleRNNBuilder): 19 | self.builder = builder(LAYERS, INPUT_DIM, HIDDEN_DIM, model) 20 | 21 | self.lookup = model.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM)) 22 | self.R = model.add_parameters((VOCAB_SIZE, HIDDEN_DIM)) 23 | self.bias = model.add_parameters((VOCAB_SIZE)) 24 | 25 | def save_to_disk(self, filename): 26 | dy.save(filename, [self.builder, self.lookup, self.R, self.bias]) 27 | 28 | def load_from_disk(self, 
filename): 29 | (self.builder, self.lookup, self.R, self.bias) = dy.load(filename, model) 30 | 31 | def build_lm_graph(self, sent): 32 | dy.renew_cg() 33 | init_state = self.builder.initial_state() 34 | 35 | R = dy.parameter(self.R) 36 | bias = dy.parameter(self.bias) 37 | errs = [] # will hold expressions 38 | es=[] 39 | state = init_state 40 | for (cw,nw) in zip(sent,sent[1:]): 41 | # assume word is already a word-id 42 | x_t = dy.lookup(self.lookup, int(cw)) 43 | state = state.add_input(x_t) 44 | y_t = state.output() 45 | r_t = bias + (R * y_t) 46 | err = dy.pickneglogsoftmax(r_t, int(nw)) 47 | errs.append(err) 48 | nerr = dy.esum(errs) 49 | return nerr 50 | 51 | def predict_next_word(self, sentence): 52 | dy.renew_cg() 53 | init_state = self.builder.initial_state() 54 | R = dy.parameter(self.R) 55 | bias = dy.parameter(self.bias) 56 | state = init_state 57 | for cw in sentence: 58 | # assume word is already a word-id 59 | x_t = dy.lookup(self.lookup, int(cw)) 60 | state = state.add_input(x_t) 61 | y_t = state.output() 62 | r_t = bias + (R * y_t) 63 | prob = dy.softmax(r_t) 64 | return prob 65 | 66 | def sample(self, first=1, nchars=0, stop=-1): 67 | res = [first] 68 | dy.renew_cg() 69 | state = self.builder.initial_state() 70 | 71 | R = dy.parameter(self.R) 72 | bias = dy.parameter(self.bias) 73 | cw = first 74 | while True: 75 | x_t = dy.lookup(self.lookup, cw) 76 | state = state.add_input(x_t) 77 | y_t = state.output() 78 | r_t = bias + (R * y_t) 79 | ydist = dy.softmax(r_t) 80 | dist = ydist.vec_value() 81 | rnd = random.random() 82 | for i,p in enumerate(dist): 83 | rnd -= p 84 | if rnd <= 0: break 85 | res.append(i) 86 | cw = i 87 | if cw == stop: break 88 | if nchars and len(res) > nchars: break 89 | return res 90 | 91 | def get_ppl(self, sents): 92 | loss = 0.0 93 | chars = 0.0 94 | for sent in sents: 95 | isent = [vocab.w2i[w] for w in sent] 96 | errs = lm.build_lm_graph(isent) 97 | loss += errs.scalar_value() 98 | chars += len(sent)-1 99 | return 
math.exp(loss/chars) 100 | 101 | 102 | def print_probs(self, sent): 103 | dy.renew_cg() 104 | # initialize the RNN 105 | init_state = self.builder.initial_state() 106 | # parameters -> expressions 107 | R = dy.parameter(self.R) 108 | bias = dy.parameter(self.bias) 109 | 110 | # get the cids and masks for each step 111 | tot_chars = 0 112 | cids = [] 113 | for w in sent: 114 | cids.append(vocab.w2i[w]) 115 | # start the rnn with "" 116 | init_ids = cids[0] 117 | s = init_state.add_input(dy.lookup(self.lookup, init_ids)) 118 | 119 | # feed char vectors into the RNN and predict the next char 120 | for cid in cids[1:]: 121 | score = dy.affine_transform([bias, R, s.output()]) 122 | loss = dy.pickneglogsoftmax(score, cid) 123 | print(f"{vocab.i2w[cid]} {loss.value()}") 124 | # update the state of the RNN 125 | cemb = dy.lookup(self.lookup, cid) 126 | s = s.add_input(cemb) 127 | 128 | 129 | 130 | 131 | 132 | if __name__ == '__main__': 133 | parser = argparse.ArgumentParser() 134 | parser.add_argument('train', help='Path to the corpus file.') 135 | parser.add_argument('dev', help='Path to the validation corpus file.') 136 | parser.add_argument('test', help='Path to the test corpus file.') 137 | parser.add_argument('--print_probs',action="store_true", help='whether to print the probabilities per word over the validation set') 138 | parser.add_argument('--perform_train', action="store_true", help='whether to perform training') 139 | args, unknown = parser.parse_known_args() 140 | 141 | train = util.CorpusReader(args.train, begin="", end="") 142 | dev = util.CorpusReader(args.dev, begin="", end="") 143 | test = util.CorpusReader(args.test, begin="", end="") 144 | 145 | vocab = util.Vocab.from_corpus(train) 146 | 147 | VOCAB_SIZE = vocab.size() 148 | print(f"VOCAB SIZE: {VOCAB_SIZE}") 149 | 150 | model = dy.Model() 151 | trainer = dy.SimpleSGDTrainer(model, learning_rate=1.0) 152 | 153 | lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, 
builder=dy.LSTMBuilder) 154 | 155 | if args.perform_train: 156 | train = list(train) 157 | 158 | prev_dev_ppl = 100000 159 | 160 | chars = loss = 0.0 161 | for ITER in range(100): 162 | random.shuffle(train) 163 | for i,sent in enumerate(train): 164 | _start = time.time() 165 | if i % 5000 == 0: 166 | trainer.status() 167 | if chars > 0: print(loss / chars,) 168 | for _ in range(1): 169 | samp = lm.sample(first=vocab.w2i[""],stop=vocab.w2i[""]) 170 | print(" ".join([vocab.i2w[c] for c in samp]).strip()) 171 | 172 | devppl = lm.get_ppl(dev) 173 | print(f"DEV ppl: {devppl}") 174 | if devppl < prev_dev_ppl: 175 | lm.save_to_disk("models/LSTMLanguageModel-word.model") 176 | prev_dev_ppl = devppl 177 | loss = 0.0 178 | chars = 0.0 179 | 180 | chars += len(sent)-1 181 | isent = [vocab.w2i[w] for w in sent] 182 | errs = lm.build_lm_graph(isent) 183 | loss += errs.scalar_value() 184 | errs.backward() 185 | trainer.update() 186 | #print "TM:",(time.time() - _start)/len(sent) 187 | print("ITER {}, loss={}".format(ITER, loss)) 188 | trainer.status() 189 | 190 | 191 | print("loading the saved model...") 192 | lm.load_from_disk("models/LSTMLanguageModel-word.model") 193 | samp = lm.sample(first=vocab.w2i[""],stop=vocab.w2i[""]) 194 | print(" ".join([vocab.i2w[c] for c in samp]).strip()) 195 | 196 | test_ppl = lm.get_ppl(test) 197 | print(f"Test perplexity: {test_ppl}") 198 | 199 | if args.print_probs: 200 | for sent in list(dev): 201 | lm.print_probs(sent) 202 | print() 203 | 204 | 205 | -------------------------------------------------------------------------------- /02-neurallm/rnnlm.py: -------------------------------------------------------------------------------- 1 | import dynet as dy 2 | import time 3 | import random 4 | 5 | LAYERS = 2 6 | INPUT_DIM = 256 #50 #256 7 | HIDDEN_DIM = 256 # 50 #1024 8 | VOCAB_SIZE = 0 9 | 10 | from collections import defaultdict 11 | from itertools import count 12 | import argparse 13 | import sys 14 | import util 15 | import math 16 | 17 | 
class RNNLanguageModel: 18 | def __init__(self, model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.SimpleRNNBuilder): 19 | self.builder = builder(LAYERS, INPUT_DIM, HIDDEN_DIM, model) 20 | 21 | self.lookup = model.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM)) 22 | self.R = model.add_parameters((VOCAB_SIZE, HIDDEN_DIM)) 23 | self.bias = model.add_parameters((VOCAB_SIZE)) 24 | 25 | def save_to_disk(self, filename): 26 | dy.save(filename, [self.builder, self.lookup, self.R, self.bias]) 27 | 28 | def load_from_disk(self, filename): 29 | (self.builder, self.lookup, self.R, self.bias) = dy.load(filename, model) 30 | 31 | def build_lm_graph(self, sent): 32 | dy.renew_cg() 33 | init_state = self.builder.initial_state() 34 | 35 | R = dy.parameter(self.R) 36 | bias = dy.parameter(self.bias) 37 | errs = [] # will hold expressions 38 | es=[] 39 | state = init_state 40 | for (cw,nw) in zip(sent,sent[1:]): 41 | # assume word is already a word-id 42 | x_t = dy.lookup(self.lookup, int(cw)) 43 | state = state.add_input(x_t) 44 | y_t = state.output() 45 | r_t = bias + (R * y_t) 46 | err = dy.pickneglogsoftmax(r_t, int(nw)) 47 | errs.append(err) 48 | nerr = dy.esum(errs) 49 | return nerr 50 | 51 | def predict_next_word(self, sentence): 52 | dy.renew_cg() 53 | init_state = self.builder.initial_state() 54 | R = dy.parameter(self.R) 55 | bias = dy.parameter(self.bias) 56 | state = init_state 57 | for cw in sentence: 58 | # assume word is already a word-id 59 | x_t = dy.lookup(self.lookup, int(cw)) 60 | state = state.add_input(x_t) 61 | y_t = state.output() 62 | r_t = bias + (R * y_t) 63 | prob = dy.softmax(r_t) 64 | return prob 65 | 66 | def sample(self, first=1, nchars=0, stop=-1): 67 | res = [first] 68 | dy.renew_cg() 69 | state = self.builder.initial_state() 70 | 71 | R = dy.parameter(self.R) 72 | bias = dy.parameter(self.bias) 73 | cw = first 74 | while True: 75 | x_t = dy.lookup(self.lookup, cw) 76 | state = state.add_input(x_t) 77 | y_t = state.output() 78 | r_t = bias 
+ (R * y_t) 79 | ydist = dy.softmax(r_t) 80 | dist = ydist.vec_value() 81 | rnd = random.random() 82 | for i,p in enumerate(dist): 83 | rnd -= p 84 | if rnd <= 0: break 85 | res.append(i) 86 | cw = i 87 | if cw == stop: break 88 | if nchars and len(res) > nchars: break 89 | return res 90 | 91 | def get_ppl(self, sents): 92 | loss = 0.0 93 | chars = 0.0 94 | for sent in sents: 95 | isent = [vocab.w2i[w] for w in sent] 96 | errs = lm.build_lm_graph(isent) 97 | loss += errs.scalar_value() 98 | chars += len(sent)-1 99 | return math.exp(loss/chars) 100 | 101 | 102 | 103 | 104 | if __name__ == '__main__': 105 | parser = argparse.ArgumentParser() 106 | parser.add_argument('train', help='Path to the corpus file.') 107 | parser.add_argument('dev', help='Path to the validation corpus file.') 108 | parser.add_argument('test', help='Path to the test corpus file.') 109 | parser.add_argument('--print_probs', action="store_true", help='whether to print the probabilities per word over the validation set') 110 | parser.add_argument('--perform_train', action="store_true", help='whether to perform training') 111 | args, unknown = parser.parse_known_args() 112 | 113 | train = util.CharsCorpusReader(args.train, begin="") 114 | dev = util.CharsCorpusReader(args.dev, begin="") 115 | test = util.CharsCorpusReader(args.test, begin="") 116 | 117 | vocab = util.Vocab.from_corpus(train) 118 | 119 | VOCAB_SIZE = vocab.size() 120 | 121 | model = dy.Model() 122 | trainer = dy.SimpleSGDTrainer(model, learning_rate=1.0) 123 | 124 | lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.LSTMBuilder) 125 | 126 | train = list(train) 127 | 128 | prev_dev_ppl = 100000 129 | 130 | 131 | chars = loss = 0.0 132 | for ITER in range(100): 133 | random.shuffle(train) 134 | for i,sent in enumerate(train): 135 | _start = time.time() 136 | if i % 200 == 0: 137 | trainer.status() 138 | if chars > 0: print(loss / chars,) 139 | for _ in range(1): 140 | samp = 
lm.sample(first=vocab.w2i[""],stop=vocab.w2i["\n"]) 141 | print("".join([vocab.i2w[c] for c in samp]).strip()) 142 | 143 | devppl = lm.get_ppl(dev) 144 | print(f"DEV ppl: {devppl}") 145 | if devppl < prev_dev_ppl: 146 | lm.save_to_disk("LSTMLanguageModel.model") 147 | prev_dev_ppl = devppl 148 | loss = 0.0 149 | chars = 0.0 150 | 151 | chars += len(sent)-1 152 | isent = [vocab.w2i[w] for w in sent] 153 | errs = lm.build_lm_graph(isent) 154 | loss += errs.scalar_value() 155 | errs.backward() 156 | trainer.update() 157 | #print "TM:",(time.time() - _start)/len(sent) 158 | print("ITER {}, loss={}".format(ITER, loss)) 159 | trainer.status() 160 | 161 | 162 | #lm.save_to_disk("LSTMLanguageModel.model") 163 | 164 | print("loading the saved model...") 165 | lm.load_from_disk("LSTMLanguageModel.model") 166 | samp = lm.sample(first=vocab.w2i[""],stop=vocab.w2i["\n"]) 167 | print("".join([vocab.i2w[c] for c in samp]).strip()) 168 | 169 | test_ppl = lm.get_ppl(test) 170 | print(f"Test perplexity: {test_ppl}") 171 | 172 | 173 | -------------------------------------------------------------------------------- /02-neurallm/util.py: -------------------------------------------------------------------------------- 1 | import mmap 2 | 3 | class Vocab: 4 | def __init__(self, w2i): 5 | self.w2i = dict(w2i) 6 | self.i2w = {i:w for w,i in w2i.items()} 7 | 8 | @classmethod 9 | def from_corpus(cls, corpus): 10 | w2i = {} 11 | for sent in corpus: 12 | for word in sent: 13 | w2i.setdefault(word, len(w2i)) 14 | 15 | return Vocab(w2i) 16 | 17 | def size(self): 18 | return len(self.w2i.keys()) 19 | 20 | #This corpus reader can be used when reading large text file into a memory can solve IO bottleneck of training. 
21 | #Use it exactly as the regular CorpusReader from the rnnlm.py 22 | class FastCorpusReader: 23 | def __init__(self, fname, begin=None, end=None): 24 | self.fname = fname 25 | self.f = open(fname, 'rb') 26 | self.begin = begin 27 | self.end = end 28 | def __iter__(self): 29 | #This usage of mmap is for a Linux\OS-X 30 | #For Windows replace prot=mmap.PROT_READ with access=mmap.ACCESS_READ 31 | m = mmap.mmap(self.f.fileno(), 0, prot=mmap.PROT_READ) 32 | begin = self.begin 33 | end = self.end 34 | data = m.readline() 35 | if begin: 36 | while data: 37 | line = data 38 | data = m.readline() 39 | line = line.strip().split() 40 | yield [begin] + line + [end] 41 | else: 42 | while data: 43 | line = data 44 | data = m.readline() 45 | line = line.strip().split() 46 | yield line + [end] 47 | 48 | class CorpusReader: 49 | def __init__(self, fname, begin=None, end=None): 50 | self.fname = fname 51 | self.begin = begin 52 | self.end = end 53 | def __iter__(self): 54 | begin = self.begin 55 | end = self.end 56 | for line in open(self.fname, 'r'): 57 | line = line.strip().split() 58 | #line = [' ' if x == '' else x for x in line] 59 | if begin: 60 | yield [begin] + line + [end] 61 | else: 62 | yield line + [end] 63 | 64 | class CharsCorpusReader: 65 | def __init__(self, fname, begin=None): 66 | self.fname = fname 67 | self.begin = begin 68 | 69 | def __iter__(self): 70 | begin = self.begin 71 | with open(self.fname) as f: 72 | for line in f: 73 | line = list(line) 74 | if begin: 75 | line = [begin] + line 76 | yield line 77 | -------------------------------------------------------------------------------- /03-encdec/README.md: -------------------------------------------------------------------------------- 1 | # Neural encoder decoder model code example 2 | by Antonis Anastasopoulos 3 | 4 | This is an example of a simple encoder decoder model, based on the DyNet examples. 5 | 6 | ## Data 7 | 8 | We will use a different enc-dec example: morphological inflection. 
9 | The task is given a sequence of input morphological tags and the lemma of a word, to produce the inflected form. 10 | The Asturian data (taken from the SIGMORPHON 2019 challenge) provide the input and the output in each line, separated by `|||`. 11 | 12 | 13 | ## Basic Usage 14 | 15 | py encdec.py [train] [dev] [test] 16 | 17 | -------------------------------------------------------------------------------- /03-encdec/asturian-dev.txt: -------------------------------------------------------------------------------- 1 | V.PTCP PST FEM SG e n t a m a r ||| e n t a m a d a 2 | V IND PST 2 PL IPFV LGSPEC2 p e s l l a r ||| p e s l l a b a i s 3 | V 3 SG IND PST PRF LGSPEC2 a f o r r a r ||| a f o r r a r a 4 | V 2 SG IND FUT f r o t a r ||| f r o t a r á s 5 | V NFIN d e r i v a r ||| d e r i v a r 6 | V 1 SG COND e s c u c h a r ||| e s c u c h a r í a 7 | V 1 SG IND PST PRF LGSPEC2 a b o l i r ||| a b o l i e r a 8 | V 1 PL IND PST PRF LGSPEC2 e m p e c i p i a r ||| e m p e c i p i á r a m o s 9 | V IND PST 2 PL IPFV LGSPEC1 a m o s a r ||| a m o s a b e i s 10 | V 3 PL SBJV PRS LGSPEC1 e l i m i n a r ||| e l i m i n e n 11 | V IND PRS 3 PL d i r ||| v a n 12 | V IND PRS 3 SG c r i s t i a n i z a r ||| c r i s t i a n i z a 13 | V 2 SG IND FUT d e x a r ||| d e x a r á s 14 | V 1 SG IND FUT c a l c a r ||| c a l c a r é 15 | V 1 SG IND PST PRF LGSPEC2 d e b u r a r ||| d e b u r a r a 16 | V 2 PL IND FUT a m e d r a n a r ||| a m e d r a n a r é i s 17 | V 3 SG SBJV PRS c o n t e s t a r ||| c o n t e s t e 18 | V 2 PL COND LGSPEC2 ḥ i s p i a r ||| ḥ i s p i a r í a i s 19 | V 3 SG SBJV PRS a t a r ||| a t e 20 | V 3 PL IND PST PRF LGSPEC2 a f i l a r ||| a f i l a r a n 21 | V 1 SG IND PST PRF LGSPEC1 e s b a r a f u n d i a r ||| e s b a r a f u n d i a r e 22 | V.PTCP PST NEUT SG c o l l e c h a r ||| c o l l e c h a o 23 | V.PTCP PST NEUT SG a p o y a r ||| a p o y a o 24 | V 3 SG IND PST PRF LGSPEC3 f a e r ||| f i x e r a 25 | V 3 PL IND FUT t o r t u r 
a r ||| t o r t u r a r á n 26 | V IND PRS 1 PL c i r c u l a r ||| c i r c u l a m o s 27 | V 2 PL COND LGSPEC2 v e r a n i a r ||| v e r a n i a r í a i s 28 | V IND PST 1 PL IPFV LGSPEC1 s e m e n t a r ||| s e m e n t á b e m o s 29 | V.CVB PRS c o m p r a r ||| c o m p r a n d o 30 | V 2 PL IND FUT a m o s a r ||| a m o s a r é i s 31 | V IND PRS 2 SG p r e g u n t a r ||| p r e g u n t e s 32 | V 2 PL COND LGSPEC1 a m e y o r a r ||| a m e y o r a r í e i s 33 | V 1 PL IND PST PRF LGSPEC1 c a z a r ||| c a z á r e m o s 34 | V 2 PL IND PST PRF LGSPEC2 x i r i n g a r ||| x i r i n g a r a i s 35 | V 1 PL IND FUT r e b a x a r ||| r e b a x a r e m o s 36 | V 2 PL IMP t e n e r ||| t e n é i 37 | V 3 PL SBJV PST IPFV LGSPEC1 a f u m a r ||| a f u m a r e n 38 | V 2 SG SBJV PRS LGSPEC1 d e s e a r ||| d e s e e s 39 | V IND PST 3 PL PFV p r e g u n t a r ||| p r e g u n t a r o n 40 | V 3 PL SBJV PRS LGSPEC1 f u m a r ||| f u m e n 41 | V.PTCP PST MASC PL a c o l l e c h a r ||| a c o l l e c h a o s 42 | V IND PRS 1 PL a t r a p a r ||| a t r a p a m o s 43 | V IND PST 2 PL IPFV LGSPEC1 t r u ñ a r ||| t r u ñ a b e i s 44 | V.PTCP PST FEM PL p l e g a r ||| p l e g a e s 45 | V 1 PL COND LGSPEC2 a l z a r ||| a l z a r í a m o s 46 | V IND PST 2 SG PFV LGSPEC2 d u d a r ||| d u d e s t i 47 | V 3 SG IND PST PRF LGSPEC1 a f i t a r ||| a f i t a r e 48 | V 2 PL IND PST PRF LGSPEC2 e s i x i r ||| e s i x i e r a i s 49 | V 3 PL SBJV PRS LGSPEC1 a f i t a r ||| a f i t e n 50 | V IND PRS 2 PL a f e u t a r ||| a f e u t á i s 51 | V IND PST 2 PL PFV LGSPEC1 e n a n t a r ||| e n a n t a s t i s 52 | V 2 SG SBJV PRS LGSPEC1 a f i t a r ||| a f i t e s 53 | V.PTCP PST MASC SG l l i m p i a r ||| l l i m p i á u 54 | V IND PST 1 SG IPFV c l a v a r ||| c l a v a b a 55 | V 2 SG SBJV PRS LGSPEC1 b l i n c a r ||| b l i n q u e s 56 | V 1 PL SBJV PST IPFV LGSPEC2 e m p e c i p i a r ||| e m p e c i p i á r a m o s 57 | V 1 SG IND PST PRF LGSPEC2 t a r ||| t u v i e 
r e 58 | V 2 SG IND PST PRF LGSPEC1 x i b l a r ||| x i b l a r e s 59 | V IND PST 2 PL IPFV LGSPEC1 l l i m p i a r ||| l l i m p i a b e i s 60 | V 3 PL IND PST PRF LGSPEC1 s e n t a r ||| s e n t a r e n 61 | V 1 SG COND i n a u g u r a r ||| i n a u g u r a r í a 62 | V 1 SG IND PST PRF LGSPEC1 c o n t e s t a r ||| c o n t e s t a r e 63 | V 2 SG SBJV PRS LGSPEC1 a m p a r a r ||| a m p a r e s 64 | V 1 PL SBJV PST IPFV LGSPEC2 t o m a r ||| t o m á r a m o s 65 | V IND PRS 2 PL x u g a r ||| x u g á i s 66 | V 3 SG IND PST PRF LGSPEC1 p a g a r ||| p a g a r e 67 | V 1 SG IND FUT v i a x a r ||| v i a x a r é 68 | V 3 PL COND a c e p t a r ||| a c e p t a r í e n 69 | V 2 PL SBJV PST IPFV LGSPEC2 a s e d i a r ||| a s e d i a r a i s 70 | V 3 SG SBJV PRS p e ñ a r ||| p e ñ e 71 | V IND PST 1 SG PFV d u d a r ||| d u d é 72 | V 1 SG SBJV PRS o d i a r ||| o d i e 73 | V 2 SG SBJV PST IPFV LGSPEC2 a n c l a r ||| a n c l a r a s 74 | V IND PRS 1 PL i n f l a r ||| i n f l a m o s 75 | V.PTCP PST MASC SG p a r t i c i p a r ||| p a r t i c i p á u 76 | V 3 SG IND PST PRF LGSPEC1 a y u n a r ||| a y u n a r e 77 | V 2 SG COND c o m b a y a r ||| c o m b a y a r í e s 78 | V 1 PL IND PST PRF LGSPEC1 s u d a r ||| s u d á r e m o s 79 | V 1 SG IND PST PRF LGSPEC2 e n s e ñ a r ||| e n s e ñ a r a 80 | V IND PRS 2 PL p a r t i c i p a r ||| p a r t i c i p á i s 81 | V 2 PL SBJV PRS a n i c i a r ||| a n i c i é i s 82 | V 1 PL SBJV PST IPFV LGSPEC2 a s p i r a r ||| a s p i r á r a m o s 83 | V 1 PL SBJV PST IPFV LGSPEC1 a x u n t a r ||| a x u n t á r e m o s 84 | V IND PST 3 PL PFV e n s u g a r ||| e n s u g a r o n 85 | V 2 PL SBJV PST IPFV LGSPEC1 s a n g r a r ||| s a n g r a r e i s 86 | V 2 SG IND PST PRF LGSPEC2 c o r t a r ||| c o r t a r a s 87 | V 3 PL COND v a c i a r ||| v a c i a r í e n 88 | V 2 SG IND PST PRF LGSPEC1 c o l o c a r ||| c o l o c a r e s 89 | V 2 SG IND PST PRF LGSPEC1 d o b l a r ||| d o b l a r e s 90 | V 2 SG SBJV PST IPFV 
LGSPEC2 b a i l a r ||| b a i l a r a s 91 | V 2 SG IND PST PRF LGSPEC1 e n t r a r ||| e n t r a r e s 92 | V IND PST 1 SG PFV x a n t a r ||| x a n t é 93 | V 3 PL SBJV PRS LGSPEC1 s a c r i f i c a r ||| s a c r i f i q u e n 94 | V IND PST 2 SG PFV LGSPEC2 p e c a r ||| p e q u e s t i 95 | V.PTCP PST MASC PL l l e g a l i z a r ||| l l e g a l i z a o s 96 | V 2 PL SBJV PST IPFV LGSPEC2 e x i l i a r ||| e x i l i a r a i s 97 | V 2 SG IND PST PRF LGSPEC2 e s i x i r ||| e s i x i e r a s 98 | V 3 PL SBJV PST IPFV LGSPEC2 c i a r r a r ||| c i a r r a r a n 99 | V 1 SG COND d e s a r r o l l a r ||| d e s a r r o l l a r í a 100 | V 2 PL SBJV PST IPFV LGSPEC2 c o m e r ||| c o m e r í a i s 101 | V.PTCP PST FEM SG a l b o r i a r ||| a l b o r i a d a 102 | V IND PST 2 PL IPFV LGSPEC1 c a z a r ||| c a z a b e i s 103 | V 2 PL IMP s a n g r a r ||| s a n g r á i 104 | V 3 PL SBJV PST IPFV LGSPEC2 a s p i r a r ||| a s p i r a r a n 105 | V 3 SG SBJV PST IPFV LGSPEC1 f a c e r ||| f i c i e r a 106 | V NFIN e s t r o p i a r ||| e s t r o p i a r 107 | V 1 SG SBJV PST IPFV LGSPEC2 a x u n t a r ||| a x u n t a r a 108 | V 2 SG SBJV PST IPFV e s i x i r ||| e s i x i r í e s 109 | V 1 PL IND PST PRF LGSPEC1 e s t r o p i a r ||| e s t r o p i á r e m o s 110 | V 1 PL SBJV PRS c o m b i n a r ||| c o m b i n e m o s 111 | V IND PST 1 SG PFV a f a l u c a r ||| a f a l u q u é 112 | V.PTCP PST MASC PL c o l a r ||| c o l a o s 113 | V 3 PL COND v e n d e r ||| v e n d i e r e n 114 | V 1 PL SBJV PST IPFV LGSPEC1 c i r c u l a r ||| c i r c u l á r e m o s 115 | V 3 PL SBJV PRS LGSPEC2 i n v i t a r ||| i n v i t a n 116 | V.PTCP PST FEM SG t o c a r ||| t o c a d a 117 | V 3 PL COND s e n t a r ||| s e n t a r í e n 118 | V IND PST 3 SG IPFV v i a x a r ||| v i a x a b a 119 | V IND PST 2 PL PFV LGSPEC1 p a r t i c i p a r ||| p a r t i c i p a s t i s 120 | V NFIN a y u d a r ||| a y u d a r 121 | V IND PRS 2 SG t e n e r ||| t i e n e s 122 | ADJ PL FEM c a t a 
l á n ||| c a t a l a n e s 123 | V IND PRS 3 SG p a g a r ||| p a g a 124 | V 1 SG SBJV PST IPFV s o r r i r ||| s o r r i r í a 125 | V IND PST 2 PL PFV LGSPEC2 a p u ñ a l a r ||| a p u ñ a l e s t i s 126 | V 1 SG SBJV PST IPFV LGSPEC2 c r i s t i a n i z a r ||| c r i s t i a n i z a r a 127 | V 2 PL IMP a m a s u ñ a r ||| a m a s u ñ á i 128 | V IND PST 2 SG IPFV a t r a g a n t a r ||| a t r a g a n t a b e s 129 | V IND PST 3 SG IPFV a f u m i a r ||| a f u m i a b a 130 | V 2 SG IMP c o m e r ||| c o m i 131 | V IND PST 2 PL PFV LGSPEC1 a b r u x a r ||| a b r u x a s t i s 132 | V IND PST 2 PL IPFV LGSPEC1 e s i x i r ||| e s i x í e i s 133 | V IND PST 2 SG PFV LGSPEC1 e s c a p a r ||| e s c a p a s t i 134 | V 3 PL SBJV PRS LGSPEC2 e s c a p a r ||| e s c a p a n 135 | V IND PRS 2 SG f u m i a r ||| f u m i e s 136 | V 2 PL SBJV PST IPFV LGSPEC2 s o t e r r a r ||| s o t e r r a r a i s 137 | V 1 SG IND PST PRF LGSPEC2 r e c o y e r ||| r e c o y e r e 138 | V IND PRS 3 SG e s g a t u ñ a r ||| e s g a t u ñ a 139 | V 2 SG SBJV PRS LGSPEC2 a m a l a r ||| a m a l a s 140 | V 2 PL COND LGSPEC1 c o m p a r a r ||| c o m p a r a r í e i s 141 | V 2 PL IND FUT o r d e n a r ||| o r d e n a r é i s 142 | V 1 SG SBJV PRS a s o l e y a r ||| a s o l e y e 143 | V IND PRS 2 SG l l a m b e r ||| l l a m b e s 144 | V 1 PL IND PST PRF LGSPEC2 c h a r l a r ||| c h a r l á r a m o s 145 | V IND PRS 2 PL e s t a z a r ||| e s t a z á i s 146 | V IND PST 2 SG PFV LGSPEC2 t e n e r ||| t u v i s t i 147 | V 2 SG COND d e s a r r o l l a r ||| d e s a r r o l l a r í e s 148 | V 3 PL SBJV PST IPFV LGSPEC2 c r i b a r ||| c r i b a r a n 149 | V 2 PL IND PST PRF LGSPEC1 x i m i r ||| x i m i e r e i s 150 | V 2 PL IND FUT e s p l o r a r ||| e s p l o r a r é i s 151 | V 3 PL SBJV PRS LGSPEC2 p a g a r ||| p a g u a n 152 | V IND PST 3 SG IPFV r e s p o n d e r ||| r e s p o n d í a 153 | ADJ SG NEUT v a s c u ||| v a s c o 154 | V IND PST 1 SG IPFV e l i m i n a r 
||| e l i m i n a b a 155 | V IND PST 1 SG PFV m e d r a r ||| m e d r é 156 | V IND PST 3 PL IPFV p r e g u n t a r ||| p r e g u n t a b e n 157 | V 2 PL COND LGSPEC2 a c a b a r ||| a c a b a r í a i s 158 | V IND PRS 1 SG b a i l l a r ||| b a i l l o 159 | V IND PST 3 PL PFV e m p e c i p i a r ||| e m p e c i p i a r o n 160 | V 3 PL IND PST PRF LGSPEC2 r e t a r ||| r e t a r a n 161 | V 1 SG IND PST PRF LGSPEC2 e n s u g a r ||| e n s u g a r a 162 | V 3 SG SBJV PRS a y u d a r ||| a y u d e 163 | V NFIN x a n t a r ||| x a n t a r 164 | V 3 PL SBJV PRS LGSPEC2 a g r u p a r ||| a g r u p a n 165 | V 1 PL IND PST PRF LGSPEC1 a c o m p a ñ a r ||| a c o m p a ñ á r e m o s 166 | V IND PRS 3 SG i n s p i r a r ||| i n s p i r a 167 | V IND PST 3 PL PFV a l l o ñ a r ||| a l l o ñ a r o n 168 | V 2 PL IND PST PRF LGSPEC1 p r e ñ a r ||| p r e ñ a r e i s 169 | V.PTCP PST NEUT SG d i r ||| d í o 170 | V 3 PL IND PST PRF LGSPEC2 c u s p i r ||| c u s p i e r a n 171 | V 1 PL IND PST PRF LGSPEC2 a l e n d a r ||| a l e n d á r a m o s 172 | V 2 PL COND LGSPEC2 a r r i c a r ||| a r r i c a r í a i s 173 | V 2 PL COND LGSPEC2 m e n t i r ||| m e n t i e r a i s 174 | V 2 SG SBJV PST IPFV LGSPEC1 e s f r e g a r ||| e s f r e g a r e s 175 | V IND PST 3 PL IPFV o m i t i r ||| o m i t í e n 176 | V 2 PL IMP v e n d e r ||| v e n d é i 177 | V IND PRS 2 SG p l e g a r ||| p l e g u e s 178 | ADJ SG MASC c a s t e l l a n u ||| c a s t e l l a n u 179 | V IND PRS 1 SG p e t r i f i c a r ||| p e t r i f i c o 180 | V.PTCP PST NEUT SG e s c a e c e r ||| e s c a e c í o 181 | V IND PST 3 SG PFV f a c e r ||| f i z o f i x o 182 | V 3 PL IND PST PRF LGSPEC1 u s a r ||| u s a r e n 183 | V.PTCP PST NEUT SG c a s t i g a r ||| c a s t i g a o 184 | V 3 PL COND q u e m a r ||| q u e m a r í e n 185 | V IND PST 2 SG PFV LGSPEC2 e m p r e s t a r ||| e m p r e s t e s t i 186 | V 2 SG IND PST PRF v e n d e r ||| v e n d i e r e s 187 | V NFIN a m p a r a r ||| a m p a r a r 
188 | V IND PST 1 SG IPFV e n f i l a r ||| e n f i l a b a 189 | V 1 SG IND PST PRF LGSPEC2 b a x a r ||| b a x a r a 190 | V IND PST 2 PL IPFV LGSPEC1 x e l a r ||| x e l a b e i s 191 | V IND PST 3 PL PFV d e s e a r ||| d e s e a r o n 192 | V.CVB PRS a m a s u ñ a r ||| a m a s u ñ a n d o 193 | V.CVB PRS e s p a r d e r ||| e s p a r d i e n d o 194 | V IND PST 2 PL PFV LGSPEC1 m a x i n a r ||| m a x i n a s t i s 195 | V.PTCP PST NEUT SG u s a r ||| u s a o 196 | V 1 SG IND PST PRF LGSPEC2 p r e g u n t a r ||| p r e g u n t a r a 197 | V.PTCP PST NEUT SG l l a n t a r ||| l l a n t a o 198 | V 1 PL COND LGSPEC2 e n t a m a r ||| e n t a m a r í a m o s 199 | V 3 SG SBJV PST IPFV LGSPEC1 e s n a l a r ||| e s n a l a r e 200 | V 2 SG IND PST PRF LGSPEC2 f o r z a r ||| f o r z a r a s 201 | V IND PRS 2 PL g a n a r ||| g a n á i s 202 | V 2 PL COND LGSPEC2 a s e d i a r ||| a s e d i a r í a i s 203 | V 2 PL COND LGSPEC2 v i a x a r ||| v i a x a r í a i s 204 | V 2 PL IND PST PRF LGSPEC1 e m b o t e l l a r ||| e m b o t e l l a r e i s 205 | V 1 PL SBJV PST IPFV LGSPEC2 c l a v a r ||| c l a v á r a m o s 206 | V 3 PL COND g o l i f l a r ||| g o l i f l a r í e n 207 | V 3 SG IND PST PRF LGSPEC1 c o l l e c h a r ||| c o l l e c h a r e 208 | V 2 SG IND PST PRF LGSPEC2 d u r a r ||| d u r a r a s 209 | V 3 PL SBJV PST IPFV LGSPEC2 a p e n z a r ||| a p e n z a r a n 210 | V IND PST 2 SG IPFV e s a m i n a r ||| e s a m i n a b e s 211 | V IND PST 3 SG PFV b i l o r d i a r ||| b i l o r d i ó 212 | V IND PST 1 PL PFV t o m a r ||| t o m e m o s 213 | V 1 SG SBJV PST IPFV LGSPEC1 h u m i l d a r ||| h u m i l d a r e 214 | V IND PST 1 PL IPFV LGSPEC2 p e c a r ||| p e c á b a m o s 215 | V 1 SG COND a c o l l e c h a r ||| a c o l l e c h a r í a 216 | V 2 SG IND PST PRF LGSPEC1 a l g a m a r ||| a l g a m a r e s 217 | V.PTCP PST MASC SG a v e r a r ||| a v e r á u 218 | V NFIN a ñ i c i a r ||| a ñ i c i a r 219 | V IND PST 2 PL IPFV LGSPEC2 a c o m p a 
n g a r ||| a c o m p a n g a b a i s 220 | V 2 PL IMP d e s c a n s a r ||| d e s c a n s á i 221 | V.PTCP PST MASC SG e s m o r d i g a ñ a r ||| e s m o r d i g a ñ á u 222 | V 1 SG SBJV PRS p e ñ a r ||| p e ñ e 223 | V.CVB PRS f e r r a r ||| f e r r a n d o 224 | V.PTCP PST FEM PL s i g n i f i c a r ||| s i g n i f i c a e s 225 | V 2 SG IND PST PRF LGSPEC2 s o n r i r ||| s o n r i e r a s 226 | V 2 PL SBJV PST IPFV LGSPEC2 m a t a r ||| m a t a r a i s 227 | V 2 SG SBJV PST IPFV LGSPEC2 a g r u p a r ||| a g r u p a r a s 228 | V IND PST 2 SG PFV LGSPEC1 d u r a r ||| d u r a s t i 229 | V IND PST 2 SG IPFV d e s p e s l l a r ||| d e s p e s l l a b e s 230 | V 1 PL COND LGSPEC2 a c o l l e c h a r ||| a c o l l e c h a r í a m o s 231 | V IND PST 1 PL IPFV LGSPEC2 c r i s t i a n i z a r ||| c r i s t i a n i z á b a m o s 232 | V 3 SG IND PST PRF LGSPEC2 a f u m a r ||| a f u m a r a 233 | V IND PST 3 PL PFV a b o t o n a r ||| a b o t o n a r o n 234 | V.PTCP PST NEUT SG a c a b a r ||| a c a b a o 235 | V 3 SG IND PST PRF LGSPEC1 m a x i n a r ||| m a x i n a r e 236 | V IND PST 3 PL IPFV b r i n c a r ||| b r i n c a b e n 237 | V 1 SG IND PST PRF LGSPEC1 a b l u c a r ||| a b l u c a r e 238 | V IND PST 1 SG PFV r u t i a r ||| r u t i é 239 | V 2 PL COND LGSPEC2 i m i t a r ||| i m i t a r í a i s 240 | V 3 PL IND FUT r e m e d i a r ||| r e m e d i a r á n 241 | V 3 SG IND PST PRF LGSPEC2 l l i b e r a r ||| l l i b e r a r a 242 | V 1 SG SBJV PRS e s c a e c e r ||| e s c a e c e r é 243 | V IND PRS 2 PL a ñ i c i a r ||| a ñ i c i á i s 244 | V 2 SG IND PST PRF LGSPEC1 a l l a r g a r ||| a l l a r g a r e s 245 | V 1 PL SBJV PRS e s t a n d a r i z a r ||| e s t a n d a r i c e m o s 246 | V 2 SG SBJV PRS LGSPEC2 c e g a r ||| c e g u a s 247 | V 3 SG COND g o l a r ||| g o l a r í a 248 | V 2 PL IND PST PRF LGSPEC2 p a r t i c i p a r ||| p a r t i c i p a r a i s 249 | V.PTCP PST NEUT SG a t a r ||| a t a o 250 | V 3 PL COND e s t a n d a r i 
z a r ||| e s t a n d a r i z a r í e n 251 | V IND PST 2 PL IPFV LGSPEC1 e s t a z a r ||| e s t a z a b e i s 252 | V 1 PL COND LGSPEC1 q u e m a r ||| q u e m a r í e m o s 253 | V IND PST 2 PL IPFV LGSPEC1 d e s a f i a r ||| d e s a f i a b e i s 254 | V.CVB PRS v o m i t a r ||| v o m i t a n d o 255 | V 2 SG SBJV PST IPFV LGSPEC2 e s p r e s a r ||| e s p r e s a r a s 256 | V IND PST 1 SG IPFV e s c r i b i r ||| e s c r i b í a 257 | V IND PST 1 PL IPFV LGSPEC2 e n s e ñ a r ||| e n s e ñ á b a m o s 258 | V.PTCP PST MASC PL o d i a r ||| o d i a o s 259 | V 3 SG COND f a r t u c a r ||| f a r t u c a r í a 260 | V IND PST 1 PL IPFV LGSPEC2 v a l t a r ||| v a l t á b a m o s 261 | V 1 SG SBJV PST IPFV LGSPEC1 x u b i l a r ||| x u b i l a r e 262 | V IND PST 2 SG PFV LGSPEC1 n u m b e r a r ||| n u m b e r a s t i 263 | V IND PST 2 SG PFV LGSPEC2 t r e s l l a d a r ||| t r e s l l a d e s t i 264 | V 2 SG IND PST PRF LGSPEC2 e s t r a ñ a r ||| e s t r a ñ a r a s 265 | V 2 SG IND FUT t i r a r ||| t i r a r á s 266 | V 2 PL IND PST PRF LGSPEC1 e n s e ñ a r ||| e n s e ñ a r e i s 267 | V 3 SG COND c o m e r ||| c o m i e r e 268 | V 3 PL IND PST PRF LGSPEC1 c o m i c a r ||| c o m i c a r e n 269 | V 2 PL IND PST PRF LGSPEC1 c a v i l g a r ||| c a v i l g a r e i s 270 | V 1 PL COND LGSPEC1 x a n t a r ||| x a n t a r í e m o s 271 | V 3 SG IND PST PRF LGSPEC2 d i b u x a r ||| d i b u x a r a 272 | V 3 PL IND PST PRF LGSPEC2 i n a u g u r a r ||| i n a u g u r a r a n 273 | V 3 SG IND FUT a m a s u ñ a r ||| a m a s u ñ a r á 274 | V 3 SG SBJV PRS e s p l o t a r ||| e s p l o t e 275 | V IND PRS 1 PL c a t a r ||| c a t a m o s 276 | V IND PST 2 SG PFV LGSPEC2 c o m i c a r ||| c o m i q u e s t i 277 | V IND PST 3 PL PFV a f o r r a r ||| a f o r r a r o n 278 | V 3 PL IND FUT LGSPEC2 t r a e r ||| t r a e r á n 279 | V 1 PL SBJV PRS c o l l e c h a r ||| c o l l e c h e m o s 280 | V 3 PL IND FUT a f o r r a r ||| a f o r r a r á n 281 | V 2 PL 
COND LGSPEC1 m a x i n a r ||| m a x i n a r í e i s 282 | V IND PST 2 PL IPFV LGSPEC1 a c o l l e c h a r ||| a c o l l e c h a b e i s 283 | V 1 PL SBJV PRS o r d e n a r ||| o r d e n e m o s 284 | V 3 SG IND FUT a b r u x a r ||| a b r u x a r á 285 | V IND PST 2 PL IPFV LGSPEC2 a c t i v a r ||| a c t i v a b a i s 286 | V 2 SG SBJV PST IPFV LGSPEC2 b e s a r ||| b e s a r a s 287 | V IND PST 2 SG PFV LGSPEC1 c o n n o t a r ||| c o n n o t a s t i 288 | V 3 SG COND p e s l l a r ||| p e s l l a r í a 289 | V.PTCP PST MASC SG v a c u n a r ||| v a c u n á u 290 | V IND PRS 2 PL c a v i l g a r ||| c a v i l g á i s 291 | V IND PST 3 PL PFV i n f l a r ||| i n f l a r o n 292 | V 1 SG SBJV PST IPFV LGSPEC1 a f l a c a r ||| a f l a c a r e 293 | V IND PRS 1 SG a b r u x a r ||| a b r u x o 294 | V IND PST 2 PL PFV LGSPEC2 a f a l a r ||| a f a l e s t i s 295 | V 2 PL IND PST PRF LGSPEC1 c o n x u g a r ||| c o n x u g a r e i s 296 | V 1 PL SBJV PST IPFV LGSPEC2 a i d a r ||| a i d á r a m o s 297 | V NFIN e s t r a ñ a r ||| e s t r a ñ a r 298 | V.PTCP PST MASC SG a l l i s t a r ||| a l l i s t á u 299 | V IND PST 2 PL PFV LGSPEC2 c l a v a r ||| c l a v e s t i s 300 | V.PTCP PST MASC SG r e g u l a r ||| r e g u l á u 301 | V 2 SG COND LGSPEC4 d i r ||| f u e r e s 302 | V.PTCP PST FEM SG p a r t i c i p a r ||| p a r t i c i p a d a 303 | V 3 PL SBJV PRS LGSPEC1 e m p e c i p i a r ||| e m p e c i p i e n 304 | V 3 SG IND FUT r e g u l a r ||| r e g u l a r á 305 | V IND PRS 3 SG a r r i c a r ||| a r r i c a 306 | V IND PRS 1 PL a m a s u ñ a r ||| a m a s u ñ a m o s 307 | V IND PST 3 PL IPFV d e s a r r o l l a r ||| d e s a r r o l l a b e n 308 | V 1 PL IND PST PRF LGSPEC2 c a t a r ||| c a t á r a m o s 309 | V 2 SG SBJV PRS LGSPEC1 g o l i f l a r ||| g o l i f l e s 310 | V IND PST 2 PL IPFV LGSPEC1 e s c u c h a r ||| e s c u c h a b e i s 311 | V 3 PL SBJV PRS LGSPEC1 a c e u t a r ||| a c e u t e n 312 | V 2 PL COND LGSPEC1 a f a i t a r ||| a 
f a i t a r í e i s 313 | V 1 PL SBJV PST IPFV LGSPEC2 e s ñ a l a r ||| e s ñ a l á r a m o s 314 | V IND PRS 2 PL p o d e r ||| p o d é i s 315 | V IND PST 1 SG PFV m a n c a r ||| m a n q u é 316 | V IND PST 3 PL PFV t r a b a y a r ||| t r a b a y a r o n 317 | V 2 PL SBJV PST IPFV LGSPEC2 c a v i l g a r ||| c a v i l g a r a i s 318 | V IND PRS 3 PL a s p i r a r ||| a s p i r e n 319 | V 2 PL COND LGSPEC1 r e m e d i a r ||| r e m e d i a r í e i s 320 | V 3 PL SBJV PRS LGSPEC2 p a r t i c i p a r ||| p a r t i c i p a n 321 | V IND PST 2 SG IPFV a s p i r a r ||| a s p i r a b e s 322 | V IND PST 1 SG IPFV a s p i r a r ||| a s p i r a b a 323 | V IND PRS 1 SG e l i m i n a r ||| e l i m i n o 324 | V.PTCP PST FEM PL i n a u g u r a r ||| i n a u g u r a e s 325 | V IND PST 3 SG IPFV c i r c u l a r ||| c i r c u l a b a 326 | V 2 SG IND FUT a f o r r a r ||| a f o r r a r á s 327 | V IND PRS 1 PL a f a m i a r ||| a f a m i a m o s 328 | V 3 PL COND a g r u p a r ||| a g r u p a r í e n 329 | V 2 SG COND f u m a r ||| f u m a r í e s 330 | V IND PST 2 PL PFV LGSPEC1 s e l l a r ||| s e l l a s t i s 331 | V IND PST 3 PL PFV n a m o r a r ||| n a m o r a r o n 332 | V IND PST 2 PL PFV LGSPEC1 a n c l a r ||| a n c l a s t i s 333 | V 2 PL COND LGSPEC2 c o l a r ||| c o l a r í a i s 334 | V 1 PL IND PST PRF LGSPEC2 a l l o ñ a r ||| a l l o ñ á r a m o s 335 | V 3 PL IND PST PRF LGSPEC1 e s t o r n u d a r ||| e s t o r n u d a r e n 336 | V 1 SG COND e s t a n d a r i z a r ||| e s t a n d a r i z a r í a 337 | V 3 PL SBJV PST IPFV r e s p o n d e r ||| r e s p o n d e r í e n 338 | V IND PST 3 PL IPFV q u e m a r ||| q u e m a b e n 339 | V 3 SG IND FUT d u n v i a r ||| d u n v i a r á 340 | V IND PST 2 PL PFV LGSPEC1 a p e n z a r ||| a p e n z a s t i s 341 | V IND PRS 1 SG m a r c h a r ||| m a r c h o 342 | V 3 SG IND PST PRF LGSPEC2 a n i c i a r ||| a n i c i a r a 343 | V IND PRS 1 SG c a t a r ||| c a t o 344 | V 1 PL IND PST PRF LGSPEC1 c o r t 
a r ||| c o r t á r e m o s 345 | V 2 SG SBJV PST IPFV LGSPEC1 p a g a r ||| p a g a r e s 346 | V 2 SG IND PST PRF LGSPEC2 n u m b e r a r ||| n u m b e r a r a s 347 | V IND PST 2 PL IPFV LGSPEC2 s o ñ a r ||| s o ñ a b a i s 348 | V 2 SG IND PST PRF LGSPEC2 i d o l a t r a r ||| i d o l a t r a r a s 349 | V 1 PL SBJV PRS e c h a r ||| e c h e m o s 350 | V IND PRS 1 PL d a ñ a r ||| d a ñ a m o s 351 | V 3 PL SBJV PRS LGSPEC2 m e x a r ||| m e x a n 352 | V 2 SG SBJV PST IPFV LGSPEC1 a f a l u c a r ||| a f a l u c a r e s 353 | V IND PRS 2 PL c a s t i g a r ||| c a s t i g á i s 354 | V 2 SG SBJV PRS LGSPEC1 c o m e n z a r ||| c o m i e n c e s 355 | V 1 SG IND PST PRF LGSPEC2 a f u m a r ||| a f u m a r a 356 | V IND PST 2 PL PFV e s t e n d e r ||| e s t e n d i e s t i s 357 | V 2 PL SBJV PST IPFV LGSPEC1 e x i l i a r ||| e x i l i a r e i s 358 | V 2 PL COND LGSPEC2 x a n t a r ||| x a n t a r í a i s 359 | V 1 PL IND PST PRF LGSPEC2 h e r e d a r ||| h e r e d á r a m o s 360 | V 3 SG COND d e b u r a r ||| d e b u r a r í a 361 | V 1 SG COND x u r a r ||| x u r a r í a 362 | V 2 PL SBJV PRS e s p o x i g a r ||| e s p o x i g u é i s 363 | V 2 PL IND PST PRF LGSPEC2 e s p i r r i a r ||| e s p i r r i a r a i s 364 | V 3 PL SBJV PRS LGSPEC2 d e d i c a r ||| d e d i q u a n 365 | ADJ SG FEM s i c i l i a n u ||| s i c i l i a n a 366 | V 1 PL COND LGSPEC1 f o r m a r ||| f o r m a r í e m o s 367 | V IND PST 1 SG IPFV c o p i a r ||| c o p i a b a 368 | V NFIN a c t i v a r ||| a c t i v a r 369 | V 1 PL IND PST PRF LGSPEC2 r e t a r ||| r e t á r a m o s 370 | V IND PRS 2 PL d a ñ a r ||| d a ñ á i s 371 | V IND PST 1 PL IPFV LGSPEC1 a f a l a r ||| a f a l á b e m o s 372 | V 1 PL SBJV PRS t o m a r ||| t o m e m o s 373 | V.PTCP PST MASC SG i n t e r p r e t a r ||| i n t e r p r e t á u 374 | V 2 PL COND LGSPEC1 a m e d r a n a r ||| a m e d r a n a r í e i s 375 | V 1 SG COND r e m o l c a r ||| r e m o l c a r í a 376 | V.PTCP PST MASC SG x u b 
i l a r ||| x u b i l á u 377 | V 1 PL SBJV PRS LGSPEC1 t r a e r ||| t r a y a m o s 378 | V IND PST 2 SG PFV LGSPEC4 f a e r ||| f i x e s t i 379 | V 3 PL COND e s p i r r i a r ||| e s p i r r i a r í e n 380 | V 2 PL IND FUT c a s t i g a r ||| c a s t i g a r é i s 381 | V.PTCP PST NEUT SG i m i t a r ||| i m i t a o 382 | V IND PST 3 PL IPFV c o m i c a r ||| c o m i c a b e n 383 | V 2 SG IMP d e s p e s l l a r ||| d e s p i e s l l a 384 | V IND PST 2 PL IPFV LGSPEC1 f o t o g r a f i a r ||| f o t o g r a f i a b e i s 385 | V 3 PL IND PST PRF LGSPEC1 a l l i s t a r ||| a l l i s t a r e n 386 | V 1 SG COND d e s p e s l l a r ||| d e s p e s l l a r í a 387 | V 3 SG IND FUT n u m b e r a r ||| n u m b e r a r á 388 | V 3 PL SBJV PST IPFV LGSPEC1 a f a i t a r ||| a f a i t a r e n 389 | V 2 PL IND PST PRF LGSPEC2 g a n a r ||| g a n a r a i s 390 | V IND PST 2 SG IPFV b e s a r ||| b e s a b e s 391 | V 2 SG IND PST PRF LGSPEC2 e s m o r d i g a ñ a r ||| e s m o r d i g a ñ a r a s 392 | V 1 PL IND PST PRF LGSPEC1 ñ i c i a r ||| ñ i c i á r e m o s 393 | V 2 PL COND LGSPEC2 f o r m a r ||| f o r m a r í a i s 394 | V IND PST 1 SG IPFV d e s e n d o l c a r ||| d e s e n d o l c a b a 395 | V 1 PL COND LGSPEC1 v i a x a r ||| v i a x a r í e m o s 396 | V IND PST 3 PL IPFV s o r r i r ||| s o r r í e n 397 | V 2 PL SBJV PST IPFV LGSPEC1 c o ñ o c e r ||| c o ñ o c e r í e i s 398 | V IND PST 2 SG PFV LGSPEC1 a f e i t a r ||| a f e i t a s t i 399 | V IND PST 2 PL PFV LGSPEC1 a y u d a r ||| a y u d a s t i s 400 | V 2 SG IND FUT a m e y o r a r ||| a m e y o r a r á s 401 | V 3 PL SBJV PST IPFV LGSPEC1 r e s p i r a r ||| r e s p i r a r e n 402 | V 1 PL SBJV PST IPFV LGSPEC1 p o r t a r ||| p o r t á r e m o s 403 | V IND PST 2 SG PFV LGSPEC2 r e g u l a r ||| r e g u l e s t i 404 | V IND PST 2 PL PFV LGSPEC1 u s a r ||| u s a s t i s 405 | V 1 PL COND LGSPEC2 m a x i n a r ||| m a x i n a r í a m o s 406 | V 1 PL SBJV PST IPFV LGSPEC1 a v e r a r 
||| a v e r á r e m o s 407 | V IND PST 3 PL PFV n o m b r a r ||| n o m b r a r o n 408 | V 1 PL IND PST PRF LGSPEC1 s a n g r a r ||| s a n g r á r e m o s 409 | V 2 SG IND PST PRF LGSPEC2 g u a r n i r ||| g u a r n i e r a s 410 | V 1 PL IND PST PRF LGSPEC2 x i n t a r ||| x i n t á r a m o s 411 | V.PTCP PST FEM PL e s c l u c a r ||| e s c l u c a e s 412 | V 2 PL SBJV PST IPFV LGSPEC2 r e m o l c a r ||| r e m o l c a r a i s 413 | V IND PST 2 PL IPFV LGSPEC1 d a ñ a r ||| d a ñ a b e i s 414 | V 1 PL SBJV PRS b a t a l l a r ||| b a t a l l e m o s 415 | V IND PST 2 PL IPFV LGSPEC2 r u t a r ||| r u t a b a i s 416 | V 3 PL SBJV PRS r e c o y e r ||| r e c u e y a n 417 | V IND PST 2 SG IPFV f o t o g r a f i a r ||| f o t o g r a f i a b e s 418 | V 1 PL IND FUT c o r t a r ||| c o r t a r e m o s 419 | V IND PST 1 PL IPFV LGSPEC2 f i r i r ||| f i r í a m o s 420 | ADJ SG NEUT a l t u ||| a l t o 421 | V 2 SG IND PST PRF LGSPEC1 s e r ||| f u e r a s 422 | V 2 PL SBJV PRS c o l a r ||| c o l é i s 423 | V 1 SG IND PST PRF LGSPEC2 e v i t a r ||| e v i t a r a 424 | V 1 SG COND LGSPEC1 g u a r n i r ||| g u a r n i e r e 425 | V 2 SG SBJV PRS LGSPEC1 d e s a v e r a r ||| d e s a v e r e s 426 | V 2 SG SBJV PRS c o ñ o c e r ||| c o ñ o c e r á s 427 | V 3 SG SBJV PST IPFV LGSPEC1 e n t r a r ||| e n t r a r e 428 | V IND PST 3 PL IPFV l l u c h a r ||| l l u c h a b e n 429 | V IND PST 2 SG IPFV g u a r n i r ||| g u a r n í e s 430 | V 2 SG SBJV PRS LGSPEC1 h u m i l d a r ||| h u m i l d e s 431 | V IND PST 1 PL IPFV LGSPEC2 a f a m i a r ||| a f a m i á b a m o s 432 | V.PTCP PST FEM SG c a l c a r ||| c a l c a d a 433 | V 1 SG IND PST PRF LGSPEC1 g o l i f l a r ||| g o l i f l a r e 434 | V 1 PL SBJV PST IPFV LGSPEC1 f o r m a r ||| f o r m á r e m o s 435 | V 1 SG COND p o l i n i z a r ||| p o l i n i z a r í a 436 | V.PTCP PST MASC SG a p e n z a r ||| a p e n z á u 437 | V IND PST 2 SG PFV LGSPEC1 c o l a r ||| c o l a s t i 438 | V 1 PL SBJV 
PST IPFV LGSPEC2 c a u s a r ||| c a u s á r a m o s 439 | V 2 SG SBJV PRS LGSPEC2 i n a u g u r a r ||| i n a u g u r a s 440 | V 1 SG SBJV PRS a b o t o n a r ||| a b o t o n e 441 | V IND PST 2 SG PFV LGSPEC2 e n c a x a r ||| e n c a x e s t i 442 | V IND PST 3 SG PFV i m a x i n a r ||| i m a x i n ó 443 | V 3 SG SBJV PST IPFV LGSPEC1 b i l o r d i a r ||| b i l o r d i a r e 444 | V 3 SG COND x u b i l a r ||| x u b i l a r í a 445 | V IND PST 3 SG IPFV e l i m i n a r ||| e l i m i n a b a 446 | V 3 PL COND ñ a d a r ||| ñ a d a r í e n 447 | V 1 SG IND PST PRF LGSPEC1 r e s c a t a r ||| r e s c a t a r e 448 | V 2 PL SBJV PST IPFV LGSPEC1 f u m a r ||| f u m a r e i s 449 | V.PTCP PST FEM SG e s p i r r i a r ||| e s p i r r i a d a 450 | V 3 PL IND FUT d u l d a r ||| d u l d a r á n 451 | V NFIN a f u m i a r ||| a f u m i a r 452 | V.CVB PRS m u d a r ||| m u d a n d o 453 | V IND PST 3 SG PFV c a v i l g a r ||| c a v i l g ó 454 | V.PTCP PST MASC SG d e s a f i a r ||| d e s a f i á u 455 | V IND PST 2 PL PFV LGSPEC1 m o y a r ||| m o y a s t i s 456 | V 3 PL IND FUT t r a b a y a r ||| t r a b a y a r á n 457 | V 1 SG COND e s p r e s a r ||| e s p r e s a r í a 458 | V IND PRS 1 PL c o n o c e r ||| c o n o c e m o s 459 | V 3 SG IND PST PRF LGSPEC2 r e c o y e r ||| r e c o y e r e 460 | V 2 PL IND FUT a f e i t a r ||| a f e i t a r é i s 461 | V 2 SG IND PST PRF LGSPEC1 e s p e r a r ||| e s p e r a r e s 462 | V 2 SG IND PST PRF LGSPEC1 e s p a ñ a r ||| e s p a ñ a r e s 463 | V 1 PL SBJV PST IPFV LGSPEC2 l l e g a l i z a r ||| l l e g a l i z á r a m o s 464 | V 1 PL IND PST PRF LGSPEC1 f o t o g r a f i a r ||| f o t o g r a f i á r e m o s 465 | V 1 PL IND PST PRF LGSPEC2 i l l e g a l i z a r ||| i l l e g a l i z á r a m o s 466 | V.PTCP PST NEUT SG r e c o r d a r ||| r e c o r d a o 467 | V IND PST 2 PL IPFV LGSPEC2 c h a r l a r ||| c h a r l a b a i s 468 | V 2 PL SBJV PRS a p u ñ a l a r ||| a p u ñ a l é i s 469 | V 2 SG SBJV PST 
IPFV LGSPEC2 b a u t i z a r ||| b a u t i z a r a s 470 | V 3 SG COND d i n a m i t a r ||| d i n a m i t a r í a 471 | V 1 PL IND PST PRF LGSPEC1 s a l t a r ||| s a l t á r e m o s 472 | V IND PST 2 SG IPFV i d o l a t r a r ||| i d o l a t r a b e s 473 | V 1 SG COND c a u s a r ||| c a u s a r í a 474 | V 2 SG SBJV PST IPFV LGSPEC2 c a v i l a r ||| c a v i l a r a s 475 | V.CVB PRS c o m p a r a r ||| c o m p a r a n d o 476 | ADJ SG FEM x e i t u ||| x e i t a 477 | V 2 PL IND PST PRF LGSPEC1 a f u m i a r ||| a f u m i a r e i s 478 | V 1 PL SBJV PST IPFV LGSPEC2 s a c r i f i c a r ||| s a c r i f i c á r a m o s 479 | V 2 PL IMP m a x i n a r ||| m a x i n á i 480 | V 1 SG IND FUT a p u ñ a l a r ||| a p u ñ a l a r é 481 | V 1 PL COND LGSPEC1 e n f i l a r ||| e n f i l a r í e m o s 482 | V 2 PL IND PST PRF LGSPEC1 l l a n t a r ||| l l a n t a r e i s 483 | V 2 SG SBJV PRS LGSPEC2 n u m b e r a r ||| n u m b e r a s 484 | V 3 SG SBJV PST IPFV LGSPEC2 s a l t a r ||| s a l t a r a 485 | V 1 SG IND PST PRF LGSPEC2 f u m i a r ||| f u m i a r a 486 | V 1 SG IND PST PRF LGSPEC2 a c u s a r ||| a c u s a r a 487 | V 1 SG COND a s a r ||| a s a r í a 488 | V 3 SG COND s e m e n t a r ||| s e m e n t a r í a 489 | V IND PST 3 SG IPFV f e s t e x a r ||| f e s t e x a b a 490 | V.PTCP PST MASC PL d i n a m i t a r ||| d i n a m i t a o s 491 | V 2 SG SBJV PRS LGSPEC2 p l e g a r ||| p l e g u a s 492 | V 3 SG COND e s t a n d a r i z a r ||| e s t a n d a r i z a r í a 493 | V IND PST 1 SG IPFV d i r ||| d i b a 494 | V IND PST 1 PL PFV f u n c i o n a r ||| f u n c i o n e m o s 495 | V 2 SG IMP e n t r u g a r ||| e n t r u g a 496 | V NFIN a c e u t a r ||| a c e u t a r 497 | V.PTCP PST NEUT SG a l i m e n t a r ||| a l i m e n t a o 498 | V IND PST 2 PL IPFV LGSPEC1 s o n r i r ||| s o n r í e i s 499 | V 3 PL IND PST PRF LGSPEC2 e x i l i a r ||| e x i l i a r a n 500 | V 2 PL SBJV PRS p e c a r ||| p e q u é i s 501 | 
-------------------------------------------------------------------------------- /03-encdec/asturian-test.txt: -------------------------------------------------------------------------------- 1 | V 3 SG IND PST PRF LGSPEC2 a g a r r a r ||| a g a r r a r a 2 | V.PTCP PST FEM PL f u m i a r ||| f u m i a e s 3 | V IND PST 3 PL PFV i l l e g a l i z a r ||| i l l e g a l i z a r o n 4 | V 3 SG SBJV PST IPFV LGSPEC1 c a n t a r ||| c a n t a r e 5 | V IND PST 3 PL PFV c a l c a r ||| c a l c a r o n 6 | V 1 PL SBJV PST IPFV LGSPEC2 a t a r ||| a t á r a m o s 7 | V IND PST 2 SG PFV LGSPEC1 e n c a d e n a r ||| e n c a d e n a s t i 8 | V 2 SG COND b r a n i a r ||| b r a n i a r í e s 9 | V 1 SG SBJV PRS b e s a r ||| b e s e 10 | V IND PST 2 PL IPFV LGSPEC1 c o y e r ||| c o y í a i s 11 | V IND PRS 3 PL p l a n c h a r ||| p l a n c h e n 12 | V 2 SG IND FUT e c h a r ||| e c h a r á s 13 | V 2 PL SBJV PST IPFV LGSPEC1 c o m b a y a r ||| c o m b a y a r e i s 14 | V 3 PL SBJV PRS LGSPEC1 a t r o p e l l a r ||| a t r o p e l l e n 15 | V 3 PL COND d u n v i a r ||| d u n v i a r í e n 16 | V IND PST 3 SG PFV a c o y e r ||| a c o y ó 17 | V IND PST 2 SG IPFV c o n o c e r ||| c o n o c í e s 18 | V 2 SG IMP d e x a r ||| d e x a 19 | V NFIN i n o r a r ||| i n o r a r 20 | V 1 SG IND FUT m i r a r ||| m i r a r é 21 | V.PTCP PST NEUT SG ḥ i s p i a r ||| ḥ i s p i a o 22 | V 3 SG COND LGSPEC2 p o n e r ||| p u n x e r e 23 | V 2 SG IND FUT a l e n d a r ||| a l e n d a r á s 24 | V IND PST 1 PL IPFV LGSPEC1 b e s a r ||| b e s á b e m o s 25 | V 3 SG SBJV PST IPFV LGSPEC1 v i a x a r ||| v i a x a r e 26 | V 1 PL IND FUT h u m i l d a r ||| h u m i l d a r e m o s 27 | V IND PST 1 PL PFV a r r a n a r ||| a r r a n e m o s 28 | V 3 SG IND PST PRF LGSPEC2 e s b a b a y a r ||| e s b a b a y a r a 29 | V 3 PL COND e c h a r ||| e c h a r í e n 30 | V IND PST 1 PL IPFV LGSPEC2 c a m b i a r ||| c a m b i á b a m o s 31 | V 2 PL COND LGSPEC2 d e r i v a r ||| d e r i 
v a r í a i s 32 | V 3 SG COND t o r t u r a r ||| t o r t u r a r í a 33 | V 3 PL SBJV PRS LGSPEC1 e s t a n d a r i z a r ||| e s t a n d a r i c e n 34 | V 1 SG COND c r e a r ||| c r e a r í a 35 | V 2 SG IND FUT b e s a r ||| b e s a r á s 36 | V IND PST 1 SG PFV m a t a r ||| m a t é 37 | V 3 SG IND PST PRF LGSPEC2 e s c a n c i a r ||| e s c a n c i a r a 38 | V.PTCP PST MASC SG a f a m i a r ||| a f a m i á u 39 | V 2 SG IND FUT v a c i a r ||| v a c i a r á s 40 | V 1 SG IND PST PRF LGSPEC1 b l i n c a r ||| b l i n c a r e 41 | V 3 PL IND PST PRF c o ñ o c e r ||| c o ñ o c i e r e n 42 | V IND PRS 1 SG e m o c i o n a r ||| e m o c i o n o 43 | V 2 SG IMP a f a l a g a r ||| a f a l a g a 44 | V IND PST 1 SG IPFV a g r u p a r ||| a g r u p a b a 45 | V 3 PL IND PST PRF LGSPEC2 a t a r ||| a t a r a n 46 | V 2 SG SBJV PST IPFV LGSPEC2 e s ñ a l a r ||| e s ñ a l a r a s 47 | V 2 PL IMP e n t r a r ||| e n t r á i 48 | V 1 PL COND LGSPEC2 a p o y a r ||| a p o y a r í a m o s 49 | V 1 SG SBJV PST IPFV LGSPEC1 e m o c i o n a r ||| e m o c i o n a r e 50 | V IND PST 2 PL PFV LGSPEC1 f e s t e x a r ||| f e s t e x a s t i s 51 | V 1 SG IND PST PRF LGSPEC2 v a l t a r ||| v a l t a r a 52 | V IND PRS 3 PL r e p o s a r ||| r e p o s e n 53 | V 3 PL IND PST PRF LGSPEC1 m a r c h a r ||| m a r c h a r e n 54 | V 2 PL SBJV PRS e l i m i n a r ||| e l i m i n é i s 55 | V 2 PL SBJV PST IPFV LGSPEC2 a n d o r g a r ||| a n d o r g a r a i s 56 | V 3 SG IND PST PRF LGSPEC1 s a n g r a r ||| s a n g r a r e 57 | V 1 PL COND LGSPEC2 l l i m p i a r ||| l l i m p i a r í a m o s 58 | V IND PST 3 SG IPFV l l a n t a r ||| l l a n t a b a 59 | V IND PST 1 PL IPFV LGSPEC2 v o l a r ||| v o l á b a m o s 60 | V IND PST 2 PL IPFV LGSPEC1 s e n t a r ||| s e n t a b e i s 61 | V 2 PL IND PST PRF LGSPEC1 q u e m a r ||| q u e m a r e i s 62 | V IND PST 2 PL IPFV LGSPEC1 n i c i a r ||| n i c i a b e i s 63 | V 3 PL IND PST PRF LGSPEC1 a t a r ||| a t a r e n 64 | V 2 SG 
SBJV PRS LGSPEC2 p a r a r ||| p a r a s 65 | V 3 PL COND e n t r u g a r ||| e n t r u g a r í e n 66 | ADJ SG NEUT h ú n g a r u ||| h ú n g a r o 67 | V IND PST 2 PL IPFV LGSPEC2 a b o t o n a r ||| a b o t o n a b a i s 68 | V 2 SG SBJV PST IPFV LGSPEC1 e s c l u c a r ||| e s c l u c a r e s 69 | V IND PRS 1 SG e n t r e n a r ||| e n t r e n o 70 | V IND PST 1 PL IPFV LGSPEC2 b a i l l a r ||| b a i l l á b a m o s 71 | V 3 SG COND s u a ñ a r ||| s u a ñ a r í a 72 | V.PTCP PST MASC PL a c o m p a n g a r ||| a c o m p a n g a o s 73 | V 2 PL SBJV PST IPFV LGSPEC2 a t r a g a n t a r ||| a t r a g a n t a r a i s 74 | V IND PST 2 SG PFV LGSPEC2 t o m a r ||| t o m e s t i 75 | V 2 PL SBJV PST IPFV LGSPEC1 s e l l a r ||| s e l l a r e i s 76 | V 1 PL SBJV PST IPFV LGSPEC2 e s c l u c a r ||| e s c l u c á r a m o s 77 | V 3 SG SBJV PST IPFV e s p a r d e r ||| e s p a r d e r í a 78 | V 3 PL IND FUT d i b u x a r ||| d i b u x a r á n 79 | V IND PRS 3 SG c o m e n z a r ||| c o m i e n z a 80 | V 3 PL SBJV PRS LGSPEC2 p e t r i f i c a r ||| p e t r i f i c a n 81 | V 1 SG IND FUT a m p a r a r ||| a m p a r a r é 82 | V 2 SG SBJV PST IPFV LGSPEC1 r u t i a r ||| r u t i a r e s 83 | V 2 SG SBJV PST IPFV LGSPEC2 e n t r e n a r ||| e n t r e n a r a s 84 | V IND PST 3 SG PFV r e c o y e r ||| r e c o y ó 85 | V 3 PL SBJV PST IPFV LGSPEC2 r e t a r ||| r e t a r a n 86 | V IND PST 2 PL PFV LGSPEC2 e s b a r a f u n d i a r ||| e s b a r a f u n d i e s t i s 87 | V IND PRS 2 SG g u e t a r ||| g u e t e s 88 | V.PTCP PST NEUT SG d u n v i a r ||| d u n v i a o 89 | V 2 SG SBJV PRS LGSPEC2 r e m o l c a r ||| r e m o l q u a s 90 | V IND PST 2 PL IPFV LGSPEC1 a l b o r i a r ||| a l b o r i a b e i s 91 | V 3 PL SBJV PST IPFV LGSPEC1 e s p e y a r ||| e s p e y a r e n 92 | V 1 PL COND LGSPEC2 i n t e r p r e t a r ||| i n t e r p r e t a r í a m o s 93 | V IND PST 2 PL PFV LGSPEC2 q u e m a r ||| q u e m e s t i s 94 | V IND PST 1 SG PFV p a g a r ||| p a g u 
é 95 | V IND PRS 3 PL a x u n t a r ||| a x u n t e n 96 | V 3 PL IND FUT p o l i n i z a r ||| p o l i n i z a r á n 97 | V 3 SG COND l l i m p i a r ||| l l i m p i a r í a 98 | V 2 PL SBJV PRS a y u d a r ||| a y u d é i s 99 | V 3 PL SBJV PRS LGSPEC2 o d i a r ||| o d i a n 100 | V 3 PL SBJV PRS LGSPEC2 a l l i s t a r ||| a l l i s t a n 101 | V 3 PL SBJV PST IPFV LGSPEC1 a f l a c a r ||| a f l a c a r e n 102 | V 1 SG IND PST PRF LGSPEC1 c h a r l a r ||| c h a r l a r e 103 | V IND PST 3 SG IPFV a c u t a r ||| a c u t a b a 104 | V 3 PL SBJV PRS LGSPEC1 c a v i l a r ||| c a v i l e n 105 | V 1 SG COND e n f i l a r ||| e n f i l a r í a 106 | V 2 SG SBJV PRS LGSPEC2 e n d o l c a r ||| e n d o l q u a s 107 | V 3 SG SBJV PST IPFV LGSPEC1 e s g a t u ñ a r ||| e s g a t u ñ a r e 108 | V IND PRS 3 SG e s c a e c e r ||| e s c a e z 109 | V 3 SG SBJV PST IPFV LGSPEC1 v a c i a r ||| v a c i a r e 110 | V 3 PL IND PST PRF LGSPEC1 e s m o r d i g a ñ a r ||| e s m o r d i g a ñ a r e n 111 | V IND PRS 3 SG a m o s a r ||| a m u e s a 112 | V 1 SG SBJV PST IPFV LGSPEC1 c o l o c a r ||| c o l o c a r e 113 | V IND PST 2 PL PFV LGSPEC1 p e ñ a r ||| p e ñ a s t i s 114 | V 2 SG SBJV PRS LGSPEC2 a c e p t a r ||| a c e p t a s 115 | V 2 PL IND PST PRF c o m e r ||| c o m i e r e i s 116 | V 1 PL IND PST PRF LGSPEC1 a i d a r ||| a i d á r e m o s 117 | V 3 PL IND PST PRF LGSPEC1 a t r a p a r ||| a t r a p a r e n 118 | V 1 SG SBJV PRS f a c e r ||| f a g a f a i g a 119 | V 2 PL SBJV PST IPFV LGSPEC2 d i x e b r a r ||| d i x e b r a r a i s 120 | V 3 PL IND PST PRF LGSPEC1 d e s a r r o l l a r ||| d e s a r r o l l a r e n 121 | V 2 SG SBJV PST IPFV LGSPEC1 e s t u d i a r ||| e s t u d i a r e s 122 | V 2 PL COND LGSPEC2 e s t r a ñ a r ||| e s t r a ñ a r í a i s 123 | V IND PST 2 SG PFV LGSPEC1 o d i a r ||| o d i a s t i 124 | V 2 PL COND LGSPEC1 b a u t i z a r ||| b a u t i z a r í e i s 125 | V IND PST 3 PL IPFV v a l t a r ||| v a l t a b e n 126 | V 
3 PL SBJV PRS LGSPEC2 m i r a r ||| m i r a n 127 | V 1 SG SBJV PRS a m e y o r a r ||| a m e y o r e 128 | V 3 PL IND FUT e s p a r d e r ||| e s p a r d á i s 129 | V 1 SG COND c o m b a y a r ||| c o m b a y a r í a 130 | V IND PST 1 SG PFV a l l i x e r a r ||| a l l i x e r é 131 | V.PTCP PST MASC PL a l l i x e r a r ||| a l l i x e r a o s 132 | V 2 SG IND FUT l l o r a r ||| l l o r a r á s 133 | V 1 PL IND FUT e s c a n c i a r ||| e s c a n c i a r e m o s 134 | V 2 SG IMP d e s e a r ||| d e s e a 135 | V.CVB PRS e x i s t i r ||| e x i s t i e n d o 136 | V 2 SG IMP x i m i r ||| x i m i 137 | V 2 SG IMP i n o r a r ||| i n o r a 138 | V 3 SG SBJV PST IPFV r e s p o n d e r ||| r e s p o n d e r í a 139 | V IND PST 1 SG IPFV LGSPEC2 t r a e r ||| t r a í a 140 | V 1 SG COND LGSPEC1 f a e r ||| f a d r í a 141 | V 1 PL SBJV PST IPFV LGSPEC2 n o m b r a r ||| n o m b r á r a m o s 142 | V 1 SG COND a r r a n a r ||| a r r a n a r í a 143 | V 3 SG COND a p e n z a r ||| a p e n z a r í a 144 | V 2 SG IND FUT c r i s t i a n i z a r ||| c r i s t i a n i z a r á s 145 | V 3 SG SBJV PST IPFV LGSPEC2 c o l o c a r ||| c o l o c a r a 146 | V 1 PL SBJV PRS f o r m a r ||| f o r m e m o s 147 | V 2 PL IND FUT x i b l a r ||| x i b l a r é i s 148 | V 3 PL IND FUT d o b l a r ||| d o b l a r á n 149 | V IND PST 1 SG IPFV a l e n d a r ||| a l e n d a b a 150 | V.PTCP PST NEUT SG c o r t a r ||| c o r t a o 151 | V 3 PL SBJV PRS LGSPEC1 ñ a d a r ||| ñ a d e n 152 | V 2 SG IND PST PRF LGSPEC1 b a i l a r ||| b a i l a r e s 153 | V IND PST 2 PL PFV LGSPEC1 o r d e n a r ||| o r d e n a s t i s 154 | V IND PST 2 PL PFV LGSPEC1 a r r a n a r ||| a r r a n a s t i s 155 | V 2 PL IMP e m o c i o n a r ||| e m o c i o n á i 156 | V 2 PL IND PST PRF LGSPEC2 d e x a r ||| d e x a r a i s 157 | V 3 SG IND PST PRF LGSPEC2 e s ñ a l a r ||| e s ñ a l a r a 158 | V IND PRS 3 PL e s b a r a f u n d i a r ||| e s b a r a f u n d i e n 159 | V 1 PL SBJV PST IPFV LGSPEC1 a f e 
u t a r ||| a f e u t á r e m o s 160 | V IND PRS 1 SG d e s p e s l l a r ||| d e s p i e s l l o 161 | V 2 PL IND FUT m e y o r a r ||| m e y o r a r é i s 162 | V 1 PL IND FUT p a r a r ||| p a r a r e m o s 163 | V.CVB PRS a c o m p a ñ a r ||| a c o m p a ñ a n d o 164 | V IND PST 1 SG PFV a f a l a g a r ||| a f a l a g u é 165 | V 3 PL SBJV PST IPFV LGSPEC1 c a v i l a r ||| c a v i l a r e n 166 | V 2 PL IND FUT c a v i l g a r ||| c a v i l g a r é i s 167 | V 1 SG SBJV PST IPFV LGSPEC2 d e s t a c a r ||| d e s t a c a r a 168 | V IND PST 2 SG PFV LGSPEC1 v e r a n i a r ||| v e r a n i a s t i 169 | V 3 PL SBJV PST IPFV LGSPEC1 s a c r i f i c a r ||| s a c r i f i c a r e n 170 | V 2 PL IND PST PRF LGSPEC1 ñ i c i a r ||| ñ i c i a r e i s 171 | V 3 SG COND e m o c i o n a r ||| e m o c i o n a r í a 172 | V 1 SG IND FUT e s p a ñ a r ||| e s p a ñ a r é 173 | V IND PRS 1 SG a r d e r ||| a r d o 174 | V.PTCP PST NEUT SG s o n r i r ||| s o n r í o 175 | V.PTCP PST FEM SG a b r u x a r ||| a b r u x a d a 176 | V IND PST 3 SG IPFV n a d a r ||| n a d a b a 177 | V 2 PL COND LGSPEC1 d i n a m i t a r ||| d i n a m i t a r í e i s 178 | V IND PST 3 PL PFV a m a s u ñ a r ||| a m a s u ñ a r o n 179 | V 1 SG SBJV PST IPFV LGSPEC2 i n f l a r ||| i n f l a r a 180 | V IND PRS 1 SG e n c a d e n a r ||| e n c a d e n o 181 | V.PTCP PST MASC PL p a g a r ||| p a g a o s 182 | V IND PRS 2 PL a l l a r g a r ||| a l l a r g á i s 183 | V 2 SG IND PST PRF LGSPEC1 s e n t a r ||| s e n t a r e s 184 | V 2 PL IMP d e s a r r o l l a r ||| d e s a r r o l l á i 185 | V 1 PL SBJV PST IPFV LGSPEC1 c e g a r ||| c e g á r e m o s 186 | V 1 PL IND PST PRF LGSPEC2 d e s a v e r a r ||| d e s a v e r á r a m o s 187 | V NFIN e m o c i o n a r ||| e m o c i o n a r 188 | V 3 PL IND PST PRF LGSPEC2 n o m b r a r ||| n o m b r a r a n 189 | V 3 PL SBJV PRS LGSPEC2 d e s e a r ||| d e s e a n 190 | V IND PRS 3 PL f a l a r ||| f a l e n 191 | V 3 PL COND a c t i v a r ||| a 
c t i v a r í e n 192 | V 3 PL SBJV PST IPFV LGSPEC2 a f a m i a r ||| a f a m i a r a n 193 | V 2 SG IMP d e s e n d o l c a r ||| d e s e n d o l c a 194 | V 3 SG SBJV PRS m a r c h a r ||| m a r c h e 195 | V NFIN LGSPEC1 t r a e r ||| t r a y e r 196 | V IND PRS 2 SG x i r i n g a r ||| x i r i n g u e s 197 | V.PTCP PST NEUT SG b a i l l a r ||| b a i l l a o 198 | V 3 PL SBJV PRS LGSPEC2 e n t r u g a r ||| e n t r u g u a n 199 | V 1 PL SBJV PST IPFV LGSPEC2 e s c a n c i a r ||| e s c a n c i á r a m o s 200 | V IND PRS 3 SG a f o r r a r ||| a f o r r a 201 | V IND PST 2 SG PFV LGSPEC1 t r u ñ a r ||| t r u ñ a s t i 202 | V 2 PL SBJV PRS s e m a r ||| s e m é i s 203 | V 1 SG COND d i x e b r a r ||| d i x e b r a r í a 204 | V 2 PL IND PST PRF LGSPEC1 r e g u l a r ||| r e g u l a r e i s 205 | V 3 SG SBJV PST IPFV LGSPEC1 d e s a f i a r ||| d e s a f i a r e 206 | V IND PST 1 PL PFV r e s c a t a r ||| r e s c a t e m o s 207 | V 1 SG SBJV PRS a n i c i a r ||| a n i c i e 208 | V 2 PL SBJV PRS r e m a r ||| r e m é i s 209 | V IND PST 3 PL PFV n o m a r ||| n o m a r o n 210 | V 2 SG SBJV PRS LGSPEC2 b a i l a r ||| b a i l a s 211 | V 3 SG SBJV PRS c o l o c a r ||| c o l o q u e 212 | V IND PST 1 PL IPFV LGSPEC1 c l o n a r ||| c l o n á b e m o s 213 | V 2 PL IND PST PRF LGSPEC1 t r e s f o r m a r ||| t r e s f o r m a r e i s 214 | V IND PRS 2 SG e s f r e g a r ||| e s f r e g u e s 215 | V 2 PL IND PST PRF LGSPEC1 m o y a r ||| m o y a r e i s 216 | V 1 PL COND LGSPEC1 z a r r a r ||| z a r r a r í e m o s 217 | V 2 SG IND FUT e s c a e c e r ||| e s c a e z a 218 | V 3 PL IND PST PRF LGSPEC2 f a e r ||| f i c i e r e n 219 | V 1 PL IND PST PRF LGSPEC1 b a u t i z a r ||| b a u t i z á r e m o s 220 | V 2 SG IND PST PRF LGSPEC2 m e d r a r ||| m e d r a r a s 221 | V.PTCP PST MASC PL e s b a r a f u n d i a r ||| e s b a r a f u n d i a o s 222 | V 2 SG IND PST PRF LGSPEC2 a f a l a g a r ||| a f a l a g a r a s 223 | V 2 PL IND FUT e s p a ñ a 
r ||| e s p a ñ a r é i s 224 | V IND PST 2 SG PFV LGSPEC2 r e p o s a r ||| r e p o s e s t i 225 | V IND PST 2 PL IPFV LGSPEC2 x i n t a r ||| x i n t a b a i s 226 | V 2 SG IND PST PRF LGSPEC2 b l i n c a r ||| b l i n c a r a s 227 | V IND PST 1 PL IPFV LGSPEC2 s o t e r r a r ||| s o t e r r á b a m o s 228 | V 2 PL IND FUT l l a d r a r ||| l l a d r a r é i s 229 | V 2 SG IND PST PRF LGSPEC2 d u n v i a r ||| d u n v i a r a s 230 | V 3 SG IND PST PRF LGSPEC1 c r e a r ||| c r e a r e 231 | V.PTCP PST MASC PL a m e y o r a r ||| a m e y o r a o s 232 | V IND PRS 2 SG u s a r ||| u s e s 233 | V IND PRS 2 SG x e l a r ||| x e l e s 234 | V 2 PL IND PST PRF LGSPEC2 e s c o m e n z a r ||| e s c o m e n z a r a i s 235 | V IND PST 2 PL IPFV LGSPEC1 e s b a r a f u n d i a r ||| e s b a r a f u n d i a b e i s 236 | V IND PST 2 SG IPFV d a ñ a r ||| d a ñ a b e s 237 | V 2 PL IND PST PRF LGSPEC2 c a l c a r ||| c a l c a r a i s 238 | V 2 SG IND FUT a s a r ||| a s a r á s 239 | V 2 SG SBJV PRS LGSPEC2 x a n t a r ||| x a n t a s 240 | V.PTCP PST MASC PL c o n s i d e r a r ||| c o n s i d e r a o s 241 | V.PTCP PST FEM PL n o m b r a r ||| n o m b r a e s 242 | V 2 SG COND e n s u g a r ||| e n s u g a r í e s 243 | V 3 SG SBJV PST IPFV LGSPEC2 c o n x e l a r ||| c o n x e l a r a 244 | V.PTCP PST NEUT SG p o d a r ||| p o d a o 245 | V 3 SG SBJV PRS a n c l a r ||| a n c l e 246 | V 2 PL SBJV PRS c a t a r ||| c a t é i s 247 | V IND PST 2 PL IPFV LGSPEC2 e n d o l c a r ||| e n d o l c a b a i s 248 | V 3 PL IND PST PRF LGSPEC2 e s t o r n u d a r ||| e s t o r n u d a r a n 249 | V.PTCP PST MASC PL e s i x i r ||| e s i x í o s 250 | V 2 SG SBJV PRS LGSPEC1 f e r r a r ||| f i e r r e s 251 | V 1 PL SBJV PRS e n t r e n a r ||| e n t r e n e m o s 252 | V IND PST 2 SG PFV LGSPEC2 d i v o r c i a r ||| d i v o r c i e s t i 253 | V 3 SG IND PST PRF LGSPEC2 c o n n o t a r ||| c o n n o t a r a 254 | V 2 PL IND PST PRF LGSPEC2 b u s c a r ||| b u s c a r a i 
s 255 | V 1 SG SBJV PRS d i n a m i t a r ||| d i n a m i t e 256 | V 2 PL SBJV PRS t o r n a r ||| t o r n é i s 257 | V IND PST 2 PL IPFV a r d e r ||| a r d í e i s 258 | V 3 SG SBJV PRS i n s p i r a r ||| i n s p i r e 259 | V IND PST 3 SG PFV l l e n d a r ||| l l e n d ó 260 | V 2 PL SBJV PST IPFV LGSPEC1 l l e g a l i z a r ||| l l e g a l i z a r e i s 261 | V IND PST 3 SG PFV a b l u c a r ||| a b l u c ó 262 | V 1 PL SBJV PRS g u a r n i r ||| g u a r n i r e m o s 263 | V 2 PL COND LGSPEC2 a n i c i a r ||| a n i c i a r í a i s 264 | V 1 PL IND FUT e x i l i a r ||| e x i l i a r e m o s 265 | V IND PST 1 SG IPFV s a n g r a r ||| s a n g r a b a 266 | V 1 SG IND PST PRF LGSPEC2 a b o t o n a r ||| a b o t o n a r a 267 | V 2 PL IMP e x i s t i r ||| e x i s t í i 268 | V 1 SG IND PST PRF LGSPEC1 e s c l u c a r ||| e s c l u c a r e 269 | V IND PRS 2 PL e s p r e s a r ||| e s p r e s á i s 270 | V 3 SG SBJV PST IPFV LGSPEC1 c o n x e l a r ||| c o n x e l a r e 271 | V 1 SG IND FUT e s g a t u ñ a r ||| e s g a t u ñ a r é 272 | V IND PRS 2 SG e x i l i a r ||| e x i l i e s 273 | V 2 PL IND FUT LGSPEC1 d i r ||| v a i g a m o s 274 | V 1 PL COND LGSPEC2 c o y e r ||| c o y e r í e m o s 275 | V.PTCP PST NEUT SG r o b a r ||| r o b a o 276 | V IND PST 1 SG IPFV e s p i r r i a r ||| e s p i r r i a b a 277 | V IND PST 3 PL PFV d e r i v a r ||| d e r i v a r o n 278 | V IND PST 3 PL IPFV a f a l a g a r ||| a f a l a g a b e n 279 | V.PTCP PST MASC PL r o b a r ||| r o b a o s 280 | V.PTCP PST FEM SG e s m o r d i g a ñ a r ||| e s m o r d i g a ñ a d a 281 | V 3 PL IND PST PRF LGSPEC2 v e r a n i a r ||| v e r a n i a r a n 282 | V 1 PL SBJV PST IPFV LGSPEC2 ñ a m o r a r ||| ñ a m o r á r a m o s 283 | V 2 SG IND PST PRF LGSPEC1 d e r i v a r ||| d e r i v a r e s 284 | V.PTCP PST FEM SG t r u ñ a r ||| t r u ñ a d a 285 | V 2 PL COND LGSPEC1 z a r r a r ||| z a r r a r í e i s 286 | V 3 SG IND PST PRF LGSPEC2 b a i l l a r ||| b a i l l a r a 287 
| V 3 PL IND PST PRF LGSPEC2 l l e g a l i z a r ||| l l e g a l i z a r a n 288 | V 2 PL IND PST PRF LGSPEC2 a y u n a r ||| a y u n a r a i s 289 | V 2 PL COND LGSPEC1 s a l t a r ||| s a l t a r í e i s 290 | V 3 PL IND PST PRF LGSPEC1 b r a n i a r ||| b r a n i a r e n 291 | V 2 PL IND PST PRF LGSPEC2 n a m o r a r ||| n a m o r a r a i s 292 | V 2 PL IMP a c a b a r ||| a c a b á i 293 | V 1 PL SBJV PST IPFV LGSPEC2 a f a m i a r ||| a f a m i á r a m o s 294 | V IND PST 1 PL IPFV LGSPEC2 r e g u l a r ||| r e g u l á b a m o s 295 | V 1 PL COND LGSPEC2 m i r a r ||| m i r a r í a m o s 296 | V 3 PL SBJV PST IPFV LGSPEC1 v a c i a r ||| v a c i a r e n 297 | V IND PRS 2 PL c i r c u l a r ||| c i r c u l á i s 298 | V IND PST 3 PL IPFV r e t a r ||| r e t a b e n 299 | V IND PRS 1 SG a b l u c a r ||| a b l u c o 300 | V 2 PL IND FUT a c t i v a r ||| a c t i v a r é i s 301 | V IND PST 3 SG PFV a m e d r a n a r ||| a m e d r a n ó 302 | V.PTCP PST FEM SG d e b u r a r ||| d e b u r a d a 303 | V 3 SG IND PST PRF LGSPEC1 c o n v i d a r ||| c o n v i d a r e 304 | V.PTCP PST MASC PL e s f r e g a r ||| e s f r e g a o s 305 | V IND PST 2 SG PFV LGSPEC1 p o n e r ||| p u n x e s t i 306 | V 3 SG COND LGSPEC4 d i r ||| f u e r e 307 | V 1 PL COND LGSPEC1 f r o t a r ||| f r o t a r í e m o s 308 | V 2 SG IMP e n s u g a r ||| e n s u g a 309 | V 3 PL IND FUT s u d a r ||| s u d a r á n 310 | V 2 SG COND a b l u c a r ||| a b l u c a r í e s 311 | V 2 PL IND PST PRF LGSPEC2 s o ñ a r ||| s o ñ a r a i s 312 | V 3 PL IND PST PRF LGSPEC1 s a c r i f i c a r ||| s a c r i f i c a r e n 313 | V.PTCP PST FEM SG e v i t a r ||| e v i t a d a 314 | V.CVB PRS m e d r a r ||| m e d r a n d o 315 | V 1 SG IND FUT d u d a r ||| d u d a r é 316 | V 1 SG IND PST PRF LGSPEC1 a n i c i a r ||| a n i c i a r e 317 | V 1 PL COND LGSPEC2 x u b i l a r ||| x u b i l a r í a m o s 318 | V 1 PL IND PST PRF LGSPEC2 c o m b a y a r ||| c o m b a y á r a m o s 319 | V 2 PL IND PST PRF 
LGSPEC1 a l l o ñ a r ||| a l l o ñ a r e i s 320 | V 2 PL SBJV PST IPFV LGSPEC1 i n t e r p r e t a r ||| i n t e r p r e t a r e i s 321 | V NFIN d u d a r ||| d u d a r 322 | V 3 SG SBJV PST IPFV LGSPEC2 r e c o r d a r ||| r e c o r d a r a 323 | V 1 SG SBJV PST IPFV LGSPEC1 n i c i a r ||| n i c i a r e 324 | V NFIN e n d o l c a r ||| e n d o l c a r 325 | V 2 SG SBJV PRS LGSPEC1 s a c r i f i c a r ||| s a c r i f i q u e s 326 | V 2 SG COND s i g n i f i c a r ||| s i g n i f i c a r í e s 327 | V 1 SG COND e s c o y e r ||| e s c o y e r í a 328 | V 3 SG SBJV PRS e s c o y e r ||| e s c u e y a 329 | V.PTCP PST MASC PL n a d a r ||| n a d a o s 330 | V 3 SG SBJV PST IPFV LGSPEC1 f u m a r ||| f u m a r e 331 | V 2 PL COND LGSPEC1 f a l a r ||| f a l a r í e i s 332 | V IND PST 3 SG IPFV b i l o r d i a r ||| b i l o r d i a b a 333 | V.PTCP PST FEM SG a x u d a r ||| a x u d a d a 334 | V 2 SG IND PST PRF LGSPEC1 a l b o r i a r ||| a l b o r i a r e s 335 | V 1 SG IND FUT a c a b a r ||| a c a b a r é 336 | V IND PST 3 PL IPFV o r i n a r ||| o r i n a b e n 337 | V 3 SG IND PST PRF LGSPEC2 p e t r i f i c a r ||| p e t r i f i c a r a 338 | V IND PRS 3 PL d e c i d i r ||| d e c i d e n 339 | V.PTCP PST NEUT SG c a n t a r ||| c a n t a o 340 | V 1 SG COND a g a r r a r ||| a g a r r a r í a 341 | V 1 PL SBJV PST IPFV LGSPEC1 s e c a r ||| s e c á r e m o s 342 | V IND PRS 1 SG v e n d e r ||| v e n d o 343 | V.PTCP PST FEM PL i n s u l t a r ||| i n s u l t a e s 344 | V 1 PL COND LGSPEC2 x e l a r ||| x e l a r í a m o s 345 | V 2 PL SBJV PST IPFV LGSPEC2 f r o t a r ||| f r o t a r a i s 346 | V 1 PL COND LGSPEC2 a l i m e n t a r ||| a l i m e n t a r í a m o s 347 | V IND PST 2 SG PFV LGSPEC2 m i r a r ||| m i r e s t i 348 | V 3 SG IND PST PRF LGSPEC2 m u d a r ||| m u d a r a 349 | V IND PST 3 SG IPFV p o r t a r ||| p o r t a b a 350 | V 1 SG SBJV PST IPFV LGSPEC2 e s ñ a l a r ||| e s ñ a l a r a 351 | V IND PST 2 PL IPFV LGSPEC1 a t a r ||| a t 
a b e i s 352 | V 2 PL COND LGSPEC2 g o l i q u i a r ||| g o l i q u i a r í a i s 353 | V 2 SG SBJV PST IPFV LGSPEC1 e s ñ a l a r ||| e s ñ a l a r e s 354 | V 1 SG IND FUT ñ a m o r a r ||| ñ a m o r a r é 355 | V 1 PL IND PST PRF LGSPEC1 c o n x u g a r ||| c o n x u g á r e m o s 356 | V IND PST 2 PL IPFV LGSPEC1 e x i s t i r ||| e x i s t í e i s 357 | V IND PST 2 PL IPFV LGSPEC1 a t r a g a n t a r ||| a t r a g a n t a b e i s 358 | V 2 PL COND LGSPEC2 s i g n i f i c a r ||| s i g n i f i c a r í a i s 359 | V 3 PL SBJV PRS LGSPEC2 a f i l a r ||| a f i l a n 360 | V 2 PL COND LGSPEC1 a g r u p a r ||| a g r u p a r í e i s 361 | V 3 PL SBJV PST IPFV x i m i r ||| x i m i r í e n 362 | V IND PST 3 PL PFV a c a b a r ||| a c a b a r o n 363 | V 2 PL SBJV PRS i l l e g a l i z a r ||| i l l e g a l i c é i s 364 | V 3 SG COND LGSPEC2 s o n r i r ||| s o n r i e r a 365 | V IND PST 2 SG PFV LGSPEC1 r e c o r d a r ||| r e c o r d a s t i 366 | V.PTCP PST MASC PL e m o c i o n a r ||| e m o c i o n a o s 367 | V IND PST 1 SG IPFV m a n c a r ||| m a n c a b a 368 | V 2 SG IND FUT p e s l l a r ||| p e s l l a r á s 369 | V 2 SG SBJV PST IPFV LGSPEC2 i m i t a r ||| i m i t a r a s 370 | V.PTCP PST MASC PL s e c a r ||| s e c a o s 371 | V 2 SG SBJV PRS LGSPEC2 c r e a r ||| c r e a s 372 | V 3 SG COND e s p l o r a r ||| e s p l o r a r í a 373 | V 3 SG SBJV PST IPFV LGSPEC1 a l e n d a r ||| a l e n d a r e 374 | V 2 SG SBJV PRS LGSPEC2 a f i l a r ||| a f i l a s 375 | V IND PST 2 SG PFV LGSPEC1 p a u t a r ||| p a u t a s t i 376 | V 2 SG SBJV PRS LGSPEC2 c l a v a r ||| c l a v a s 377 | V 2 SG IND PST PRF LGSPEC2 c a v i l g a r ||| c a v i l g a r a s 378 | V.PTCP PST MASC PL a l z a r ||| a l z a o s 379 | V IND PST 1 PL IPFV LGSPEC2 a g u a n t a r ||| a g u a n t á b a m o s 380 | V IND PST 3 SG PFV n o m a r ||| n o m ó 381 | V 2 PL IND PST PRF LGSPEC1 e n c a x a r ||| e n c a x a r e i s 382 | V IND PST 1 PL PFV a l l o ñ a r ||| a l l o ñ e m o 
s 383 | V 1 PL SBJV PST IPFV LGSPEC1 e s t a z a r ||| e s t a z á r e m o s 384 | V 1 SG SBJV PST IPFV LGSPEC1 e c h a r ||| e c h a r e 385 | V 1 PL IND PST PRF LGSPEC2 v e r a n i a r ||| v e r a n i á r a m o s 386 | V NFIN d e s e n d o l c a r ||| d e s e n d o l c a r 387 | V 2 PL SBJV PST IPFV LGSPEC1 r e m o l c a r ||| r e m o l c a r e i s 388 | V.PTCP PST MASC PL p e s c a r ||| p e s c a o s 389 | V IND PST 1 PL IPFV LGSPEC1 f e r r a r ||| f e r r á b e m o s 390 | V IND PST 3 PL PFV v a l t a r ||| v a l t a r o n 391 | V 1 PL SBJV PST IPFV LGSPEC1 b a t a l l a r ||| b a t a l l á r e m o s 392 | V 1 SG SBJV PRS c o m i c a r ||| c o m i q u e 393 | V 2 SG SBJV PST IPFV LGSPEC1 a u d a r ||| a u d a r e s 394 | V IND PST 2 PL IPFV LGSPEC2 a l z a r ||| a l z a b a i s 395 | V 1 SG IND PST PRF LGSPEC2 a l z a r ||| a l z a r a 396 | V IND PRS 3 PL a r r a n a r ||| a r r a n e n 397 | V IND PST 3 PL IPFV t r e s p o r t a r ||| t r e s p o r t a b e n 398 | ADJ SG NEUT b r e t ó n ||| b r e t o n o 399 | V 1 SG SBJV PST IPFV LGSPEC1 e s p o x i g a r ||| e s p o x i g a r e 400 | V 2 SG SBJV PST IPFV LGSPEC1 f o r z a r ||| f o r z a r e s 401 | V 2 PL COND LGSPEC2 d o b l a r ||| d o b l a r í a i s 402 | V IND PST 2 PL PFV LGSPEC2 b l i n c a r ||| b l i n q u e s t i s 403 | V 3 PL IND PST PRF LGSPEC2 x a n t a r ||| x a n t a r a n 404 | V 1 SG IND PST PRF LGSPEC1 e s t o r n u d a r ||| e s t o r n u d a r e 405 | V 1 SG SBJV PRS e m o c i o n a r ||| e m o c i o n e 406 | V 3 PL SBJV PRS LGSPEC2 r e b a x a r ||| r e b a x a n 407 | V 2 SG SBJV PRS LGSPEC1 a l l i x e r a r ||| a l l i x e r e s 408 | V IND PST 2 PL PFV LGSPEC1 c r e a r ||| c r e a s t i s 409 | V 2 PL COND LGSPEC1 a l g a m a r ||| a l g a m a r í e i s 410 | V.PTCP PST FEM PL g u e t a r ||| g u e t a e s 411 | V 1 PL SBJV PST IPFV LGSPEC1 r e b a x a r ||| r e b a x á r e m o s 412 | V IND PRS 2 SG e n s u g a r ||| e n s u g u e s 413 | V.PTCP PST MASC PL a b o t o n a r 
||| a b o t o n a o s 414 | V 1 PL COND LGSPEC2 l l e n d a r ||| l l e n d a r í a m o s 415 | V IND PST 2 PL IPFV LGSPEC1 d u n v i a r ||| d u n v i a b e i s 416 | V 3 SG IND PST PRF LGSPEC2 c o m p a r a r ||| c o m p a r a r a 417 | V.PTCP PST MASC SG a y u n a r ||| a y u n á u 418 | V IND PST 1 PL IPFV LGSPEC1 c o p i a r ||| c o p i á b e m o s 419 | V 3 PL COND b r a n i a r ||| b r a n i a r í e n 420 | V IND PRS 3 SG e s c o m e n z a r ||| e s c o m e n z a 421 | V 1 PL SBJV PST IPFV LGSPEC2 r e g u l a r ||| r e g u l á r a m o s 422 | V 3 PL SBJV PRS LGSPEC2 p u x a r ||| p u x a n 423 | V 2 SG SBJV PST IPFV LGSPEC2 f e s t e x a r ||| f e s t e x a r a s 424 | V 1 PL SBJV PST IPFV LGSPEC1 c r e a r ||| c r e á r e m o s 425 | V IND PST 1 PL IPFV LGSPEC4 f a e r ||| f a í e m o s 426 | V 2 SG SBJV PST IPFV LGSPEC1 r e m e d i a r ||| r e m e d i a r e s 427 | V 3 SG IND FUT e m p r i m a r ||| e m p r i m a r á 428 | V IND PST 2 PL PFV LGSPEC2 a l l u g a r ||| a l l u g u e s t i s 429 | V IND PST 2 PL PFV LGSPEC2 c a t a r ||| c a t e s t i s 430 | V.PTCP PST FEM SG e m p e o r a r ||| e m p e o r a d a 431 | V IND PST 2 SG PFV LGSPEC1 p o d e r ||| p u d i s t i 432 | V IND PST 3 SG IPFV d e s a f i a r ||| d e s a f i a b a 433 | V 3 PL SBJV PST IPFV LGSPEC2 e n s u g a r ||| e n s u g a r a n 434 | V.PTCP PST MASC SG r e m o l c a r ||| r e m o l c á u 435 | V IND PST 1 PL IPFV LGSPEC1 p a r i r ||| p a r í e m o s 436 | V 2 SG COND b e s a r ||| b e s a r í e s 437 | V 1 PL SBJV PRS ḥ i s p i a r ||| ḥ i s p i e m o s 438 | V IND PST 2 PL IPFV LGSPEC1 n u m b e r a r ||| n u m b e r a b e i s 439 | V IND PST 2 SG IPFV s e n t a r ||| s e n t a b e s 440 | V IND PST 1 SG IPFV e s a m i n a r ||| e s a m i n a b a 441 | V 3 PL COND c o m p r a r ||| c o m p r a r í e n 442 | V.PTCP PST MASC SG a s o l e y a r ||| a s o l e y á u 443 | V IND PST 1 PL PFV a n c l a r ||| a n c l e m o s 444 | V 2 PL IND PST PRF LGSPEC2 e s p e y a r ||| e s p e y a 
r a i s 445 | V.CVB PRS a f a l a g a r ||| a f a l a g a n d o 446 | V 1 PL IND FUT c o p i a r ||| c o p i a r e m o s 447 | V 3 SG SBJV PRS x u b i l a r ||| x u b i l e 448 | V 2 SG IND PST PRF LGSPEC2 f a c e r ||| f i c i e r e s 449 | V 3 SG SBJV PST IPFV LGSPEC2 e s c a p a r ||| e s c a p a r a 450 | V IND PRS 2 SG d i b u x a r ||| d i b u x e s 451 | V 3 PL SBJV PST IPFV LGSPEC1 c h a r l a r ||| c h a r l a r e n 452 | V 1 PL SBJV PRS a n d o r g a r ||| a n d o r g u e m o s 453 | V IND PST 1 SG PFV e x i s t i r ||| e x i s t í 454 | V 1 PL COND LGSPEC2 s e l l a r ||| s e l l a r í a m o s 455 | V 1 SG IND FUT f u m i a r ||| f u m i a r é 456 | V 1 SG COND f a l a r ||| f a l a r í a 457 | V 1 PL IND PST PRF LGSPEC1 e s c u c h a r ||| e s c u c h á r e m o s 458 | V 2 PL IMP a f a l a g a r ||| a f a l a g á i 459 | V 2 SG IND PST PRF LGSPEC2 p r e ñ a r ||| p r e ñ a r a s 460 | V IND PST 3 SG IPFV a f i l a r ||| a f i l a b a 461 | V 2 PL SBJV PST IPFV LGSPEC1 i n s u l t a r ||| i n s u l t a r e i s 462 | V 2 PL COND LGSPEC2 c o n x e l a r ||| c o n x e l a r í a i s 463 | V 3 SG IND PST PRF LGSPEC2 f a e r ||| f i c i e r e 464 | V 2 PL COND LGSPEC2 b l i n c a r ||| b l i n c a r í a i s 465 | V IND PRS 1 SG f e s t e x a r ||| f e s t e x o 466 | V 3 PL SBJV PST IPFV LGSPEC1 f a e r ||| f i c i e r a n 467 | ADJ SG MASC o c c i t a n u ||| o c c i t a n u 468 | V 1 PL IND PST PRF LGSPEC1 m a x i n a r ||| m a x i n á r e m o s 469 | V 2 PL IND PST PRF LGSPEC2 l l u c h a r ||| l l u c h a r a i s 470 | V 3 PL SBJV PST IPFV LGSPEC2 a c o l l e c h a r ||| a c o l l e c h a r a n 471 | V 3 SG IND PST PRF LGSPEC1 e s c o y e r ||| e s c o y e r a 472 | V IND PST 3 PL IPFV a r r i c a r ||| a r r i c a b e n 473 | V 1 SG IND FUT d i n a m i t a r ||| d i n a m i t a r é 474 | V 3 PL SBJV PRS LGSPEC2 d u r a r ||| d u r a n 475 | V 1 PL SBJV PST IPFV LGSPEC2 a c e u t a r ||| a c e u t á r a m o s 476 | V 3 SG IND FUT a t a r ||| a t a r á 477 
| V IND PRS 1 SG a b o t o n a r ||| a b o t o n o 478 | V 3 PL IND PST PRF LGSPEC2 e n c a d e n a r ||| e n c a d e n a r a n 479 | V 2 PL COND LGSPEC2 p a r i r ||| p a r i e r a i s 480 | V IND PST 1 PL PFV LGSPEC3 f a c e r ||| f i x i m o s 481 | V.PTCP PST FEM PL a s e d i a r ||| a s e d i a e s 482 | V 1 PL IND PST PRF LGSPEC1 r e s f r e g a r ||| r e s f r e g á r e m o s 483 | V 3 PL IND FUT r o b a r ||| r o b a r á n 484 | V 3 PL SBJV PST IPFV LGSPEC1 l l o r a r ||| l l o r a r e n 485 | V IND PST 2 PL PFV LGSPEC2 t r e s p o r t a r ||| t r e s p o r t e s t i s 486 | V 1 PL SBJV PRS a c e p t a r ||| a c e p t e m o s 487 | V 3 SG IND PST PRF LGSPEC1 d u l d a r ||| d u l d a r e 488 | V IND PST 2 SG PFV LGSPEC2 a m e y o r a r ||| a m e y o r e s t i 489 | V IND PST 3 SG IPFV i l l e g a l i z a r ||| i l l e g a l i z a b a 490 | V NFIN f u m a r ||| f u m a r 491 | V 3 SG SBJV PST IPFV LGSPEC2 e s c a n c i a r ||| e s c a n c i a r a 492 | V.PTCP PST FEM SG e n d o l c a r ||| e n d o l c a d a 493 | V.PTCP PST FEM PL e n d o l c a r ||| e n d o l c a e s 494 | V IND PST 2 SG PFV LGSPEC2 a f e i t a r ||| a f e i t e s t i 495 | V IND PST 2 SG PFV LGSPEC2 a m a l a r ||| a m a l e s t i 496 | V 2 PL SBJV PRS f u m a r ||| f u m é i s 497 | V IND PRS 3 PL t r e s p o r t a r ||| t r e s p o r t e n 498 | V 1 PL COND LGSPEC1 t i r a r ||| t i r a r í e m o s 499 | V 1 PL SBJV PST IPFV LGSPEC1 a b o t o n a r ||| a b o t o n á r e m o s 500 | V 1 PL IND PST PRF c o m e r ||| c o m i é r e m o s 501 | -------------------------------------------------------------------------------- /03-encdec/encdec.py: -------------------------------------------------------------------------------- 1 | import dynet as dy 2 | import random 3 | import argparse 4 | import util 5 | 6 | EOS = "" 7 | 8 | 9 | INPUT_VOCAB_SIZE = 0 10 | OUTPUT_VOCAB_SIZE = 0 11 | LSTM_NUM_OF_LAYERS = 1 12 | EMBEDDINGS_SIZE = 32 13 | STATE_SIZE = 32 14 | ATTENTION_SIZE = 32 15 | 16 | 17 | 
class EncDecModel:
    """Character-level LSTM encoder-decoder (no attention) built on DyNet.

    One LSTM builder is shared by the encoder and the decoder: the encoder
    reads the embedded input symbols, and the decoder continues from the
    resulting state, emitting one output symbol per step.

    The methods rely on names defined at module level in this script:
    ``dy`` (DyNet), ``EOS``, ``input_vocab`` and ``output_vocab``.
    """

    def __init__(self, model, LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, INPUT_VOCAB_SIZE, OUTPUT_VOCAB_SIZE):
        """Allocate all parameters inside ``model``.

        Args:
            model: DyNet parameter collection that owns every parameter.
            LSTM_NUM_OF_LAYERS: number of stacked LSTM layers.
            EMBEDDINGS_SIZE: dimension of input and output symbol embeddings.
            STATE_SIZE: dimension of the LSTM hidden state.
            INPUT_VOCAB_SIZE: number of distinct input symbols.
            OUTPUT_VOCAB_SIZE: number of distinct output symbols.
        """
        self.model = model
        self.lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model)
        self.input_lookup = model.add_lookup_parameters((INPUT_VOCAB_SIZE, EMBEDDINGS_SIZE))
        # Output projection: hidden state -> scores over the output vocabulary.
        self.decoder_w = model.add_parameters((OUTPUT_VOCAB_SIZE, STATE_SIZE))
        self.decoder_b = model.add_parameters((OUTPUT_VOCAB_SIZE))
        self.output_lookup = model.add_lookup_parameters((OUTPUT_VOCAB_SIZE, EMBEDDINGS_SIZE))

    def save_to_disk(self, filename):
        """Save all parameters with ``dy.save`` under the base name ``filename``.

        The parent directory is created first so that a fresh checkout
        (without a ``models/`` directory) does not fail on the first save.
        """
        import os

        directory = os.path.dirname(filename)
        if directory:
            os.makedirs(directory, exist_ok=True)
        dy.save(filename, [self.lstm, self.input_lookup, self.decoder_w, self.decoder_b, self.output_lookup])

    def load_from_disk(self, filename):
        """Replace all parameters with those stored under ``filename``.

        The unpacking order must match the order used in ``save_to_disk``.
        """
        (self.lstm, self.input_lookup, self.decoder_w, self.decoder_b, self.output_lookup) = dy.load(filename, self.model)

    def embed_sentence(self, sentence):
        """Return the input-embedding expression for every symbol of ``sentence``.

        Symbols are mapped to ids through the module-level ``input_vocab``;
        a symbol missing from that vocabulary raises ``KeyError``.
        """
        return [self.input_lookup[input_vocab.w2i[c]] for c in sentence]

    def run_lstm(self, init_state, input_vecs):
        """Feed ``input_vecs`` into ``init_state`` one by one; return the final state."""
        s = init_state
        for vector in input_vecs:
            s = s.add_input(vector)
        return s

    def encode_sentence(self, sentence):
        """Run the LSTM over the embedded ``sentence`` and return the final state.

        NOTE(review): the original computed a reversed copy of the input but
        never used it. The input is still read in its original (forward)
        order here so trained models behave the same; only the dead local
        was removed.
        """
        return self.run_lstm(self.lstm.initial_state(), sentence)

    def decode(self, output, state):
        """Return the summed negative log-likelihood of ``output`` given ``state``.

        Args:
            output: gold output symbols (the reader appends EOS as the last one).
            state: encoder state from which decoding starts.

        Teacher forcing is used: the input at each step is the embedding of
        the previous *gold* symbol, starting with EOS.
        """
        output = [output_vocab.w2i[c] for c in output]

        w = dy.parameter(self.decoder_w)
        b = dy.parameter(self.decoder_b)

        # Feed in EOS as input to denote the start of decoding
        last_output_embedding = self.output_lookup[output_vocab.w2i[EOS]]
        loss = []

        for char in output:
            state = state.add_input(last_output_embedding)
            out_vector = w * state.output() + b
            probs = dy.softmax(out_vector)
            # Fix: the original assigned to a misspelled name
            # (`last_output_embeddings`), so the decoder saw the EOS
            # embedding at every step instead of the previous symbol.
            last_output_embedding = self.output_lookup[char]
            loss.append(-dy.log(dy.pick(probs, char)))
        return dy.esum(loss)

    def generate(self, in_seq):
        """Greedily decode ``in_seq`` and return the predicted string (without EOS).

        Decoding stops at the first predicted EOS or after
        ``2 * len(in_seq)`` steps, whichever comes first.
        """
        # Start from a fresh graph; otherwise every evaluated example keeps
        # adding nodes to the graph left over from the previous one.
        dy.renew_cg()
        embedded = self.embed_sentence(in_seq)
        state = self.encode_sentence(embedded)

        w = dy.parameter(self.decoder_w)
        b = dy.parameter(self.decoder_b)

        last_output_embedding = self.output_lookup[output_vocab.w2i[EOS]]

        out = []
        for _ in range(len(in_seq) * 2):
            state = state.add_input(last_output_embedding)
            out_vector = w * state.output() + b
            probs = dy.softmax(out_vector).vec_value()
            next_char = probs.index(max(probs))
            # Targets carry exactly one trailing EOS, so the first EOS ends
            # the sequence (the original waited for a second one).
            if output_vocab.i2w[next_char] == EOS:
                break
            out.append(output_vocab.i2w[next_char])
            # Fix: feed the symbol just produced back in (same misspelled
            # variable as in `decode` in the original).
            last_output_embedding = self.output_lookup[next_char]
        return ''.join(out)

    def get_loss(self, input_sentence, output_sentence):
        """Build a fresh computation graph and return the loss for one pair."""
        dy.renew_cg()
        embedded = self.embed_sentence(input_sentence)
        s = self.encode_sentence(embedded)
        return self.decode(output_sentence, s)

    def eval(self, inputs, outputs):
        """Return exact-match accuracy of greedy decoding over the given pairs.

        The first five examples are printed as
        ``<input symbols> <prediction> <gold>`` for a quick visual check.
        Returns ``0.0`` when ``inputs`` is empty.
        """
        N = len(inputs)
        if N == 0:
            return 0.0
        correct = 0.0
        for i, (source, target) in enumerate(zip(inputs, outputs)):
            prediction = self.generate(source)
            gold = ''.join(target[:-1])  # drop the trailing EOS
            if i < 5:
                print(f"\t{' '.join(source)}\t{prediction}\t{gold}")
            if prediction == gold:
                correct += 1
        return correct / N

    def train(self, train, dev):
        """Train with plain SGD for 50 epochs, checkpointing the best dev model.

        Args:
            train: ``(inputs, outputs)`` pair of parallel lists.
            dev: ``(inputs, outputs)`` pair used for model selection.

        The checkpoint is written to ``models/inflection.model`` whenever dev
        accuracy improves.
        """
        train_i, train_o = train[0], train[1]
        dev_i, dev_o = dev[0], dev[1]

        # Start below any reachable accuracy so the first epoch always writes
        # a checkpoint, even if dev accuracy never rises above 0.
        best_dev_acc = -1.0
        trainer = dy.SimpleSGDTrainer(self.model)
        ids = list(range(len(train_i)))
        for iteration in range(50):
            random.shuffle(ids)
            total_loss = 0.0
            for i in ids:
                loss = self.get_loss(train_i[i], train_o[i])
                total_loss += loss.value()
                loss.backward()
                trainer.update()
            # Sum over the whole epoch (the original printed only the loss of
            # the last example).
            print(f"Total Loss at Iteration {iteration} : {total_loss}")
            # Eval on dev
            dev_acc = self.eval(dev_i, dev_o)
            print(f"Dev accuracy at iteration {iteration} : {dev_acc}")
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                self.save_to_disk("models/inflection.model")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('train', help='Path to the corpus file.')
    parser.add_argument('dev', help='Path to the validation corpus file.')
    parser.add_argument('test', help='Path to the test corpus file.')
    args, unknown = parser.parse_known_args()

    train_corpus = util.read_inflection_data(args.train, end=EOS)
    dev_corpus = util.read_inflection_data(args.dev, end=EOS)
    test_corpus = util.read_inflection_data(args.test, end=EOS)

    # Vocabularies are built from the training data only; the model methods
    # read them as module-level globals.
    input_vocab = util.Vocab.from_corpus(train_corpus[0])
    output_vocab = util.Vocab.from_corpus(train_corpus[1])

    INPUT_VOCAB_SIZE = input_vocab.size()
    print(f"INPUT VOCAB SIZE: {INPUT_VOCAB_SIZE}")
    OUTPUT_VOCAB_SIZE = output_vocab.size()
    print(f"OUTPUT VOCAB SIZE: {OUTPUT_VOCAB_SIZE}")

    model = dy.Model()
    inflectionModel = EncDecModel(model, LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, INPUT_VOCAB_SIZE, OUTPUT_VOCAB_SIZE)

    inflectionModel.train(train_corpus, dev_corpus)

    # Load the best model
    inflectionModel.load_from_disk("models/inflection.model")

    # Test on test data
    test_acc = inflectionModel.eval(test_corpus[0], test_corpus[1])
    print(f"Accuracy on test: {test_acc}")


# --------------------------------------------------------------------------------
# /03-encdec/util.py:
# --------------------------------------------------------------------------------

class Vocab:
    """Bidirectional mapping between symbols and integer ids."""

    def __init__(self, w2i):
        """Copy the symbol->id mapping ``w2i`` and derive the inverse id->symbol map."""
        self.w2i = dict(w2i)
        self.i2w = {i: w for w, i in self.w2i.items()}

    @classmethod
    def from_corpus(cls, corpus):
        """Build a vocabulary from an iterable of symbol sequences.

        Ids are assigned in order of first appearance, starting at 0.
        """
        w2i = {}
        for sent in corpus:
            for word in sent:
                w2i.setdefault(word, len(w2i))
        # Use `cls` so subclasses get an instance of their own type.
        return cls(w2i)

    def size(self):
        """Return the number of distinct symbols."""
        return len(self.w2i)


def read_inflection_data(f, end="EOS"):
    """Read ``input ||| output`` lines from the file at path ``f``.

    Each side is split on single spaces and ``end`` is appended as the
    final symbol.  Blank lines are skipped.

    Args:
        f: path of the data file (read as UTF-8, since the data contains
            accented characters).
        end: end-of-sequence symbol appended to every input and output.

    Returns:
        ``(inputs, outputs)``: two parallel lists of symbol lists.

    Raises:
        ValueError: if a non-blank line has no `` ||| `` separator.
    """
    inputs = []
    outputs = []
    with open(f, 'r', encoding='utf-8') as inp:
        for lineno, line in enumerate(inp, start=1):
            line = line.strip()
            if not line:
                continue
            fields = line.split(' ||| ')
            if len(fields) < 2:
                raise ValueError(f"{f}:{lineno}: expected 'input ||| output', got {line!r}")
            inputs.append(fields[0].split(' ') + [end])
            outputs.append(fields[1].split(' ') + [end])

    return (inputs, outputs)

# -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License.
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/README.md: -------------------------------------------------------------------------------- 1 | # Attention-Visualization 2 | Visualization for simple attention and Google's multi-head attention. 3 | 4 | ## Requirements 5 | 6 | - python 7 | - jdk1.8 8 | 9 | ## Usage 10 | 11 | 1\. Python version (for simple attention only): 12 | 13 | ``` bash 14 | python exec/plot_heatmap.py --input xxx.attention 15 | ``` 16 | 17 | 2\. Java version (for both simple attention and Google's multi-head attention): 18 | ``` bash 19 | java -jar exec/plot_heatmap.jar 20 | ``` 21 | then select the attention file on the GUI. 
22 | 23 | 24 | ## Data Format 25 | 26 | The name of the attention file should end with ".attention" extension especially when using exec/plot_heatmap.jar and the file should be json format: 27 | 28 | ``` python 29 | { "0": { 30 | "source": " ", # the source sentence (without and symbols) 31 | "translation": " ", # the target sentence (without and symbols) 32 | "attentions": [ # various attention results 33 | { 34 | "name": " ", # a unique name for this attention 35 | "type": " ", # the type of this attention (simple or multihead) 36 | "value": [...] # the attention weights, a json array 37 | }, # end of one attention result 38 | {...}, ...] # end of various attention results 39 | }, # end of the first sample 40 | "1":{ 41 | ... 42 | }, # end of the second sample 43 | ... 44 | } # end of file 45 | ``` 46 | 47 | Note that due to the hard coding, the `name` of each attention should contain "encoder_decoder_attention", "encoder_self_attention" or "decoder_self_attention" substring on the basis of its real meaning. 48 | 49 | The `value` has shape [length_queries, length_keys] when `type`=simple and has shape [num_heads, length_queries, length_keys] when `type`=multihead. 50 | 51 | For more details, see [attention.py](https://github.com/zhaocq-nlp/NJUNMT-tf/blob/master/njunmt/inference/attention.py). 52 | 53 | ## Demo 54 | 55 | The `toydata/toy.attention` is generated by a NMT model with a self-attention encoder, Bahdanau's attention and a RNN decoder using [NJUNMT-tf](https://github.com/zhaocq-nlp/NJUNMT-tf). 56 | 57 | 1\. Execute the python version (for simple attention only): 58 | 59 | ``` bash 60 | python exec/plot_heatmap.py --input toydata/toy.attention 61 | ``` 62 | It will plot the traditional attention heatmap: 63 | 64 |
65 |

66 |
67 | 68 | 69 | 2\. For the Java version (for both simple attention and Google's multi-head attention), execute 70 | ``` bash 71 | java -jar exec/plot_heatmap.jar 72 | ``` 73 | then select `toydata/toy.attention` in the GUI. 74 | 75 |
76 |

77 |
78 |
79 |

80 |
81 | 82 | The words on the left side are attention "queries" and attention "keys" are on the right. Click on the words on the left side to see the heatmap: 83 | 84 |
85 |

86 |
87 | 88 | This shows the traditional `encoder_decoder_attention` of the word "obtained". The color depth of the lines and squares indicates the degree of attention. 89 | 90 | Next, select `encoder_self_attention0` under the menu bar. Click on "获得" on the left. 91 | 92 |
93 |

94 |
95 | 96 | It shows the multi-head attention of the word "获得". Attention weights of head0 - head7 are displayed on the right. -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/exec/plot_heatmap.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neubig/mtandseq2seq-code/956656278903bb626b9136ef034a69e3d03bfd78/04-attention/Attention-Visualization/exec/plot_heatmap.jar -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/exec/plot_heatmap.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | import numpy 5 | import matplotlib.pyplot as plt 6 | from matplotlib.font_manager import FontProperties 7 | import json 8 | import argparse 9 | 10 | chineseFont = FontProperties(fname = '/Library/Fonts/Songti.ttc') 11 | 12 | # input: 13 | # alignment matrix - numpy array 14 | # shape (target tokens + eos, number of hidden source states = source tokens +eos) 15 | # one line correpsonds to one decoding step producing one target token 16 | # each line has the attention model weights corresponding to that decoding step 17 | # each float on a line is the attention model weight for a corresponding source state. 
# plot: a heat map of the alignment matrix
# x axis are the source tokens (alignment is to source hidden state that roughly corresponds to a source token)
# y axis are the target tokens

# http://stackoverflow.com/questions/14391959/heatmap-in-matplotlib-with-pcolor
def plot_head_map(mma, target_labels, source_labels):
    """Show ``mma`` as a heat map with source labels on x and target labels on y.

    Args:
        mma: 2-D numpy array; rows are labelled with ``target_labels`` and
            columns with ``source_labels``.
        target_labels: row labels.
        source_labels: column labels, drawn with the module-level
            ``chineseFont``.

    Blocks in ``plt.show()`` until the window is closed.
    """
    fig, ax = plt.subplots()
    ax.pcolor(mma, cmap=plt.cm.Blues)

    # put the major ticks at the middle of each cell
    ax.set_xticks(numpy.arange(mma.shape[1]) + 0.5, minor=False)
    ax.set_yticks(numpy.arange(mma.shape[0]) + 0.5, minor=False)

    # without this I get some extra columns rows
    # http://stackoverflow.com/questions/31601351/why-does-this-matplotlib-heatmap-have-an-extra-blank-column
    ax.set_xlim(0, int(mma.shape[1]))
    ax.set_ylim(0, int(mma.shape[0]))

    # want a more natural, table-like display
    ax.invert_yaxis()
    ax.xaxis.tick_top()

    # source words -> column labels
    ax.set_xticklabels(source_labels, minor=False, fontproperties=chineseFont)
    # target words -> row labels
    ax.set_yticklabels(target_labels, minor=False)

    plt.xticks(rotation=45)

    # plt.tight_layout()
    plt.show()


# column labels -> target words
# row labels -> source words

def read_alignment_matrix(f):
    """Read one alignment record from ``f`` in the ``|||``-separated text format.

    The header line is split on ``|||``: field 0 is the sentence id, field 1
    the target tokens, field 3 the source tokens, and the last field holds
    ``<src_count> <trg_count>``.  It is followed by ``trg_count`` lines of
    whitespace-separated floats.

    Args:
        f: open file; text or binary mode (bytes are decoded as UTF-8).

    Returns:
        ``(sid, mma, target_labels, source_labels)``, or four ``None`` values
        when the header line is empty (e.g. at end of file).
    """
    def _readline():
        # Accept both text and binary file objects.
        line = f.readline()
        return line.decode('UTF-8') if isinstance(line, bytes) else line

    header = _readline().strip().split('|||')
    if header[0] == '':
        return None, None, None, None
    sid = int(header[0].strip())
    # number of tokens in source and translation +1 for eos
    src_count, trg_count = map(int, header[-1].split())
    # source words
    source_labels = header[3].split()
    # source_labels.append('</s>')
    # target words
    target_labels = header[1].split()
    target_labels.append('</s>')

    # Build real lists of floats: on Python 3 a bare map() object would turn
    # the result into an array of iterators instead of a numeric matrix.
    mm = [[float(x) for x in _readline().strip().split()] for _ in range(trg_count)]
    mma = numpy.array(mm)
    return sid, mma, target_labels, source_labels


def read_plot_alignment_matrices(f, start=0):
    """Plot the first attention matrix of every sample in a JSON attention file.

    Args:
        f: open file (text or binary) containing a JSON object that maps
            sample indices to ``{"source", "translation", "attentions"}``.
        start: samples whose index is smaller than this are skipped.

    Only ``"simple"`` attention is supported; a multi-head entry trips the
    assertion below.
    """
    # json.load() lost its `encoding` argument in Python 3.9 and detects the
    # encoding of binary input on its own.
    attentions = json.load(f)

    # Visit samples in numeric order so `start` skips a well-defined prefix.
    for idx, att in sorted(attentions.items(), key=lambda item: int(item[0])):
        idx = int(idx)
        if idx < start: continue
        source_labels = att["source"].split() + ["SEQUENCE_END"]
        target_labels = att["translation"].split()
        att_list = att["attentions"]
        assert att_list[0]["type"] == "simple", "Do not use this tool for multihead attention."
        mma = numpy.array(att_list[0]["value"])
        # Some matrices carry an extra row for the end-of-sequence step.
        if mma.shape[0] == len(target_labels) + 1:
            target_labels += ["SEQUENCE_END"]

        plot_head_map(mma, target_labels, source_labels)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', '-i', type=argparse.FileType("rb"),
                        default="trans.att",
                        metavar='PATH',
                        help="Input file (default: trans.att)")
    parser.add_argument('--start', type=int, default=0)

    args = parser.parse_args()

    read_plot_alignment_matrices(args.input, args.start)

# -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/multihead-att-java/ActionLabel.java: -------------------------------------------------------------------------------- 1 | import javax.swing.*; 2 | import javax.swing.border.BevelBorder; 3 | import javax.swing.event.MouseInputListener; 4 | import java.awt.*; 5 | import java.awt.event.MouseEvent; 6 | import java.awt.event.MouseListener; 7 | import java.awt.event.MouseMotionListener; 8 | 9 | 10 | public class ActionLabel extends JLabel { 11 | 12 | private boolean isActive = false; 13 | private HeatmapPanel parent = null; 14 | private String currentText = ""; 15 | private int id = -1; 16 | 17 | // private
HintPanel hintPanel = null; 18 | 19 | public ActionLabel(HeatmapPanel parent, Integer id, String text, int horizontalAlignment) { 20 | super(text, horizontalAlignment); 21 | this.parent = parent; 22 | this.currentText = text; 23 | this.id = id; 24 | LabelMouseListener listener = new LabelMouseListener(); 25 | this.addMouseListener(listener); 26 | 27 | } 28 | 29 | @Override 30 | public void setText(String text) { 31 | super.setText(text); 32 | this.currentText = text; 33 | if (text.length() > 0) { 34 | isActive = true; 35 | } else { 36 | isActive = false; 37 | } 38 | } 39 | 40 | @Override 41 | public void setBounds(int x, int y, int width, int height) { 42 | super.setBounds(x, y, width, height); 43 | // if(hintPanel == null){ 44 | // hintPanel = new HintPanel(x - 50, (int) y - 25, 200, 30); 45 | // } 46 | } 47 | 48 | private class HintPanel extends JPanel{ 49 | 50 | JLabel label = new JLabel("", JLabel.CENTER); 51 | public HintPanel(int x, int y, int width, int height){ 52 | super(); 53 | super.setBounds(x, y, width, height); 54 | super.add(label); 55 | label.setBounds(0, 0, width, height); 56 | label.setFont(new Font("TimesRoman", Font.PLAIN, 20)); 57 | } 58 | 59 | public void setText(String text){ 60 | label.setText(text); 61 | } 62 | 63 | @Override 64 | protected void paintComponent(Graphics g) { 65 | super.paintComponent(g); 66 | Graphics2D g2d = (Graphics2D) g; 67 | g2d.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 0.7f)); 68 | g2d.setColor(Color.YELLOW); 69 | g2d.fill(getBounds()); 70 | g2d.dispose(); 71 | } 72 | } 73 | 74 | private class LabelMouseListener implements MouseListener { 75 | 76 | @Override 77 | public void mouseClicked(MouseEvent e) { 78 | if (isActive) { 79 | parent.setWordIndex(id); 80 | } 81 | } 82 | 83 | @Override 84 | public void mouseEntered(MouseEvent e) { 85 | if (isActive) { 86 | // if (hintPanel != null) { 87 | // parent.add(hintPanel); 88 | // parent.flushPanel(); 89 | // } 90 | ((JLabel) 
e.getComponent()).setBorder(new BevelBorder(BevelBorder.RAISED, null, null, null, null)); 91 | ((JLabel) e.getComponent()).setCursor(new Cursor(Cursor.HAND_CURSOR)); 92 | } 93 | } 94 | 95 | @Override 96 | public void mouseExited(MouseEvent e) { 97 | if (isActive) { 98 | // parent.remove(hintPanel); 99 | // parent.flushPanel(); 100 | ((JLabel) e.getComponent()).setBorder(null); 101 | } 102 | } 103 | 104 | @Override 105 | public void mousePressed(MouseEvent e) { 106 | if (isActive) { 107 | ((JLabel) e.getComponent()).setBorder(new BevelBorder(BevelBorder.LOWERED, null, null, null, null)); 108 | } 109 | } 110 | 111 | @Override 112 | public void mouseReleased(MouseEvent e) { 113 | if (isActive) { 114 | ((JLabel) e.getComponent()).setBorder(new BevelBorder(BevelBorder.RAISED, null, null, null, null)); 115 | } 116 | } 117 | } 118 | 119 | } 120 | -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/multihead-att-java/DataObject.java: -------------------------------------------------------------------------------- 1 | import org.json.JSONArray; 2 | import org.json.JSONObject; 3 | 4 | import java.io.BufferedReader; 5 | import java.io.FileInputStream; 6 | import java.io.InputStreamReader; 7 | import java.util.ArrayList; 8 | import java.util.Collections; 9 | import java.util.List; 10 | 11 | public class DataObject { 12 | 13 | public int numSamples = 0; 14 | public List attentionFieldList; 15 | 16 | 17 | private JSONObject dataObject = null; 18 | 19 | public DataObject(String filename){ 20 | this.reload(filename); 21 | } 22 | 23 | public JSONObject get(int index){ 24 | try { 25 | JSONObject obj = this.dataObject.getJSONObject(String.format("%d", index)); 26 | return obj; 27 | } catch(Exception e){ 28 | e.printStackTrace(); 29 | System.exit(0); 30 | } 31 | return null; 32 | } 33 | 34 | public String getAttentionType(int index, String attentionField){ 35 | JSONObject instanceObj = this.get(index); 36 | try{ 37 | 
JSONArray attLists = instanceObj.getJSONArray("attentions"); 38 | for (int i = 0; i < attLists.length(); ++i) { 39 | JSONObject obj = attLists.getJSONObject(i); 40 | if(obj.getString("name").equals(attentionField)){ 41 | return obj.getString("type"); 42 | } 43 | } 44 | } catch(Exception e){ 45 | e.printStackTrace(); 46 | System.exit(0); 47 | } 48 | System.exit(0); 49 | return null; 50 | } 51 | 52 | 53 | public JSONArray getAttentionWeight(int index, String attentionField){ 54 | JSONObject instanceObj = this.get(index); 55 | try{ 56 | JSONArray attLists = instanceObj.getJSONArray("attentions"); 57 | for (int i = 0; i < attLists.length(); ++i) { 58 | JSONObject obj = attLists.getJSONObject(i); 59 | if(obj.getString("name").equals(attentionField)){ 60 | return obj.getJSONArray("value"); 61 | } 62 | } 63 | } catch(Exception e){ 64 | e.printStackTrace(); 65 | System.exit(0); 66 | } 67 | System.exit(0); 68 | return null; 69 | } 70 | 71 | public void reload(String filename){ 72 | try { 73 | BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "utf-8")); 74 | String str = br.readLine(); 75 | br.close(); 76 | this.dataObject = new JSONObject(str); 77 | 78 | this.numSamples = this.dataObject.length(); 79 | this.attentionFieldList = new ArrayList(); 80 | JSONArray attArray = this.dataObject.getJSONObject("0").getJSONArray("attentions"); 81 | for (int i = 0; i < attArray.length(); ++i) { 82 | JSONObject obj = attArray.getJSONObject(i); 83 | String type =obj.getString("type"); 84 | if (type.equals("multihead") || type.equals("simple")) { 85 | this.attentionFieldList.add(obj.getString("name")); 86 | } else{ 87 | System.err.println(String.format("Error with type: %s", type)); 88 | System.exit(0); 89 | } 90 | } 91 | Collections.sort(this.attentionFieldList); 92 | } catch (Exception e){ 93 | e.printStackTrace(); 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- 
/04-attention/Attention-Visualization/multihead-att-java/HeatmapPanel.java: -------------------------------------------------------------------------------- 1 | 2 | import org.json.JSONArray; 3 | import org.json.JSONObject; 4 | 5 | import javax.swing.*; 6 | import java.awt.*; 7 | import java.util.ArrayList; 8 | import java.util.Arrays; 9 | 10 | 11 | public class HeatmapPanel extends JPanel { 12 | 13 | private MainFrame parent = null; 14 | private DataObject dataObject = null; 15 | private int preDefineMaxLength = 50; 16 | 17 | public int currentSampleId = 0; 18 | public String currentAttentionName = ""; 19 | public int wordIndex = -1; 20 | 21 | 22 | // left 23 | private ArrayList leftLabelList = new ArrayList(); 24 | private ArrayList rightLabelList = new ArrayList(); 25 | private ArrayList leftNumLabelList = new ArrayList(); 26 | private ArrayList rightNumLabelList = new ArrayList(); 27 | 28 | public HeatmapPanel(MainFrame parent, DataObject dataObject, int preDefineMaxLength) { 29 | super(); 30 | this.parent = parent; 31 | this.dataObject = dataObject; 32 | this.preDefineMaxLength = preDefineMaxLength; 33 | this.setBounds(0, 0, 768, 3400); 34 | this.setLayout(null); 35 | this.addEmptyLabels(); 36 | } 37 | 38 | public void flushPanel() { 39 | this.parent.flushFrame(); 40 | } 41 | 42 | private void addEmptyLabels() { 43 | for (int i = 0; i < this.preDefineMaxLength; ++i) { 44 | JLabel label = new ActionLabel(this, i, "", JLabel.RIGHT); 45 | label.setFont(new Font("TimesRoman", Font.PLAIN, 18)); 46 | label.setBounds(100, 50 + i * 20 + 7 * i, 140, 20); 47 | this.add(label); 48 | this.leftLabelList.add(label); 49 | } 50 | for (int i = 0; i < this.preDefineMaxLength; ++i) { 51 | JLabel label = new JLabel("", JLabel.LEFT); 52 | label.setFont(new Font("TimesRoman", Font.PLAIN, 18)); 53 | label.setBounds(500, 50 + i * 20 + 7 * i, 140, 20); 54 | this.add(label); 55 | this.rightLabelList.add(label); 56 | } 57 | for (int i = 0; i < this.preDefineMaxLength; ++i) { 58 | JLabel 
label = new JLabel("", JLabel.RIGHT); 59 | label.setFont(new Font("TimesRoman", Font.PLAIN, 18)); 60 | label.setBounds(60, 50 + i * 20 + 7 * i, 30, 20); 61 | this.add(label); 62 | this.leftNumLabelList.add(label); 63 | } 64 | for (int i = 0; i < this.preDefineMaxLength; ++i) { 65 | JLabel label = new JLabel("", JLabel.LEFT); 66 | label.setFont(new Font("TimesRoman", Font.PLAIN, 18)); 67 | label.setBounds(640, 50 + i * 20 + 7 * i, 30, 20); 68 | this.add(label); 69 | this.rightNumLabelList.add(label); 70 | } 71 | } 72 | 73 | public void setWordIndex(int wordIndex) { 74 | this.wordIndex = wordIndex; 75 | this.parent.flushFrame(); 76 | } 77 | 78 | public void display(String currentAttentionName) { 79 | int curPrefixIndex = this.currentAttentionName.indexOf("_attention"); 80 | int nextPrefixIndex = currentAttentionName.indexOf("_attention"); 81 | if (this.currentAttentionName.substring(0, curPrefixIndex).equals( 82 | currentAttentionName.substring(0, nextPrefixIndex))) { 83 | this.currentAttentionName = currentAttentionName; 84 | } else { 85 | this.currentAttentionName = currentAttentionName; 86 | this.display(this.currentSampleId); 87 | } 88 | } 89 | 90 | public void display(int sampleId) { 91 | this.currentSampleId = sampleId; 92 | this.wordIndex = -1; 93 | String left = ""; 94 | String right = ""; 95 | try { 96 | JSONObject currentObj = this.dataObject.get(this.currentSampleId); 97 | String source = currentObj.getString("source"); 98 | String target = currentObj.getString("translation"); 99 | if (this.currentAttentionName.contains("encoder_decoder_attention")) { 100 | left = target; 101 | right = source; 102 | } else if (this.currentAttentionName.contains("encoder_self_attention")) { 103 | left = source; 104 | right = source; 105 | } else if (this.currentAttentionName.contains("decoder_self_attention")) { 106 | left = target; 107 | right = target; 108 | } else { 109 | System.err.println("Error name with attention"); 110 | System.exit(0); 111 | } 112 | if 
(this.currentAttentionName.contains("decoder_self_attention")) { 113 | left = " " + left; 114 | right = " " + right; 115 | } else { 116 | left += " "; 117 | right += " "; 118 | } 119 | 120 | } catch (Exception e) { 121 | e.printStackTrace(); 122 | } 123 | String[] leftTokens = left.trim().split(" "); 124 | String[] rightTokens = right.trim().split(" "); 125 | int auxIndex = this.leftLabelList.size(); 126 | while (auxIndex < leftTokens.length) { 127 | JLabel label = new ActionLabel(this, auxIndex, "", JLabel.RIGHT); 128 | label.setFont(new Font("TimesRoman", Font.PLAIN, 18)); 129 | label.setBounds(100, 50 + auxIndex * 20 + 7 * auxIndex, 140, 20); 130 | this.add(label); 131 | this.leftLabelList.add(label); 132 | 133 | JLabel numLabel = new JLabel("", JLabel.RIGHT); 134 | numLabel.setFont(new Font("TimesRoman", Font.PLAIN, 18)); 135 | numLabel.setBounds(60, 50 + auxIndex * 20 + 7 * auxIndex, 30, 20); 136 | this.add(numLabel); 137 | this.leftNumLabelList.add(numLabel); 138 | ++auxIndex; 139 | } 140 | auxIndex = this.rightLabelList.size(); 141 | while (auxIndex < rightTokens.length) { 142 | JLabel label = new JLabel("", JLabel.LEFT); 143 | label.setFont(new Font("TimesRoman", Font.PLAIN, 18)); 144 | label.setBounds(500, 50 + auxIndex * 20 + 7 * auxIndex, 130, 20); 145 | this.add(label); 146 | this.rightLabelList.add(label); 147 | 148 | JLabel numlabel = new JLabel("", JLabel.LEFT); 149 | numlabel.setFont(new Font("TimesRoman", Font.PLAIN, 18)); 150 | numlabel.setBounds(640, 50 + auxIndex * 20 + 7 * auxIndex, 30, 20); 151 | this.add(numlabel); 152 | this.rightNumLabelList.add(numlabel); 153 | ++auxIndex; 154 | } 155 | for (int i = 0; i < this.leftLabelList.size(); ++i) { 156 | if (i < leftTokens.length) { 157 | this.leftLabelList.get(i).setText(leftTokens[i]); 158 | this.leftNumLabelList.get(i).setText(String.format("%d", i)); 159 | } else { 160 | this.leftLabelList.get(i).setText(""); 161 | this.leftNumLabelList.get(i).setText(""); 162 | } 163 | } 164 | for (int i = 0; 
i < this.rightLabelList.size(); ++i) { 165 | if (i < rightTokens.length) { 166 | this.rightLabelList.get(i).setText(rightTokens[i]); 167 | this.rightNumLabelList.get(i).setText(String.format("%d", i)); 168 | } else { 169 | this.rightLabelList.get(i).setText(""); 170 | this.rightNumLabelList.get(i).setText(""); 171 | } 172 | } 173 | // this.validate(); 174 | // this.invalidate(); 175 | // this.repaint(); 176 | } 177 | 178 | @Override 179 | public void paint(Graphics g) { 180 | super.paint(g); 181 | if (this.wordIndex < 0) { 182 | return; 183 | } 184 | Graphics2D g2 = (Graphics2D) g; 185 | g2.setColor(Color.BLUE); 186 | // g2.setColor(Color.CYAN); 187 | JSONArray attArray = this.dataObject.getAttentionWeight(this.currentSampleId, this.currentAttentionName); 188 | String attType = this.dataObject.getAttentionType(this.currentSampleId, this.currentAttentionName); 189 | if (attType.equals("multihead")) { 190 | double[] accumulateScores = null; 191 | g2.setStroke(new BasicStroke(3.0f)); 192 | for (int head = 0; head < attArray.length(); ++head) { 193 | JSONArray att = null; 194 | try { 195 | att = (JSONArray) ((JSONArray) (attArray.get(head))).get(this.wordIndex); 196 | if (head == 0) { 197 | accumulateScores = new double[att.length()]; 198 | Arrays.fill(accumulateScores, 0.0); 199 | } 200 | for (int idx = 0; idx < att.length(); ++idx) { 201 | accumulateScores[idx] += att.getDouble(idx); 202 | g2.setComposite(AlphaComposite.getInstance( 203 | AlphaComposite.SRC_OVER, (float) (att.getDouble(idx) * 0.7))); 204 | g2.fillRect(500 + head * 20, 50 + 27 * idx, 20, 20); 205 | } 206 | } catch (Exception e) { 207 | e.printStackTrace(); 208 | } 209 | } 210 | double sum = 0.0; 211 | for (int idx = 0; idx < accumulateScores.length; ++idx) { 212 | sum += accumulateScores[idx]; 213 | } 214 | int[] topIndexes = Utils.topIndexes(accumulateScores, 5); 215 | double multiplier = 0.6 / (accumulateScores[topIndexes[0]] / sum); 216 | for (int idx = 0; idx < topIndexes.length; ++idx) { 217 | 
double prob = accumulateScores[topIndexes[idx]] / sum * multiplier; 218 | g2.setComposite(AlphaComposite.getInstance( 219 | AlphaComposite.SRC_OVER, (float) (prob))); 220 | g2.drawLine(240, 60 + this.wordIndex * 27, 500, 60 + topIndexes[idx] * 27); 221 | } 222 | } else if (attType.equals("simple")) { 223 | double[] accumulateScores = null; 224 | g2.setStroke(new BasicStroke(3.0f)); 225 | JSONArray att = null; 226 | try { 227 | att = (JSONArray) (attArray.get(this.wordIndex)); 228 | accumulateScores = new double[att.length()]; 229 | Arrays.fill(accumulateScores, 0.0); 230 | for (int idx = 0; idx < att.length(); ++idx) { 231 | accumulateScores[idx] = att.getDouble(idx); 232 | g2.setComposite(AlphaComposite.getInstance( 233 | AlphaComposite.SRC_OVER, (float) (att.getDouble(idx) * 0.7))); 234 | g2.fillRect(500 + 20, 50 + 27 * idx, 20 * 8, 20); 235 | } 236 | } catch (Exception e) { 237 | e.printStackTrace(); 238 | } 239 | 240 | int[] topIndexes = Utils.topIndexes(accumulateScores, 5); 241 | double multiplier = 0.6 / accumulateScores[topIndexes[0]]; 242 | for (int idx = 0; idx < topIndexes.length; ++idx) { 243 | double prob = accumulateScores[topIndexes[idx]] * multiplier; 244 | g2.setComposite(AlphaComposite.getInstance( 245 | AlphaComposite.SRC_OVER, (float) (prob))); 246 | g2.drawLine(240, 60 + this.wordIndex * 27, 500, 60 + topIndexes[idx] * 27); 247 | } 248 | } 249 | g.dispose(); 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/multihead-att-java/MainFrame.java: -------------------------------------------------------------------------------- 1 | import java.awt.*; 2 | import javax.swing.*; 3 | import javax.swing.filechooser.FileFilter; 4 | 5 | import java.awt.event.ActionEvent; 6 | import java.awt.event.ActionListener; 7 | import java.io.File; 8 | 9 | 10 | public class MainFrame extends JDialog { 11 | 12 | // for main panel 13 | private JTabbedPane mainTabbedPane = new 
JTabbedPane(JTabbedPane.TOP); 14 | 15 | // for menu 16 | private JFileChooser fileChooser = new JFileChooser(); 17 | 18 | private int panelCount = 0; 19 | 20 | public MainFrame(String name) throws Exception { 21 | this.setTitle(name); 22 | this.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE); 23 | this.setMinimumSize(new Dimension(800, 600));//setSize(); 24 | this.setVisible(true); 25 | this.setLocation((Toolkit.getDefaultToolkit().getScreenSize().width - this.getWidth()) / 2, 26 | (Toolkit.getDefaultToolkit().getScreenSize().height - this.getHeight()) / 2); 27 | this.setResizable(true); 28 | 29 | FileFilter attFileFilter = new FileFilter() { 30 | @Override 31 | public boolean accept(File f) { 32 | String name = f.getName(); 33 | return f.isDirectory() || name.endsWith(".attention"); 34 | } 35 | 36 | @Override 37 | public String getDescription() { 38 | return "*.attention"; 39 | } 40 | }; 41 | fileChooser.setFileFilter(attFileFilter); 42 | fileChooser.addChoosableFileFilter(attFileFilter); 43 | 44 | this.createMenuBar(); 45 | 46 | 47 | Container contentPane = this.getContentPane(); 48 | contentPane.add(this.mainTabbedPane); 49 | this.flushFrame(); 50 | System.out.println("Finish"); 51 | } 52 | 53 | public void openFile(String filename) { 54 | try { 55 | DataObject dataObject = new DataObject(filename); 56 | HeatmapPanel heatmapPanel = new HeatmapPanel(this, dataObject, 30); 57 | MainPanel panel = new MainPanel(new JPanel(), dataObject); 58 | panel.addHeatmapPanel(heatmapPanel); 59 | this.mainTabbedPane.add(panel, Utils.extractFilePrefix(filename)); 60 | this.mainTabbedPane.setSelectedComponent(panel); 61 | this.flushFrame(); 62 | } catch (Exception e) { 63 | e.printStackTrace(); 64 | } 65 | } 66 | 67 | public void flushFrame() { 68 | this.validate(); 69 | this.invalidate(); 70 | this.repaint(); 71 | } 72 | 73 | 74 | private void createMenuBar() { 75 | JMenu menu = new JMenu("File"); 76 | JMenuItem openItem = new JMenuItem("Open..."); 77 | menu.add(openItem); 78 | 
JMenuBar br = new JMenuBar(); 79 | br.add(menu); 80 | 81 | JMenuItem closeItem = new JMenuItem("Close Tab"); 82 | menu.add(closeItem); 83 | 84 | JMenuItem quitItem = new JMenuItem("Quit"); 85 | menu.add(quitItem); 86 | 87 | closeItem.setEnabled(false); 88 | 89 | openItem.addActionListener(new ActionListener() { 90 | @Override 91 | public void actionPerformed(ActionEvent e) { 92 | int state = fileChooser.showOpenDialog(null); 93 | if (state == 1) { 94 | return; 95 | } else { 96 | String filename = fileChooser.getSelectedFile().getAbsolutePath(); 97 | openFile(filename); 98 | ++panelCount; 99 | closeItem.setEnabled(true); 100 | } 101 | } 102 | }); 103 | 104 | closeItem.addActionListener(new ActionListener() { 105 | @Override 106 | public void actionPerformed(ActionEvent e) { 107 | if(closeItem.isEnabled()){ 108 | mainTabbedPane.remove(mainTabbedPane.getSelectedComponent()); 109 | --panelCount; 110 | if (panelCount == 0){ 111 | closeItem.setEnabled(false); 112 | } 113 | } 114 | 115 | } 116 | }); 117 | 118 | quitItem.addActionListener(new ActionListener() { 119 | @Override 120 | public void actionPerformed(ActionEvent e) { 121 | dispose(); 122 | } 123 | }); 124 | 125 | this.setJMenuBar(br); 126 | } 127 | 128 | 129 | public static void main(String[] args) throws Exception { 130 | new MainFrame("Heatmap"); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/multihead-att-java/MainPanel.java: -------------------------------------------------------------------------------- 1 | import javax.swing.*; 2 | import java.awt.*; 3 | import java.awt.event.ActionEvent; 4 | import java.awt.event.ActionListener; 5 | 6 | public class MainPanel extends JScrollPane { 7 | 8 | 9 | private JPanel mainPanel = null; 10 | private HeatmapPanel heatmapPanel = null; 11 | private JComboBox sampleComboBox = null; 12 | private JComboBox attentionComboBox = null; 13 | private DataObject dataObject = null; 14 | 15 
| private JButton prevButton = null; 16 | private JButton nextButton = null; 17 | 18 | public MainPanel(JPanel panel, DataObject dataObj) { 19 | super(panel); 20 | this.mainPanel = panel; 21 | this.mainPanel.setPreferredSize(new Dimension(768, 3500)); 22 | this.mainPanel.setLayout(null); 23 | this.setBounds(0, 0, 200, 200); 24 | this.setBackground(Color.WHITE); 25 | this.setOpaque(true); 26 | 27 | this.dataObject = dataObj; 28 | this.createPopupMenu(); 29 | } 30 | 31 | public void addHeatmapPanel(HeatmapPanel panel) { 32 | this.mainPanel.add(panel); 33 | this.heatmapPanel = panel; 34 | heatmapPanel.currentAttentionName = (String) this.attentionComboBox.getSelectedItem(); 35 | heatmapPanel.display(Integer.parseInt((String) this.sampleComboBox.getSelectedItem())); 36 | } 37 | 38 | public void flushPanel() { 39 | this.mainPanel.validate(); 40 | this.mainPanel.invalidate(); 41 | this.mainPanel.repaint(); 42 | } 43 | 44 | public void createPopupMenu() { 45 | JLabel sampleLabel = new JLabel("Sample: ", JLabel.RIGHT); 46 | this.mainPanel.add(sampleLabel); 47 | sampleLabel.setFont(new Font("TimesRoman", Font.PLAIN, 16)); 48 | sampleLabel.setBounds(50, 5, 60, 30); 49 | 50 | this.prevButton = new JButton("prev"); 51 | this.mainPanel.add(this.prevButton); 52 | this.prevButton.setFont(new Font("TimesRoman", Font.PLAIN, 16)); 53 | this.prevButton.setBounds(185, 5, 60, 30); 54 | 55 | this.nextButton = new JButton("next"); 56 | this.mainPanel.add(this.nextButton); 57 | this.nextButton.setFont(new Font("TimesRoman", Font.PLAIN, 16)); 58 | this.nextButton.setBounds(250, 5, 60, 30); 59 | 60 | JLabel attLabel = new JLabel("Displaying: ", JLabel.RIGHT); 61 | this.mainPanel.add(attLabel); 62 | attLabel.setFont(new Font("TimesRoman", Font.PLAIN, 16)); 63 | attLabel.setBounds(290, 5, 120, 30); 64 | // } 65 | this.sampleComboBox = new JComboBox(); 66 | for (int i = 0; i < this.dataObject.numSamples; ++i) { 67 | this.sampleComboBox.addItem(String.format("%d", i)); 68 | } 69 | 
this.mainPanel.add(sampleComboBox); 70 | this.sampleComboBox.setBounds(110, 5, 70, 30); 71 | this.sampleComboBox.setSelectedIndex(0); 72 | 73 | this.attentionComboBox = new JComboBox(); 74 | this.mainPanel.add(attentionComboBox); 75 | for (int i = 0; i < this.dataObject.attentionFieldList.size(); ++i) { 76 | this.attentionComboBox.addItem(this.dataObject.attentionFieldList.get(i)); 77 | } 78 | this.attentionComboBox.setBounds(410, 5, 250, 30); 79 | this.attentionComboBox.setSelectedIndex(0); 80 | 81 | this.sampleComboBox.addActionListener( 82 | new ActionListener() { 83 | @Override 84 | public void actionPerformed(ActionEvent e) { 85 | heatmapPanel.display(Integer.parseInt((String) sampleComboBox.getSelectedItem())); 86 | flushPanel(); 87 | } 88 | }); 89 | this.attentionComboBox.addActionListener( 90 | new ActionListener() { 91 | @Override 92 | public void actionPerformed(ActionEvent e) { 93 | heatmapPanel.display((String) attentionComboBox.getSelectedItem()); 94 | flushPanel(); 95 | } 96 | }); 97 | 98 | this.prevButton.addActionListener(new ActionListener() { 99 | @Override 100 | public void actionPerformed(ActionEvent e) { 101 | if (heatmapPanel.currentSampleId > 0){ 102 | int prev = heatmapPanel.currentSampleId - 1; 103 | sampleComboBox.setSelectedIndex(prev); 104 | heatmapPanel.display(prev); 105 | flushPanel(); 106 | } 107 | } 108 | }); 109 | this.nextButton.addActionListener(new ActionListener() { 110 | @Override 111 | public void actionPerformed(ActionEvent e) { 112 | if(heatmapPanel.currentSampleId < dataObject.numSamples - 1){ 113 | int next = heatmapPanel.currentSampleId + 1; 114 | sampleComboBox.setSelectedIndex(next); 115 | heatmapPanel.display(next); 116 | flushPanel(); 117 | } 118 | } 119 | }); 120 | 121 | } 122 | 123 | 124 | } 125 | -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/multihead-att-java/Utils.java: 
/** Small static helpers shared by the visualization classes. */
public final class Utils {

    /**
     * Returns the indexes of the {@code topk} largest entries of {@code values}, best first.
     *
     * <p>Equal scores are all kept (lower index first) and NaN entries rank last. The result
     * has {@code min(topk, values.length)} elements, so it never contains padding indexes
     * that do not correspond to a ranked entry.
     *
     * @param values scores to rank; {@code null} is treated as empty
     * @param topk   maximum number of indexes to return
     * @return indexes into {@code values}, ordered by descending score
     * @throws IllegalArgumentException if {@code topk} is negative
     */
    public static int[] topIndexes(double[] values, int topk) {
        if (topk < 0) {
            throw new IllegalArgumentException("topk must be non-negative: " + topk);
        }
        if (values == null || values.length == 0 || topk == 0) {
            return new int[0];
        }
        final double[] scores = values;
        Integer[] order = new Integer[scores.length];
        for (int i = 0; i < order.length; ++i) {
            order[i] = i;
        }
        // Sorting indexes (instead of keying a map by score) keeps tied scores apart; the
        // object sort is stable, which gives ties their original order.
        java.util.Arrays.sort(order, new java.util.Comparator<Integer>() {
            @Override
            public int compare(Integer a, Integer b) {
                double x = scores[a];
                double y = scores[b];
                if (Double.isNaN(x) || Double.isNaN(y)) {
                    return Boolean.compare(Double.isNaN(x), Double.isNaN(y));
                }
                return Double.compare(y, x);
            }
        });
        int[] indexes = new int[Math.min(topk, order.length)];
        for (int i = 0; i < indexes.length; ++i) {
            indexes[i] = order[i];
        }
        return indexes;
    }

    /** Tiny smoke run: prints the top-3 indexes of a fixed array. */
    public static void main(String[] args) {
        System.out.println(java.util.Arrays.toString(
                topIndexes(new double[]{1., 2., 3., 2.5, 2.1}, 3)));
    }

    /**
     * Returns the file name of {@code path} up to (not including) the first ".attention".
     *
     * <p>The directory part is stripped with {@link java.io.File#getName()}, so the platform's
     * own separators are honored. A name without ".attention" is returned unchanged.
     *
     * @param path path of the opened file
     * @return the tab-title prefix derived from the file name
     */
    public static String extractFilePrefix(String path) {
        String filename = new java.io.File(path.trim()).getName();
        int cut = filename.indexOf(".attention");
        return cut < 0 ? filename : filename.substring(0, cut);
    }
}
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/neubig/mtandseq2seq-code/956656278903bb626b9136ef034a69e3d03bfd78/04-attention/Attention-Visualization/toydata/figures/java-heatmap3.png -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/toydata/figures/java-heatmap4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neubig/mtandseq2seq-code/956656278903bb626b9136ef034a69e3d03bfd78/04-attention/Attention-Visualization/toydata/figures/java-heatmap4.png -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/toydata/figures/py-heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neubig/mtandseq2seq-code/956656278903bb626b9136ef034a69e3d03bfd78/04-attention/Attention-Visualization/toydata/figures/py-heatmap.png -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/toydata/figures/py-heatmap1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neubig/mtandseq2seq-code/956656278903bb626b9136ef034a69e3d03bfd78/04-attention/Attention-Visualization/toydata/figures/py-heatmap1.png -------------------------------------------------------------------------------- /04-attention/Attention-Visualization/toydata/figures/py-heatmap2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neubig/mtandseq2seq-code/956656278903bb626b9136ef034a69e3d03bfd78/04-attention/Attention-Visualization/toydata/figures/py-heatmap2.png -------------------------------------------------------------------------------- /04-attention/README.md: 
# Attention code example
by Xinyi Wang

This is an example of a simple attention module, based on the PyTorch examples.

## Data
The script uses a list of dummy source encodings and a single target encoding to illustrate how the code works. We provide both a dot-product attention module and an MLP attention module.

## Basic Usage

    python attention.py

## Extra visualization
We provide a visualization of the attention of an MT model, using an open-source git repository. You can try out the visualization with:

    cd Attention-Visualization/
    python exec/plot_heatmap.py --input toydata/toy.attention

Note that we made a few small bug fixes to the original repository's code. If you are not using a Mac, the Chinese characters may not show up correctly on the plot.
self.attention_w2 = model.add_parameters( (ATTENTION_SIZE, STATE_SIZE*LSTM_NUM_OF_LAYERS*2)) 28 | self.attention_v = model.add_parameters( (1, ATTENTION_SIZE)) 29 | self.decoder_w = model.add_parameters( (OUTPUT_VOCAB_SIZE, STATE_SIZE)) 30 | self.decoder_b = model.add_parameters( (OUTPUT_VOCAB_SIZE)) 31 | self.output_lookup = model.add_lookup_parameters((OUTPUT_VOCAB_SIZE, EMBEDDINGS_SIZE)) 32 | 33 | def save_to_disk(self, filename): 34 | dy.save(filename, [self.enc_fwd_lstm, self.enc_bwd_lstm, self.dec_lstm, self.input_lookup, self.attention_w1, self.attention_w2, self.attention_v, self.decoder_w, self.decoder_b, self.output_lookup]) 35 | 36 | def load_from_disk(self, filename): 37 | (self.enc_fwd_lstm, self.enc_bwd_lstm, self.dec_lstm, self.input_lookup, self.attention_w1, self.attention_w2, self.attention_v, self.decoder_w, self.decoder_b, self.output_lookup) = dy.load(filename, self.model) 38 | 39 | 40 | def embed_sentence(self, sentence): 41 | #sentence = list(sentence) 42 | sentence = [input_vocab.w2i[c] for c in sentence] 43 | 44 | global input_lookup 45 | 46 | return [self.input_lookup[char] for char in sentence] 47 | 48 | 49 | def run_lstm(self, init_state, input_vecs): 50 | s = init_state 51 | 52 | out_vectors = [] 53 | for vector in input_vecs: 54 | s = s.add_input(vector) 55 | out_vector = s.output() 56 | out_vectors.append(out_vector) 57 | return out_vectors 58 | 59 | 60 | def encode_sentence(self, sentence): 61 | sentence_rev = list(reversed(sentence)) 62 | 63 | fwd_vectors = self.run_lstm(self.enc_fwd_lstm.initial_state(), sentence) 64 | bwd_vectors = self.run_lstm(self.enc_bwd_lstm.initial_state(), sentence_rev) 65 | bwd_vectors = list(reversed(bwd_vectors)) 66 | vectors = [dy.concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)] 67 | 68 | return vectors 69 | 70 | 71 | def attend(self, input_mat, state, w1dt): 72 | w2 = dy.parameter(self.attention_w2) 73 | v = dy.parameter(self.attention_v) 74 | 75 | # input_mat: (encoder_state x seqlen) => 
input vecs concatenated as cols 76 | # w1dt: (attdim x seqlen) 77 | # w2dt: (attdim x attdim) 78 | w2dt = w2*dy.concatenate(list(state.s())) 79 | # att_weights: (seqlen,) row vector 80 | unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt))) 81 | att_weights = dy.softmax(unnormalized) 82 | # context: (encoder_state) 83 | context = input_mat * att_weights 84 | return context 85 | 86 | 87 | def decode(self, vectors, output): 88 | output = list(output) 89 | output = [output_vocab.w2i[c] for c in output] 90 | 91 | w = dy.parameter(self.decoder_w) 92 | b = dy.parameter(self.decoder_b) 93 | w1 = dy.parameter(self.attention_w1) 94 | input_mat = dy.concatenate_cols(vectors) 95 | w1dt = None 96 | 97 | last_output_embeddings = self.output_lookup[output_vocab.w2i[EOS]] 98 | s = self.dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings])) 99 | loss = [] 100 | 101 | for char in output: 102 | # w1dt can be computed and cached once for the entire decoding phase 103 | w1dt = w1dt or w1 * input_mat 104 | vector = dy.concatenate([self.attend(input_mat, s, w1dt), last_output_embeddings]) 105 | s = s.add_input(vector) 106 | out_vector = w * s.output() + b 107 | probs = dy.softmax(out_vector) 108 | last_output_embeddings = self.output_lookup[char] 109 | loss.append(-dy.log(dy.pick(probs, char))) 110 | loss = dy.esum(loss) 111 | return loss 112 | 113 | 114 | def generate(self, in_seq): 115 | embedded = self.embed_sentence(in_seq) 116 | encoded = self.encode_sentence(embedded) 117 | 118 | w = dy.parameter(self.decoder_w) 119 | b = dy.parameter(self.decoder_b) 120 | w1 = dy.parameter(self.attention_w1) 121 | input_mat = dy.concatenate_cols(encoded) 122 | w1dt = None 123 | 124 | last_output_embeddings = self.output_lookup[output_vocab.w2i[EOS]] 125 | s = self.dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings])) 126 | 127 | out = '' 128 | count_EOS = 0 129 | for i in 
range(len(in_seq)*2): 130 | if count_EOS == 2: break 131 | # w1dt can be computed and cached once for the entire decoding phase 132 | w1dt = w1dt or w1 * input_mat 133 | vector = dy.concatenate([self.attend(input_mat, s, w1dt), last_output_embeddings]) 134 | s = s.add_input(vector) 135 | out_vector = w * s.output() + b 136 | probs = dy.softmax(out_vector).vec_value() 137 | next_char = probs.index(max(probs)) 138 | last_output_embeddings = self.output_lookup[next_char] 139 | if output_vocab.i2w[next_char] == EOS: 140 | count_EOS += 1 141 | continue 142 | 143 | out += output_vocab.i2w[next_char] 144 | return out 145 | 146 | 147 | def get_loss(self, input_sentence, output_sentence): 148 | dy.renew_cg() 149 | embedded = self.embed_sentence(input_sentence) 150 | encoded = self.encode_sentence(embedded) 151 | return self.decode(encoded, output_sentence) 152 | 153 | def eval(self, inputs, outputs): 154 | N = len(inputs) 155 | correct = 0.0 156 | for i in range(N): 157 | prediction = self.generate(inputs[i]) 158 | if i < 5: 159 | print(f"\t{' '.join(inputs[i])}\t{prediction}\t{''.join(outputs[i][:-1])}") 160 | if prediction == ''.join(outputs[i][:-1]): 161 | correct += 1 162 | accuracy = correct/N 163 | return accuracy 164 | 165 | 166 | def train(self, train, dev): 167 | train_i = train[0] 168 | train_o = train[1] 169 | dev_i = dev[0] 170 | dev_o = dev[1] 171 | 172 | prev_dev_acc = 0 173 | trainer = dy.SimpleSGDTrainer(self.model) 174 | N = len(train_i) 175 | ids = list(range(N)) 176 | for iteration in range(10): 177 | random.shuffle(ids) 178 | for i in ids: 179 | loss = self.get_loss(train_i[i], train_o[i]) 180 | loss_value = loss.value() 181 | loss.backward() 182 | trainer.update() 183 | print(f"Total Loss at Iteration {iteration} : {loss_value}") 184 | # Eval on dev 185 | dev_acc = self.eval(dev_i, dev_o) 186 | print(f"Dev accuracy at iteration {iteration} : {dev_acc}") 187 | if dev_acc > prev_dev_acc: 188 | prev_dev_acc = dev_acc 189 | 
# very simple example code to calculate attention

class DotProdAttn(nn.Module):
    """Dot-product attention of one query vector over a sequence of keys/values."""

    def __init__(self, d_model):
        # d_model is unused (there are no parameters); it is accepted so that both
        # attention modules are constructed the same way.
        super(DotProdAttn, self).__init__()

    def forward(self, q, k, v):
        """Attend with `q` over `k` and return the weighted sum of `v`.

        Args:
            q: queries, (batch_size, d_q)
            k: keys, (batch_size, len_k, d_k) with d_k == d_q
            v: values, (batch_size, len_v, d_v) with len_v == len_k

        Returns:
            (ctx, att_score): ctx is (batch_size, d_v); att_score is
            (batch_size, 1, len_k) and sums to 1 over the last dimension.
        """
        batch_size, d_q = q.size()
        batch_size, len_k, d_k = k.size()
        batch_size, len_v, d_v = v.size()
        assert d_k == d_q
        assert len_k == len_v

        # (batch_size, 1, d_q) x (batch_size, d_k, len_k) -> (batch_size, 1, len_k)
        att_score_weights = torch.bmm(q.unsqueeze(1), k.transpose(1, 2))
        # Normalize exactly once: a second softmax over the probabilities would
        # flatten the distribution towards uniform.
        att_score = torch.softmax(att_score_weights, dim=-1)

        # (batch_size, 1, len_k) x (batch_size, len_v, d_v) -> (batch_size, d_v)
        ctx = torch.bmm(att_score, v).squeeze(1)
        return ctx, att_score



class MlpAttn(nn.Module):
    """Additive (MLP) attention: score_i = w_att . tanh(k_i + W_trg q)."""

    def __init__(self, d_model):
        super(MlpAttn, self).__init__()
        self.w_trg = nn.Linear(d_model, d_model)
        self.w_att = nn.Linear(d_model, 1)

    def forward(self, q, k, v):
        """Attend with `q` over `k` and return the weighted sum of `v`.

        Args:
            q: queries, (batch_size, d_q)
            k: keys, (batch_size, len_k, d_k) with d_k == d_q
            v: values, (batch_size, len_v, d_v) with len_v == len_k

        Returns:
            (ctx, att_score): ctx is (batch_size, d_v); att_score is
            (batch_size, len_k) and sums to 1 over the last dimension.
        """
        batch_size, d_q = q.size()
        batch_size, len_k, d_k = k.size()
        batch_size, len_v, d_v = v.size()
        assert d_k == d_q
        assert len_k == len_v

        # (batch_size, len_k, d_k): the projected query is broadcast over all keys
        att_score_hidden = torch.tanh(k + self.w_trg(q).unsqueeze(1))
        # (batch_size, len_k)
        att_score_weights = self.w_att(att_score_hidden).squeeze(2)
        att_score = torch.softmax(att_score_weights, dim=-1)

        # (batch_size, 1, len_k) x (batch_size, len_v, d_v) -> (batch_size, d_v)
        ctx = torch.bmm(att_score.unsqueeze(1), v).squeeze(1)
        return ctx, att_score

if __name__ == "__main__":
    mlp_attn = MlpAttn(2)
    dotprod_attn = DotProdAttn(2)

    src_encs = torch.FloatTensor([[[-1, 2], [2, 4], [3, 5]]])
    trg_enc = torch.FloatTensor([[1, 2]])

    print("src encodings:")
    print(src_encs)

    print("target encoding:")
    print(trg_enc)


    print("mlp attention scores")
    ctx, attn_score = mlp_attn(trg_enc, src_encs, src_encs)
    print(attn_score)

    print("dot prod attention scores")
    ctx, attn_score = dotprod_attn(trg_enc, src_encs, src_encs)
    print(attn_score)
-------------------------------------------------------------------------------- /05-selfattention/sample.vec: -------------------------------------------------------------------------------- 1 | I -0.1457 0.0950 0.0409 -0.0168 0.1127 -0.1418 -0.0357 -0.0303 0.0223 0.2413 0.0339 0.0034 0.0461 -0.0844 0.0300 0.0151 0.0160 0.0092 -0.0707 0.0058 -0.0431 0.0746 -0.0021 0.1113 -0.0817 -0.0628 -0.0605 0.2241 0.0918 -0.2270 -0.0061 0.0322 0.0322 -0.0107 0.0367 0.0551 -0.0599 0.1813 0.0245 -0.0172 -0.0476 -0.0337 0.0405 -0.0681 0.1028 0.0224 -0.0540 0.0301 -0.0508 -0.0500 0.0681 0.1809 -0.6069 -0.0063 0.1388 -0.0124 0.0922 -0.1211 0.0782 -0.0226 0.1240 0.0100 0.2230 -0.0446 -0.0351 -0.0056 -0.0428 -0.0875 0.0296 -0.0934 -0.0112 -0.0426 0.0044 0.1148 0.0721 0.0789 0.0151 -0.0322 -0.0403 0.2163 0.0242 -0.0833 -0.0237 -0.1477 -0.0037 -0.0019 -0.0251 -0.0542 -0.3398 -0.0222 -0.0059 -0.0069 0.0244 0.0353 -0.0003 -0.0198 -0.0342 0.0668 0.1040 -0.0270 -0.1069 -0.0660 0.0106 0.0476 -0.4434 -0.0615 -0.1380 0.1599 -0.0922 -0.0401 0.0426 0.0549 0.0228 0.0137 -0.1334 0.0391 0.0201 0.0808 -0.0417 -0.3798 -0.1119 -0.0163 -0.0614 -0.2090 -0.2429 0.0590 -0.0617 0.1442 -0.0823 0.0216 -0.0441 -0.0258 -0.1604 0.0170 0.0853 -0.3443 -0.0652 -0.0065 -0.0396 0.0167 0.0355 -0.0518 0.0572 0.2620 0.0843 -0.0144 0.1588 -0.0197 -0.1024 -0.1016 0.1108 -0.0011 0.0777 0.0038 0.0010 -0.0219 -0.0085 0.0311 0.0468 -0.0486 0.0793 0.0764 0.0183 -0.0015 -0.2573 -0.0599 -0.0405 -0.0452 0.0761 0.0551 -0.1502 0.0078 -0.0057 -0.0746 -0.0434 0.0333 0.1287 0.8932 0.0272 0.0102 -0.0435 0.0098 -0.0621 -0.0340 -0.0056 -0.0385 -0.0183 -0.2272 0.0405 -0.0103 0.0571 0.0601 -0.0432 -0.0233 0.0197 0.0638 0.0341 0.0176 0.1361 0.0212 0.0708 -0.0197 0.0697 -0.0120 0.0302 0.1049 0.0271 0.0812 -0.0678 0.0515 -0.0005 -0.0127 -0.0374 -0.0769 -0.0415 -0.0022 -0.3292 -0.0446 0.0951 0.0374 0.0736 0.0200 0.0504 0.0220 0.0316 0.2593 0.0107 -0.0258 0.0332 -0.1500 0.1587 0.0514 0.1835 -0.0460 -0.0506 -0.0219 0.1066 -0.0275 -0.1448 
-0.0941 -0.0155 -0.0213 -0.0429 -0.0180 -0.0443 -0.0102 0.0130 0.0159 0.0173 0.0798 0.0663 -0.0266 0.0659 0.0198 0.0286 -0.0274 -0.0017 -0.0363 0.0049 0.0640 0.1002 -0.0368 0.0249 -0.0462 -0.6402 -0.0521 -0.1897 -0.0475 0.0936 -0.0117 0.0272 0.0296 0.0672 -0.0150 0.0554 -0.0341 -0.1299 -0.0043 -0.0005 0.0536 0.1434 0.1040 -0.0132 0.0011 0.0810 -0.0291 0.0810 -0.0792 0.0506 0.0443 0.2485 -0.0952 0.0450 -0.0468 -0.0563 -0.0073 -0.0444 0.2813 0.1541 -0.0762 2 | went 0.0467 -0.0838 0.0580 -0.0609 -0.0139 -0.1265 0.0749 0.0098 -0.0260 -0.0028 0.0534 -0.0241 -0.0594 -0.0325 -0.0536 -0.0801 0.0102 0.0035 0.0903 0.0398 -0.0195 0.0401 -0.1187 0.2209 0.0755 -0.2416 0.0793 0.0204 0.0732 -0.0962 0.0339 -0.0178 0.0753 -0.0039 0.1003 0.1279 -0.0195 0.1250 0.0381 -0.0136 -0.0817 0.0327 -0.0778 -0.0838 0.0554 0.0593 0.1337 0.0162 0.0203 0.0240 0.1498 -0.0508 -0.5014 -0.0188 0.0416 -0.1048 -0.0906 0.0282 0.2232 -0.0609 -0.0139 -0.0280 -0.0226 -0.1179 -0.0142 0.0167 -0.0094 0.0738 0.0292 0.0403 -0.0152 -0.0313 0.0156 -0.0154 0.1535 0.1203 0.1300 0.0669 -0.0274 -0.0578 0.0042 -0.1283 0.0065 -0.3228 -0.0038 -0.0869 -0.0375 -0.0397 -0.3984 -0.0758 -0.0811 -0.0481 -0.0679 -0.0925 -0.0305 0.0622 -0.1255 0.0918 0.1229 0.0137 -0.1111 -0.0439 0.0065 0.0280 -0.0331 0.0407 0.2049 -0.1327 0.1690 0.0053 -0.0021 0.0610 0.0695 0.0900 0.0110 0.1070 0.0300 0.0775 -0.0346 -0.4190 -0.0881 0.0207 -0.0765 -0.0140 -0.1374 0.1138 0.0352 0.0714 -0.0863 0.0125 -0.0684 0.0564 -0.0451 0.0252 -0.0248 0.0835 -0.0398 -0.0006 0.0924 0.0732 -0.0899 -0.0716 0.1539 0.2328 -0.0155 0.0124 0.0372 0.0128 -0.0761 -0.1601 0.0877 -0.0013 -0.0809 0.0769 0.0016 0.0081 -0.0511 0.0791 -0.0388 -0.0420 -0.0200 -0.0905 0.0335 -0.0550 0.3674 0.0407 0.0087 -0.0301 0.0758 0.0679 0.1827 -0.0089 -0.0242 0.1674 -0.1766 0.0369 0.2299 0.0676 0.0588 0.1260 0.0049 -0.0063 0.0358 -0.0127 0.0709 -0.2053 0.0132 -0.0507 0.8497 0.0029 0.0085 0.0087 0.0410 -0.0169 0.1116 0.0175 0.0290 0.0168 0.0874 0.0200 0.0831 0.0306 0.0283 -0.0512 -0.0987 
-0.0392 -0.0054 -0.0736 0.0361 0.0524 -0.0023 0.0333 0.0415 -0.0421 -0.0610 -0.0653 -0.1530 0.0116 0.0578 -0.0598 0.0521 -0.0417 0.0042 0.0093 0.0801 -0.1401 0.0581 0.0876 -0.0869 -0.0170 0.1514 -0.0346 0.2658 -0.0164 0.0448 -0.2631 -0.1000 0.1377 -0.2621 -0.0952 -0.0712 -0.0251 -0.0605 -0.0740 -0.1043 0.1608 -0.0157 0.0539 -0.0492 0.3297 -0.0923 0.0625 0.0320 0.0395 -0.0153 0.1407 0.0323 -0.0435 0.0262 0.0449 -0.0749 0.1461 -0.1443 -0.1032 -0.4579 -0.0923 0.2562 -0.2060 0.0937 0.0045 -0.0129 0.0889 0.0571 -0.0050 0.0251 -0.0037 0.0969 -0.0385 0.0365 -0.0185 -0.1262 0.0877 0.1615 -0.0972 -0.0556 0.0615 -0.1405 -0.0087 -0.2415 0.0299 -0.0326 0.2026 -0.0504 -0.0295 0.1693 -0.0551 0.1077 0.0659 0.1104 0.0870 3 | to 0.0495 0.0411 0.0041 0.0309 -0.0044 -0.1151 0.0060 0.0170 0.0045 -0.0288 0.0170 0.0007 0.0533 0.0094 -0.0609 -0.0267 0.0497 0.0474 0.0054 0.0511 -0.0715 0.0876 0.0550 -0.0010 -0.0746 0.0008 0.0258 -0.1404 0.0022 0.0469 0.0114 0.0083 -0.0127 -0.0453 0.0011 -0.0080 -0.0130 -0.1271 0.0020 0.0389 -0.0395 -0.0295 -0.0308 0.0348 -0.1388 -0.0647 0.0302 0.0184 0.0499 0.0168 -0.0176 0.0890 -0.5547 0.0144 0.0300 0.0127 0.0345 0.1792 0.0629 -0.0242 -0.0491 -0.0397 -0.0014 -0.0571 0.0906 -0.0009 0.0266 -0.0018 0.0308 -0.0057 0.0569 0.0273 -0.0338 0.1003 0.0299 0.0115 0.0717 0.0319 -0.0726 0.1526 -0.0026 -0.1321 -0.0287 -0.2439 0.0073 -0.0062 0.0101 -0.0128 -0.0106 0.0202 -0.0165 -0.0867 0.0493 -0.0916 0.0507 0.1032 0.0108 0.0881 0.0655 -0.0127 -0.0895 -0.0348 0.0439 0.0069 -0.3768 -0.0176 0.1296 0.0027 0.2343 -0.0009 0.0337 0.0613 -0.0369 0.0564 -0.0901 -0.0046 0.0360 0.0341 -0.0171 -0.1717 -0.0041 -0.0553 -0.0661 0.0957 -0.0804 0.0868 -0.0181 -0.0602 -0.1523 -0.0104 -0.0034 -0.0547 0.0094 -0.0223 -0.0184 -0.3151 -0.0358 0.0354 0.0393 0.0526 0.0010 -0.0163 0.0497 0.2518 -0.0173 -0.0036 0.0180 -0.1081 0.0368 -0.0141 -0.0436 0.0291 -0.0366 -0.0523 0.0464 0.0018 -0.0183 0.0766 0.0156 0.0276 0.0522 -0.0221 0.0408 -0.0703 -0.2291 -0.0030 0.0343 -0.0961 -0.0092 0.0222 0.0166 
-0.0344 0.0463 0.0186 0.0283 -0.0522 0.0369 -0.4955 0.0276 -0.0247 0.0257 0.0632 -0.0232 -0.0063 -0.0076 -0.3897 -0.0108 -0.0612 0.1962 -0.0060 -0.0353 -0.0994 -0.0124 -0.0031 0.0427 0.0134 -0.0043 -0.0102 0.2100 0.0163 -0.0155 0.1707 -0.0339 -0.0125 0.0200 0.0148 -0.0342 -0.0254 0.0001 0.0412 -0.0258 -0.0169 0.0113 0.0031 0.0075 -0.0059 0.1255 0.0225 0.0005 0.0462 0.0096 -0.0664 0.0138 0.0511 0.0372 0.0073 -0.0187 0.0186 -0.0021 -0.0120 0.0007 -0.0026 0.1725 0.0790 0.0265 -0.0066 -0.0765 0.2336 -0.1445 0.0072 -0.0260 -0.0147 0.0521 0.0011 -0.0244 0.0583 -0.0207 0.0320 0.0294 0.4830 -0.0444 -0.0870 -0.0754 0.0276 -0.0606 -0.0227 -0.0057 -0.0298 0.0540 -0.0607 -0.0746 0.0666 -0.0240 -0.0399 -0.2321 0.0054 0.0233 0.0460 0.1874 -0.0530 -0.0285 -0.0521 -0.0146 0.0264 -0.0093 0.0250 -0.0552 0.0152 0.0242 -0.0577 0.0060 0.0511 0.0230 -0.0345 0.0134 -0.0042 -0.1267 -0.1572 -0.0783 0.0706 0.0004 -0.0142 -0.0976 -0.0489 -0.0625 -0.0327 0.0070 0.2371 -0.0298 -0.0284 4 | a 0.0047 0.0223 -0.0087 0.0250 -0.0660 0.0212 0.0178 -0.0149 0.0282 -0.0166 0.0075 0.0284 0.0166 0.0255 0.0125 -0.0220 0.0177 0.0667 -0.1204 0.0402 0.0260 0.0406 0.0285 -0.1023 -0.0021 0.0387 0.0208 0.0541 0.0066 -0.0689 0.0140 -0.0271 0.0677 0.0005 0.0392 -0.0209 -0.0417 -0.0173 0.0303 -0.0256 0.0016 -0.0574 -0.0039 -0.0377 -0.0412 0.0315 -0.0139 0.0184 -0.0302 0.0000 -0.0378 0.1718 -0.6255 -0.0129 0.0399 0.0301 0.0420 0.0338 0.0888 -0.0156 0.0362 -0.0332 -0.0199 0.0223 -0.1082 0.0558 0.0027 -0.0084 0.0286 0.0098 -0.0286 -0.0252 -0.0119 0.0971 0.0381 0.0101 -0.0005 0.0114 0.0230 -0.0090 0.0443 0.0617 -0.0237 -0.1204 -0.0046 0.0186 0.0183 -0.0117 -0.0270 0.0182 -0.0114 0.0041 0.0046 0.0407 0.0151 0.0106 0.0192 0.0212 0.0075 -0.0428 -0.0867 -0.0490 -0.0021 -0.0062 -0.2014 -0.0003 0.3178 -0.0558 0.0698 0.0748 -0.0212 -0.0547 -0.0097 0.0682 0.0141 -0.0684 0.0136 0.0069 -0.0577 -0.2443 -0.0377 -0.0591 0.0631 -0.0212 0.0315 0.0999 0.0101 0.0379 -0.1429 -0.0078 0.0575 0.0202 0.0216 -0.0053 0.0377 -0.4783 -0.0013 
0.0285 0.0465 -0.0584 0.0057 -0.0250 0.0457 0.4414 0.0631 0.0244 0.0717 0.0060 0.0290 0.0283 0.0569 -0.0073 -0.0503 -0.0482 0.0121 0.0125 -0.0128 0.0006 0.0029 -0.0173 0.0452 0.0258 0.0658 -0.0031 -0.1641 0.0163 0.0143 -0.0953 0.0144 -0.0018 0.0694 -0.0215 0.0211 0.0342 0.1518 -0.0250 0.0690 -0.2225 0.0283 -0.0152 0.0286 0.0279 -0.0334 -0.0021 -0.0189 0.0055 0.0215 -0.0307 0.1230 0.0153 -0.0179 -0.0328 -0.0181 0.0465 -0.0164 -0.0042 -0.0657 -0.0230 0.2529 -0.0214 0.0124 -0.0861 -0.0359 -0.0401 0.0448 0.0004 0.0403 -0.0272 -0.0080 0.0349 -0.0228 0.0085 -0.0471 0.0004 0.0433 0.0081 0.2004 0.0231 -0.0084 0.0694 -0.0030 -0.0391 0.0038 0.0032 -0.0199 -0.1149 0.0009 0.0198 0.0176 -0.2361 0.0961 -0.0251 0.1812 0.0025 0.0225 -0.0061 -0.0230 -0.0354 -0.1134 -0.1130 -0.0527 0.0041 -0.0109 -0.0166 -0.0035 0.0720 0.0270 0.0134 -0.0017 0.4862 0.0024 -0.0162 -0.0046 0.0082 -0.1879 0.0087 -0.1016 0.0127 0.0068 0.0103 -0.0979 -0.0229 0.0354 -0.0009 -0.1563 0.0113 -0.0840 -0.0169 -0.3069 -0.0476 -0.0503 -0.0123 -0.0127 0.0604 0.0000 -0.0035 -0.0301 -0.0114 -0.0112 -0.0029 0.0606 0.0608 0.0899 -0.0041 0.0044 -0.0289 -0.1618 0.0085 0.0647 0.0376 0.0452 0.0139 -0.0032 -0.0253 0.0244 0.0463 0.0178 0.1479 0.1324 -0.0318 5 | store 0.1153 -0.1704 -0.1207 -0.0818 0.1475 -0.0396 0.0023 -0.0947 0.0567 -0.0694 -0.0181 -0.0678 -0.0310 0.0948 -0.0624 0.0401 0.0032 0.0131 -0.0768 0.0635 -0.1465 -0.0262 -0.0859 0.0684 -0.0864 -0.1022 0.1384 0.0092 0.0587 -0.0241 0.0348 -0.0040 0.1074 -0.0179 0.0023 -0.1650 0.0245 -0.0348 -0.0167 0.1509 -0.0464 -0.0661 -0.1495 -0.1155 0.0849 -0.0718 0.0590 0.2752 0.0030 0.1221 -0.1442 0.1222 -0.6403 0.0243 0.1315 -0.0289 0.2172 0.0151 0.0297 -0.0801 0.0781 0.0183 0.0024 0.1819 0.1259 0.0638 -0.0365 0.0905 -0.0146 0.0529 -0.0387 0.0906 -0.0487 -0.0240 0.1477 -0.1106 -0.0949 0.0995 0.1439 0.0026 0.0006 -0.1222 0.0974 -0.2052 -0.1110 -0.0981 0.1635 0.0255 0.0929 -0.1102 -0.0335 -0.0782 0.0156 -0.1386 0.0230 -0.0964 0.0086 -0.0785 0.1477 -0.0895 -0.2046 -0.0138 0.0560 
-0.2461 -0.0984 0.1811 -0.1168 0.0287 0.0269 0.0744 -0.0511 -0.1478 0.2025 -0.0878 0.0325 0.0509 0.0108 0.2023 -0.0510 -0.2732 -0.1802 0.0333 -0.0953 0.0596 0.1066 0.2224 0.1110 -0.0433 -0.0746 0.0562 0.0853 0.1813 0.2339 0.0212 -0.0741 0.1476 -0.0714 0.1253 -0.0171 0.0355 0.1493 0.0121 0.0354 0.1486 0.0686 0.1272 -0.0797 -0.0581 -0.1118 -0.0637 -0.0825 -0.0409 0.0878 0.1757 0.0985 0.0175 -0.1086 0.0096 0.0031 -0.0469 -0.0417 -0.0312 -0.0599 -0.0980 -0.1058 -0.0073 0.0298 0.0279 0.0164 0.1775 0.0220 0.1614 -0.0705 0.1096 0.0502 -0.0666 0.2664 -0.1213 -0.1859 0.0110 0.0614 -0.0604 -0.0606 0.2025 -0.0343 -0.2559 -0.0513 -0.0692 -0.0394 -0.0352 -0.0769 0.0828 0.0548 0.0482 -0.1559 0.0312 -0.0133 0.2308 0.1015 -0.2433 0.0769 0.0799 -0.1401 0.1040 0.0261 -0.0726 -0.0340 -0.1762 -0.0686 -0.1818 0.0625 0.1687 0.0158 -0.0583 -0.0693 0.0038 -0.0755 -0.1202 -0.0011 0.2024 0.0911 -0.0196 -0.0252 0.0389 0.0046 -0.1144 0.1192 -0.2127 -0.0873 0.0367 -0.1074 0.1626 0.3389 -0.1174 -0.2059 -0.1477 -0.2338 0.0673 -0.2272 0.1461 -0.0426 -0.1439 0.0505 0.1390 -0.0014 -0.0239 -0.0197 -0.3516 -0.1192 0.3902 0.0302 -0.0076 -0.1779 -0.1580 -0.0821 0.0588 -0.0890 -0.0052 0.1085 -0.1672 0.0869 -0.1333 -0.0904 -0.1226 -0.2606 0.0101 -0.1049 -0.1571 -0.0899 -0.0084 -0.1556 0.0116 -0.1085 0.0622 -0.0159 0.0934 0.1410 -0.0960 -0.0353 -0.0136 0.0348 -0.1130 0.1330 0.0773 0.0247 -0.0789 -0.0764 0.0398 0.0679 0.0315 0.1099 0.0614 -0.0474 0.1434 0.0894 0.0525 -0.0616 0.0512 -0.0655 0.0141 6 | to 0.0495 0.0411 0.0041 0.0309 -0.0044 -0.1151 0.0060 0.0170 0.0045 -0.0288 0.0170 0.0007 0.0533 0.0094 -0.0609 -0.0267 0.0497 0.0474 0.0054 0.0511 -0.0715 0.0876 0.0550 -0.0010 -0.0746 0.0008 0.0258 -0.1404 0.0022 0.0469 0.0114 0.0083 -0.0127 -0.0453 0.0011 -0.0080 -0.0130 -0.1271 0.0020 0.0389 -0.0395 -0.0295 -0.0308 0.0348 -0.1388 -0.0647 0.0302 0.0184 0.0499 0.0168 -0.0176 0.0890 -0.5547 0.0144 0.0300 0.0127 0.0345 0.1792 0.0629 -0.0242 -0.0491 -0.0397 -0.0014 -0.0571 0.0906 -0.0009 0.0266 -0.0018 0.0308 
-0.0057 0.0569 0.0273 -0.0338 0.1003 0.0299 0.0115 0.0717 0.0319 -0.0726 0.1526 -0.0026 -0.1321 -0.0287 -0.2439 0.0073 -0.0062 0.0101 -0.0128 -0.0106 0.0202 -0.0165 -0.0867 0.0493 -0.0916 0.0507 0.1032 0.0108 0.0881 0.0655 -0.0127 -0.0895 -0.0348 0.0439 0.0069 -0.3768 -0.0176 0.1296 0.0027 0.2343 -0.0009 0.0337 0.0613 -0.0369 0.0564 -0.0901 -0.0046 0.0360 0.0341 -0.0171 -0.1717 -0.0041 -0.0553 -0.0661 0.0957 -0.0804 0.0868 -0.0181 -0.0602 -0.1523 -0.0104 -0.0034 -0.0547 0.0094 -0.0223 -0.0184 -0.3151 -0.0358 0.0354 0.0393 0.0526 0.0010 -0.0163 0.0497 0.2518 -0.0173 -0.0036 0.0180 -0.1081 0.0368 -0.0141 -0.0436 0.0291 -0.0366 -0.0523 0.0464 0.0018 -0.0183 0.0766 0.0156 0.0276 0.0522 -0.0221 0.0408 -0.0703 -0.2291 -0.0030 0.0343 -0.0961 -0.0092 0.0222 0.0166 -0.0344 0.0463 0.0186 0.0283 -0.0522 0.0369 -0.4955 0.0276 -0.0247 0.0257 0.0632 -0.0232 -0.0063 -0.0076 -0.3897 -0.0108 -0.0612 0.1962 -0.0060 -0.0353 -0.0994 -0.0124 -0.0031 0.0427 0.0134 -0.0043 -0.0102 0.2100 0.0163 -0.0155 0.1707 -0.0339 -0.0125 0.0200 0.0148 -0.0342 -0.0254 0.0001 0.0412 -0.0258 -0.0169 0.0113 0.0031 0.0075 -0.0059 0.1255 0.0225 0.0005 0.0462 0.0096 -0.0664 0.0138 0.0511 0.0372 0.0073 -0.0187 0.0186 -0.0021 -0.0120 0.0007 -0.0026 0.1725 0.0790 0.0265 -0.0066 -0.0765 0.2336 -0.1445 0.0072 -0.0260 -0.0147 0.0521 0.0011 -0.0244 0.0583 -0.0207 0.0320 0.0294 0.4830 -0.0444 -0.0870 -0.0754 0.0276 -0.0606 -0.0227 -0.0057 -0.0298 0.0540 -0.0607 -0.0746 0.0666 -0.0240 -0.0399 -0.2321 0.0054 0.0233 0.0460 0.1874 -0.0530 -0.0285 -0.0521 -0.0146 0.0264 -0.0093 0.0250 -0.0552 0.0152 0.0242 -0.0577 0.0060 0.0511 0.0230 -0.0345 0.0134 -0.0042 -0.1267 -0.1572 -0.0783 0.0706 0.0004 -0.0142 -0.0976 -0.0489 -0.0625 -0.0327 0.0070 0.2371 -0.0298 -0.0284 7 | buy -0.0383 -0.1207 0.0446 -0.0489 -0.0228 -0.0605 0.0124 -0.0085 -0.2114 -0.2070 -0.0436 0.0813 0.0252 0.1689 -0.0123 -0.0499 -0.0459 -0.0509 -0.1930 -0.0459 -0.0594 0.0975 0.0234 -0.0942 0.0757 -0.0298 0.0816 0.1021 0.0045 0.1090 0.0989 -0.0583 0.0589 
0.0183 -0.0853 0.0549 -0.0247 -0.0671 0.0611 -0.0126 -0.0201 0.0625 -0.1309 -0.0275 0.0813 0.0830 -0.1404 0.0034 0.0002 -0.0986 -0.0390 0.0710 -0.7616 -0.0065 0.0203 0.0124 -0.1375 -0.0377 0.1424 -0.0470 -0.0530 -0.0017 -0.0064 0.0031 0.0047 -0.0771 0.0402 0.0940 -0.0065 -0.0339 0.0448 0.0793 -0.0979 0.0393 -0.0301 -0.0605 -0.0798 -0.0761 0.0755 0.0310 0.0314 0.0271 0.0410 -0.2236 -0.1316 0.0346 -0.0483 -0.0907 -0.1757 -0.0174 -0.0338 -0.0297 0.0271 -0.1891 -0.0441 0.0340 -0.0426 0.0566 0.0186 -0.0444 -0.1966 -0.0414 -0.0333 -0.0991 -0.2033 0.0649 0.0702 0.0817 -0.0751 0.1939 0.1020 -0.1991 0.0797 -0.0401 -0.1216 0.0456 0.0245 0.1675 -0.0809 -0.3011 -0.0869 0.0164 -0.1075 0.0899 0.0413 0.1917 -0.0191 0.0742 -0.0840 0.0007 0.1997 -0.0313 -0.0368 0.1528 0.1239 0.3293 -0.0140 -0.0447 0.1532 0.1501 0.0761 0.1207 -0.0249 0.1769 -0.0540 -0.0187 -0.0212 -0.1746 -0.0887 -0.0905 -0.2786 -0.0271 0.0029 -0.0203 0.1398 0.0176 -0.0691 -0.0024 0.0153 -0.1281 -0.1055 0.0241 -0.0575 0.0674 -0.2193 -0.0097 0.0118 -0.0448 -0.0203 -0.0849 0.3452 0.1944 -0.0284 0.0808 0.0902 0.0752 0.2634 -0.0268 -0.0493 -0.0713 0.0010 -0.0388 0.0146 -0.0487 0.0997 -0.5735 0.0478 -0.2434 0.3406 0.0232 0.0331 0.0496 0.1096 0.0141 -0.0063 0.0855 -0.1039 -0.1019 0.1766 -0.1784 -0.0518 -0.0120 -0.0754 -0.1076 -0.0084 0.0305 -0.0451 0.0638 0.0219 0.0002 -0.0382 0.0189 0.0198 0.0373 -0.0573 -0.1039 -0.0369 -0.0037 -0.0443 0.1332 0.1588 -0.0539 -0.0601 0.1249 0.1449 -0.1100 0.1588 -0.0845 0.0031 -0.1214 -0.1287 0.0119 0.2790 -0.0463 -0.1451 -0.1774 -0.2258 0.0837 -0.2105 0.1055 -0.0997 -0.0528 -0.0731 -0.0090 -0.0776 0.1464 0.0371 -0.1323 -0.1076 0.3599 0.0677 -0.0547 -0.1363 -0.0223 0.0496 0.0342 0.0645 0.0701 -0.0417 -0.0616 0.0310 0.0656 0.0520 -0.0299 0.0915 0.0527 -0.0556 -0.0361 -0.0215 -0.1207 0.2737 0.0753 -0.0559 0.0594 0.0397 0.2017 0.0547 -0.0628 0.0655 -0.0841 -0.0025 -0.0061 0.1956 0.0315 -0.0740 0.0146 -0.0218 0.0802 -0.0823 0.0252 0.0196 -0.0382 0.0411 -0.0205 0.1017 0.1655 -0.2232 0.1460 
0.0398 0.0031 8 | a 0.0047 0.0223 -0.0087 0.0250 -0.0660 0.0212 0.0178 -0.0149 0.0282 -0.0166 0.0075 0.0284 0.0166 0.0255 0.0125 -0.0220 0.0177 0.0667 -0.1204 0.0402 0.0260 0.0406 0.0285 -0.1023 -0.0021 0.0387 0.0208 0.0541 0.0066 -0.0689 0.0140 -0.0271 0.0677 0.0005 0.0392 -0.0209 -0.0417 -0.0173 0.0303 -0.0256 0.0016 -0.0574 -0.0039 -0.0377 -0.0412 0.0315 -0.0139 0.0184 -0.0302 0.0000 -0.0378 0.1718 -0.6255 -0.0129 0.0399 0.0301 0.0420 0.0338 0.0888 -0.0156 0.0362 -0.0332 -0.0199 0.0223 -0.1082 0.0558 0.0027 -0.0084 0.0286 0.0098 -0.0286 -0.0252 -0.0119 0.0971 0.0381 0.0101 -0.0005 0.0114 0.0230 -0.0090 0.0443 0.0617 -0.0237 -0.1204 -0.0046 0.0186 0.0183 -0.0117 -0.0270 0.0182 -0.0114 0.0041 0.0046 0.0407 0.0151 0.0106 0.0192 0.0212 0.0075 -0.0428 -0.0867 -0.0490 -0.0021 -0.0062 -0.2014 -0.0003 0.3178 -0.0558 0.0698 0.0748 -0.0212 -0.0547 -0.0097 0.0682 0.0141 -0.0684 0.0136 0.0069 -0.0577 -0.2443 -0.0377 -0.0591 0.0631 -0.0212 0.0315 0.0999 0.0101 0.0379 -0.1429 -0.0078 0.0575 0.0202 0.0216 -0.0053 0.0377 -0.4783 -0.0013 0.0285 0.0465 -0.0584 0.0057 -0.0250 0.0457 0.4414 0.0631 0.0244 0.0717 0.0060 0.0290 0.0283 0.0569 -0.0073 -0.0503 -0.0482 0.0121 0.0125 -0.0128 0.0006 0.0029 -0.0173 0.0452 0.0258 0.0658 -0.0031 -0.1641 0.0163 0.0143 -0.0953 0.0144 -0.0018 0.0694 -0.0215 0.0211 0.0342 0.1518 -0.0250 0.0690 -0.2225 0.0283 -0.0152 0.0286 0.0279 -0.0334 -0.0021 -0.0189 0.0055 0.0215 -0.0307 0.1230 0.0153 -0.0179 -0.0328 -0.0181 0.0465 -0.0164 -0.0042 -0.0657 -0.0230 0.2529 -0.0214 0.0124 -0.0861 -0.0359 -0.0401 0.0448 0.0004 0.0403 -0.0272 -0.0080 0.0349 -0.0228 0.0085 -0.0471 0.0004 0.0433 0.0081 0.2004 0.0231 -0.0084 0.0694 -0.0030 -0.0391 0.0038 0.0032 -0.0199 -0.1149 0.0009 0.0198 0.0176 -0.2361 0.0961 -0.0251 0.1812 0.0025 0.0225 -0.0061 -0.0230 -0.0354 -0.1134 -0.1130 -0.0527 0.0041 -0.0109 -0.0166 -0.0035 0.0720 0.0270 0.0134 -0.0017 0.4862 0.0024 -0.0162 -0.0046 0.0082 -0.1879 0.0087 -0.1016 0.0127 0.0068 0.0103 -0.0979 -0.0229 0.0354 -0.0009 -0.1563 
0.0113 -0.0840 -0.0169 -0.3069 -0.0476 -0.0503 -0.0123 -0.0127 0.0604 0.0000 -0.0035 -0.0301 -0.0114 -0.0112 -0.0029 0.0606 0.0608 0.0899 -0.0041 0.0044 -0.0289 -0.1618 0.0085 0.0647 0.0376 0.0452 0.0139 -0.0032 -0.0253 0.0244 0.0463 0.0178 0.1479 0.1324 -0.0318 9 | candy 0.0753 -0.1464 -0.1270 -0.2575 -0.0910 0.0350 0.0283 -0.0810 -0.0192 -0.0220 -0.2178 -0.0866 -0.1865 0.0275 0.1131 -0.0327 -0.0183 0.1048 -0.1447 -0.1042 -0.2216 0.0707 -0.2615 -0.1984 -0.0120 0.0073 0.1967 0.1264 0.0726 0.1784 0.1826 0.0677 0.0007 -0.1751 -0.0325 -0.1226 0.1251 0.1272 -0.2134 0.0229 -0.0160 0.1320 -0.1675 0.0455 0.1518 0.0341 0.0335 -0.0867 0.0858 -0.0888 0.0243 -0.0620 -0.7815 -0.0089 0.0497 0.0550 0.0858 0.1297 0.0575 -0.0237 0.0751 -0.2031 -0.0990 0.2005 0.1032 -0.1223 0.0383 -0.1367 0.0544 -0.0359 -0.3136 -0.0669 -0.1549 0.0283 0.1459 0.0215 0.1477 0.0548 0.3014 0.0265 0.0209 0.0288 -0.0392 -0.1530 0.0294 0.0271 0.0591 -0.1513 0.2906 0.0764 -0.1399 -0.1526 0.2753 -0.0928 0.0225 0.0832 -0.1159 0.0185 0.0531 -0.0791 -0.1778 0.1562 0.1611 0.0010 0.0371 0.0225 -0.1014 0.1543 0.1145 0.2095 -0.0048 0.0222 0.1948 0.0057 0.0712 0.1834 -0.0111 -0.0403 0.0610 -0.2513 -0.1295 0.0748 -0.1008 0.0420 0.0661 0.1967 -0.0391 -0.1325 -0.0519 0.0366 -0.0442 0.0436 0.1419 0.0022 0.2609 -0.0199 0.1000 0.0938 0.0095 0.0679 0.2929 0.3207 -0.0390 0.2056 0.1144 0.1959 -0.0798 -0.0415 -0.1708 0.0714 -0.2876 0.1323 0.0526 -0.0741 -0.0810 -0.0759 -0.0910 0.0945 0.0855 0.0790 0.0125 -0.2307 0.1919 -0.0652 0.1324 -0.0028 0.1800 0.2008 -0.0756 0.1995 0.0535 0.0136 -0.2409 0.1389 0.1627 0.0578 0.3020 -0.3220 -0.1560 -0.0062 0.0783 0.1399 -0.0915 0.1070 0.2663 -0.1889 -0.2495 0.0732 -0.2348 0.0617 0.0336 -0.0476 0.0244 0.0782 0.1732 0.1114 -0.0313 0.1666 0.0665 -0.1936 0.0442 -0.0056 -0.1360 0.2510 0.1313 0.1778 -0.0060 -0.2534 -0.2312 -0.1736 0.0298 -0.0209 -0.0610 -0.0014 0.0132 -0.1410 -0.1271 -0.1504 -0.0526 0.0782 -0.0372 -0.2143 -0.0683 -0.0476 0.1307 -0.2399 -0.0710 0.0274 -0.1123 0.0153 0.0401 0.1414 
0.2961 0.0626 -0.0974 -0.2066 -0.0252 -0.1938 -0.1918 0.0022 -0.0069 -0.1170 0.0685 0.0215 0.1194 -0.1135 -0.0758 0.1078 -0.1322 0.3919 -0.1020 -0.0077 -0.0907 -0.0543 -0.0426 0.1433 -0.0145 -0.0620 -0.0607 -0.0281 -0.0336 0.1896 -0.2552 0.1172 -0.3381 -0.0025 -0.1401 -0.1125 -0.1175 -0.0028 0.1996 0.1273 -0.2013 0.0641 -0.0772 0.0041 0.0366 -0.0263 0.0672 0.0665 -0.1989 -0.0809 0.0752 0.0623 -0.0265 -0.0564 0.1577 -0.1356 -0.0225 0.0553 0.1698 -0.1010 -0.0225 -0.2278 -0.0826 -0.0635 0.0106 0.0229 0.1026 -0.0337 10 | . 0.0004 0.0032 -0.0204 0.0479 -0.0450 -0.1165 0.0142 0.0068 -0.0334 -0.0504 0.0224 -0.0029 -0.0258 0.0265 0.0059 -0.0459 0.0753 0.0422 0.0269 -0.0283 -0.1013 0.0992 -0.0114 0.0583 -0.1547 -0.1972 -0.0282 -0.1391 -0.0288 -0.0283 0.0273 0.0189 0.0275 -0.0540 0.0458 0.0306 -0.0158 0.2338 0.0206 -0.0081 -0.0180 -0.0059 0.1045 0.0409 0.0352 -0.0038 0.0403 -0.0129 -0.0074 0.0003 -0.0484 0.0412 -0.5999 0.0224 -0.0153 0.0296 0.0011 0.0640 -0.1061 0.0009 -0.0038 -0.0197 0.0198 -0.0056 -0.0287 0.0157 -0.0262 -0.0003 -0.0033 -0.0007 -0.0421 0.0367 -0.0240 -0.0519 -0.0098 0.0297 0.0251 -0.0110 -0.0059 -0.0042 0.0191 0.0912 0.0142 -0.0469 0.0047 -0.0461 -0.0007 -0.0242 -0.1023 0.0221 -0.0055 -0.0246 0.0235 0.1175 0.0527 -0.0013 0.0069 0.0075 0.0653 0.0739 -0.0852 -0.0170 -0.0102 -0.0225 -0.3273 -0.0040 -0.0259 0.0374 -0.1285 -0.0260 0.0512 0.0295 -0.0648 0.0080 0.0100 -0.0888 0.0268 0.0209 0.0172 -0.2961 0.0117 -0.1024 -0.0671 -0.1541 0.0014 0.0895 -0.0090 -0.0117 0.0023 0.0197 0.0513 0.0514 -0.0087 -0.0016 -0.0187 -0.1328 -0.0309 0.0093 -0.0160 -0.0328 0.0123 -0.0135 0.0707 -0.4418 -0.0293 0.0321 0.0725 -0.0150 -0.0241 -0.0308 0.1423 0.0205 -0.0443 -0.0164 -0.0040 0.0410 0.0311 0.0291 -0.0144 0.0029 0.1101 0.0305 0.0559 -0.1322 -0.2437 -0.0496 0.1666 -0.0371 -0.0255 -0.0138 -0.2298 -0.0060 0.0206 0.0459 -0.1113 -0.0365 -0.0248 -0.3067 0.0166 0.0334 0.0021 -0.0163 0.0237 -0.0250 0.0108 -0.1783 0.0301 -0.0656 0.1937 0.0227 0.0142 -0.0309 -0.0313 0.0592 0.0157 
-0.0146 0.0691 -0.0355 0.2422 0.0033 0.0094 0.0925 -0.0280 -0.0084 0.1211 0.0053 -0.0082 0.0111 -0.0628 -0.0273 0.0068 0.0178 -0.0397 0.0079 0.0130 -0.0139 -0.1617 -0.0350 -0.0590 -0.0596 0.0098 0.0481 0.0207 -0.0105 0.0466 0.2175 0.0148 0.0207 -0.0174 -0.1542 0.0322 -0.0149 0.6264 0.0136 -0.0067 0.2430 -0.0644 -0.1055 -0.1890 -0.0042 -0.0424 -0.0319 0.0419 0.0078 -0.0486 -0.0519 -0.0194 0.0320 0.0181 0.0615 -0.0305 -0.0008 -0.0281 0.0642 0.0569 0.0512 -0.0689 -0.0100 0.0339 -0.0010 -0.0024 0.0837 0.0032 -0.0312 -0.1129 0.0081 -0.0320 0.0065 0.0968 -0.0263 -0.0471 -0.0256 -0.0003 0.0188 -0.0397 0.0475 -0.0811 -0.0430 -0.0117 0.0414 -0.0028 0.0524 0.0216 0.0820 0.0114 -0.0173 -0.0362 -0.0067 -0.0118 0.0435 0.0637 0.0022 -0.0096 -0.0360 -0.1679 0.0304 0.0290 0.2070 0.0689 -0.0467 11 | -------------------------------------------------------------------------------- /05-selfattention/transformer_modules.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch 4 | import math 5 | import numpy as np 6 | 7 | # multi-head self attention 8 | class MultiheadAttention(nn.Module): 9 | def __init__(self, 10 | input_size, 11 | key_size, 12 | value_size, 13 | output_size, 14 | attention_dropout=0.1, 15 | num_heads=8): 16 | super(MultiheadAttention, self).__init__() 17 | self.key_size = key_size 18 | self.value_size = value_size 19 | self.num_heads = num_heads 20 | 21 | # transformation for input query, key and value 22 | self.input_query_transform = nn.Linear(input_size, key_size) 23 | self.input_key_transform = nn.Linear(input_size, key_size) 24 | self.input_value_transform = nn.Linear(input_size, value_size) 25 | 26 | self.attention_softmax = nn.Softmax(dim=-1) 27 | self.attention_dropout = nn.Dropout(attention_dropout) 28 | self.output_transform = nn.Linear(value_size, output_size) 29 | 30 | def split_heads(self, x, num_heads): 31 | batch, length, input_size = x.size() 32 | assert input_size 
# multi-head self attention
class MultiheadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    The input is projected to queries/keys (``key_size``) and values
    (``value_size``); both sizes are split evenly across ``num_heads``.
    """

    def __init__(self,
                 input_size,
                 key_size,
                 value_size,
                 output_size,
                 attention_dropout=0.1,
                 num_heads=8):
        super(MultiheadAttention, self).__init__()
        self.key_size = key_size
        self.value_size = value_size
        self.num_heads = num_heads

        # transformation for input query, key and value
        self.input_query_transform = nn.Linear(input_size, key_size)
        self.input_key_transform = nn.Linear(input_size, key_size)
        self.input_value_transform = nn.Linear(input_size, value_size)

        self.attention_softmax = nn.Softmax(dim=-1)
        self.attention_dropout = nn.Dropout(attention_dropout)
        self.output_transform = nn.Linear(value_size, output_size)

    def split_heads(self, x, num_heads):
        """[batch, length, size] -> [batch, num_heads, length, size // num_heads]."""
        batch, length, input_size = x.size()
        assert input_size % num_heads == 0, (
            "the input size should be a multiple of number of heads")
        new_dim = input_size // num_heads
        ans = x.view(batch, length, num_heads, new_dim).transpose(1, 2)
        return ans

    def combine_heads(self, x, num_heads):
        """Inverse of ``split_heads``: merge the head axis back into the feature axis."""
        batch, _, length, new_dim = x.size()
        ans = x.transpose(1, 2).contiguous().view(batch, length, num_heads * new_dim)
        return ans

    def forward(self,
                query,
                bias=None):
        """
        query: query, key and value of self-attention, [batch_size, length, input_size]
        bias: the bias to mask the padded words, [batch_size, length, length];
              it is added to the attention logits of every head

        returns (ans, attn): ans is [batch_size, length, output_size] and attn
        is the attention distribution before dropout,
        [batch_size, num_heads, length, length]
        """

        batch_size, length, _ = query.size()

        q = self.input_query_transform(query)
        k = self.input_key_transform(query)
        v = self.input_value_transform(query)

        q = self.split_heads(q, self.num_heads)
        k = self.split_heads(k, self.num_heads)
        v = self.split_heads(v, self.num_heads)

        key_size_per_head = self.key_size // self.num_heads

        # refer to the paper "Attention is all you need"
        q = q / math.sqrt(key_size_per_head)

        logits = torch.matmul(q, k.transpose(2, 3))

        # mask the padded words
        if bias is not None:
            bias = bias.unsqueeze(1).expand_as(logits)
            logits = logits + bias

        # calculate the attention for each head
        attn = self.attention_softmax(logits)
        drop_attn = self.attention_dropout(attn)
        x = torch.matmul(drop_attn, v)

        # get attention score all heads
        attn = attn.view(batch_size, self.num_heads, length, length)

        # combine the attention heads
        x = self.combine_heads(x, self.num_heads)

        ans = self.output_transform(x)

        return ans, attn


class FeadForwadLayer(nn.Module):
    """Position-wise feed-forward block: Linear -> ReLU -> Dropout -> Linear."""

    def __init__(self,
                 input_size,
                 filter_size,
                 output_size,
                 relu_dropout=0.0):
        super(FeadForwadLayer, self).__init__()
        self.mid_layer = nn.Linear(input_size, filter_size)
        self.out_layer = nn.Linear(filter_size, output_size)
        self.relu = nn.ReLU()
        self.relu_dropout = nn.Dropout(relu_dropout)

    def forward(self, x):
        t = self.relu(self.mid_layer(x))
        o = self.out_layer(self.relu_dropout(t))
        return o


class LayerNorm(nn.Module):
    """Layer normalisation over the last axis with a learnable scale and bias."""

    def __init__(self, input_size, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.eps = eps
        self.scale = nn.Parameter(torch.ones(input_size), requires_grad=True)
        self.bias = nn.Parameter(torch.zeros(input_size), requires_grad=True)

    def forward(self, x):
        # get mean and std of x
        mean = torch.mean(x, dim=-1, keepdim=True)
        std = torch.std(x, dim=-1, keepdim=True)
        # eps is added to the std (not the variance) to avoid division by zero
        norm_x = (x - mean) / (std + self.eps)
        return norm_x * self.scale + self.bias


class EncoderLayer(nn.Module):
    """One pre-norm encoder layer: self-attention and feed-forward sub-layers,
    each followed by dropout and a residual connection."""

    def __init__(self,
                 hidden_size,
                 filter_size,
                 dropout,
                 relu_dropout,
                 attention_dropout,
                 num_heads=8):
        super(EncoderLayer, self).__init__()
        self.num_heads = num_heads
        self.dropout = dropout
        self.relu_dropout = relu_dropout
        self.attention_dropout = attention_dropout

        self.ma = MultiheadAttention(input_size=hidden_size,
                                     key_size=hidden_size,
                                     value_size=hidden_size,
                                     output_size=hidden_size,
                                     attention_dropout=attention_dropout,
                                     num_heads=num_heads)
        self.ffn = FeadForwadLayer(input_size=hidden_size,
                                   filter_size=filter_size,
                                   output_size=hidden_size,
                                   relu_dropout=self.relu_dropout)
        self.ma_prenorm = LayerNorm(hidden_size)
        self.ffn_prenorm = LayerNorm(hidden_size)
        self.ma_postdropout = nn.Dropout(dropout)
        self.ffn_postdropout = nn.Dropout(dropout)

    def forward(self, x, bias=None):
        """
        x: [batch_size, length, hidden_size]
        bias: optional additive attention bias, [batch_size, length, length],
              forwarded to the self-attention sub-layer
        returns a tensor with the same shape as x
        """
        # layer normalization + multi-attention head
        # (bias must be forwarded, otherwise the padding mask is ignored)
        y, _ = self.ma(self.ma_prenorm(x), bias)
        # dropout + residual connection
        x = self.ma_postdropout(y) + x
        # layer normalization + feed forward layer
        y = self.ffn(self.ffn_prenorm(x))
        # dropout + residual connection
        ans = self.ffn_postdropout(y) + x
        return ans
| x = self.ma_postdropout(y) + x 157 | # layer normalization + feed forward layer 158 | y = self.ffn(self.ffn_prenorm(x)) 159 | # dropout + residual connection 160 | ans = self.ffn_postdropout(y) + x 161 | return ans 162 | 163 | 164 | def test_ma(query=None): 165 | batch_size = 50 166 | length = 20 167 | input_size = 512 168 | 169 | key_size = 1024 170 | value_size = 1024 171 | output_size = 512 172 | 173 | attention_dropout = 0.1 174 | num_heads = 8 175 | 176 | if query is None: 177 | query = torch.rand(batch_size, length, input_size) 178 | else: 179 | batch_size, length, input_size = query.shape 180 | 181 | multihead_attn = MultiheadAttention(input_size=input_size, 182 | key_size=key_size, 183 | value_size=value_size, 184 | output_size=output_size, 185 | attention_dropout=attention_dropout, 186 | num_heads=num_heads) 187 | 188 | # masking for padded words is mandatory for multi-head attention but it's not implemented in the demo 189 | ans, attn = multihead_attn(query) 190 | 191 | assert ans.shape == torch.Size([batch_size, length, output_size]) 192 | assert attn.shape == torch.Size([batch_size, num_heads, length, length]) 193 | return ans, attn 194 | 195 | 196 | def test_el(x): 197 | batch_size, length, hidden_size = x.shape 198 | filter_size = 2048 199 | dropout = 0.1 200 | relu_dropout = 0.1 201 | attention_dropout = 0.1 202 | num_heads = 10 203 | 204 | encoder_layer = EncoderLayer(hidden_size, filter_size, dropout, relu_dropout, attention_dropout, num_heads) 205 | ans = encoder_layer(x) 206 | 207 | assert ans.shape == torch.Size([batch_size, length, hidden_size]) 208 | return ans 209 | 210 | 211 | def load_fasttext(): 212 | import os 213 | ex_sents = "I went to a store to buy a candy \." 
214 | words = ex_sents.split() 215 | for word in words: 216 | os.system("grep '^{} ' wiki-news-300d-1M.vec >> sample.vec".format(word)) 217 | 218 | 219 | def read_sample_vec(): 220 | lines = open("sample.vec", "r").readlines() 221 | vec = [[float(t) for t in line.split()[1:]] for line in lines] 222 | return vec 223 | 224 | 225 | if __name__ == '__main__': 226 | # load_fasttext() 227 | x = torch.tensor(read_sample_vec()).unsqueeze(0) 228 | 229 | # test a multi-head attention 230 | # attn: [batch_size, num_heads, length, length] 231 | ans_ma, attn = test_ma(x) 232 | first_attn = attn[:, 0, :, :].contiguous() 233 | print("First attention head") 234 | print(first_attn) 235 | second_attn = attn[:, 1, :, :].contiguous() 236 | print("Second attention head") 237 | print(second_attn) 238 | 239 | # test one encoder layer 240 | ans_el = test_el(x) 241 | -------------------------------------------------------------------------------- /20-ibmmodels/.gitignore: -------------------------------------------------------------------------------- 1 | fastalign-alignments.txt 2 | fastalign-probs.txt 3 | model1-probs.txt 4 | train.en-de.low.filt.deen 5 | fastalign-revalignments.txt 6 | fastalign-synchronized.txt 7 | model1-probs-2.txt 8 | -------------------------------------------------------------------------------- /20-ibmmodels/README.md: -------------------------------------------------------------------------------- 1 | This is an example of word alignment. 2 | 3 | `model1.py` implements IBM model 1. You can run it by the following command: 4 | 5 | python model1.py ../en-de/train.en-de.low.filt.{de,en} | tee model1-probs.txt 6 | 7 | The example of the output probabilities is in model-1-probs.txt. 8 | 9 | We can also try some better models. For example, let's try [fast_align](https://github.com/clab/fast_align). First we install it. 
"""IBM Model 1 lexical translation probabilities, trained with EM.

Usage: python model1.py FFILE EFILE

FFILE and EFILE are line-aligned, whitespace-tokenised corpora. Training
progress goes to stderr; every t(f|e) above CUTOFF is printed to stdout as
"fword eword prob".
"""
import sys
import math
import numpy as np
from collections import defaultdict

NUM_ITERS = 20        # maximum number of EM iterations
TERMINATE = 1/1000    # stop once the relative log-likelihood gain is below this
CUTOFF = 1e-5         # probabilities at or below this are not printed


def loadcorp(fname, add_null=False):
  """Read a tokenised corpus and map every word to an integer id.

  Args:
    fname: path of a text file with one sentence per line.
    add_null: when true, the word "NULL" gets id 0 and is prepended to
      every sentence.

  Returns:
    (corp, warr): corp is a list of sentences, each a list of word ids;
    warr[i] is the word whose id is i.
  """
  wmap = defaultdict(lambda: len(wmap))
  prefix = [wmap["NULL"]] if add_null else []
  corp = []
  # Explicit encoding so non-ASCII corpora load the same under any locale.
  with open(fname, 'r', encoding='utf-8') as f:
    for line in f:
      corp.append(prefix + [wmap[x] for x in line.split()])
  warr = [None] * len(wmap)
  for word, idx in wmap.items():
    warr[idx] = word
  return corp, warr


def expected_counts(fcorp, ecorp, t, num_esyms):
  """Run one E step.

  Args:
    fcorp, ecorp: parallel lists of id sentences.
    t: dict mapping (f, e) to the current translation probability.
    num_esyms: size of the target vocabulary (length of the totals array).

  Returns:
    (count, total, ll): expected counts per (f, e) pair, their sum per e,
    and the corpus log-likelihood under t.
  """
  count = defaultdict(float)
  total = np.zeros(num_esyms)
  ll = 0.0
  for fsent, esent in zip(fcorp, ecorp):
    if not fsent or not esent:
      continue
    tfe = np.array([[t[f, e] for e in esent] for f in fsent])
    # Normalise per source *position*: keying the normaliser by word id
    # would merge repeated occurrences of a word within one sentence.
    stotal = tfe.sum(axis=1)
    ll += sum(math.log(s / len(esent)) for s in stotal)
    post = tfe / stotal[:, None]
    for fi, f in enumerate(fsent):
      for ei, e in enumerate(esent):
        count[f, e] += post[fi, ei]
        total[e] += post[fi, ei]
  return count, total, ll


def train_model1(fcorp, ecorp, num_fsyms, num_esyms,
                 num_iters=NUM_ITERS, terminate=TERMINATE):
  """Estimate t(f|e) with EM, starting from a uniform distribution.

  Returns a dict mapping (f, e) to a probability for every pair that
  co-occurs in some sentence pair; empty when the source corpus has no
  tokens.
  """
  fcount = sum(len(fsent) for fsent in fcorp)
  t = {}
  if fcount == 0:
    return t

  # initialize to uniform
  uniprob = 1 / float(num_fsyms)
  for fsent, esent in zip(fcorp, ecorp):
    for f in fsent:
      for e in esent:
        t[f, e] = uniprob

  lastll = None
  for myiter in range(num_iters):
    count, total, ll = expected_counts(fcorp, ecorp, t, num_esyms)
    # M step
    for (f, e), v in count.items():
      t[f, e] = v / total[e]
    print(f'Iter {myiter}: ll={ll/fcount}', file=sys.stderr)
    # Stop when the gain is small relative to the magnitude of ll.
    if lastll is not None and (ll - lastll) < abs(ll) * terminate:
      break
    lastll = ll
  return t


def main(argv=None):
  """Command-line entry point; returns the process exit status."""
  argv = sys.argv if argv is None else argv
  if len(argv) != 3:
    print("Usage: model1.py FFILE EFILE", file=sys.stderr)
    return 1

  fcorp, fsyms = loadcorp(argv[1])
  ecorp, esyms = loadcorp(argv[2], add_null=True)
  if len(fcorp) != len(ecorp):
    print(f"Corpora differ in length: {len(fcorp)} vs {len(ecorp)} sentences",
          file=sys.stderr)
    return 1
  print(f"Loaded {len(fcorp)} sentences", file=sys.stderr)

  t = train_model1(fcorp, ecorp, len(fsyms), len(esyms))
  for f, e in sorted(t.keys()):
    tfe = t[f, e]
    if tfe > CUTOFF:
      print(f'{fsyms[f]} {esyms[e]} {tfe}')
  return 0


if __name__ == "__main__":
  sys.exit(main())
"\n"; } 35 | my %active = map { my ($e,$f) = split(/-/); my $id = "".$e."-".$f; $id => 1 } split(/ /,$astr); 36 | my @e = split(/ /,$estr); 37 | my $elen = max( map { asciilength($_) } @e ); 38 | my @f = split(/ /,$fstr); 39 | my $flen = max( map { length($_) } @f ); 40 | for(0 .. $flen) { 41 | print " " for(0 .. $elen); 42 | my $pos = $flen-$_; 43 | for(@f) { 44 | if($pos"] 11 | ctxt = "" 12 | for val in vals: 13 | ctxts1 += 1 14 | ctxts2[ctxt] += 1 15 | count1[val] += 1 16 | count2[(ctxt,val)] += 1 17 | ctxt = val 18 | 19 | ALPHA=0.1 20 | 21 | stateid = defaultdict(lambda: len(stateid)) 22 | 23 | # Print the fallbacks 24 | print("%d %d %.4f" % (stateid[""], stateid[""], -math.log(ALPHA))) 25 | for ctxt, val in ctxts2.items(): 26 | if ctxt != "": 27 | print("%d %d %.4f" % (stateid[ctxt], stateid[""], -math.log(ALPHA))) 28 | 29 | # Print the unigrams 30 | for word, val in count1.items(): 31 | v1 = val/ctxts1 32 | print("%d %d %s %s %.4f" % (stateid[""], stateid[word], word, word, -math.log(v1))) 33 | 34 | # Print the unigrams 35 | for (ctxt, word), val in count2.items(): 36 | v1 = count1[word]/ctxts1 37 | v2 = val/ctxts2[ctxt] 38 | val = 0.9 * v2 + 0.1 * v1 39 | print("%d %d %s %s %.4f" % (stateid[ctxt], stateid[word], word, word, -math.log(val))) 40 | 41 | # Print the final state 42 | print(stateid[""]) 43 | 44 | -------------------------------------------------------------------------------- /21-wfst/bigram.ssym: -------------------------------------------------------------------------------- 1 | 0 2 | NULL 1 3 | apricot 2 4 | an 3 5 | apple 4 6 | peach 5 7 | i 6 8 | a 7 9 | ate 8 10 | she 9 11 | 10 12 | -------------------------------------------------------------------------------- /21-wfst/corpuse.txt: -------------------------------------------------------------------------------- 1 | she ate an apple 2 | she ate a peach 3 | i ate an apricot 4 | -------------------------------------------------------------------------------- /21-wfst/corpusf.txt: 
"""Build a single-state, one-to-one word translation WFST from a parallel corpus.

Usage: python onetoone.py FFILE EFILE > onetoone.txt

Words at the same position of aligned sentences are counted as translation
pairs. Each pair becomes a self-loop on state 0 weighted by
-log(count(f, e) / count(e)).
"""
import sys
import math
from collections import defaultdict


def read_sentences(fname):
  """Return the whitespace-split tokens of every line in fname."""
  # Explicit encoding so accented words load the same under any locale.
  with open(fname, 'r', encoding='utf-8') as fh:
    return [line.split() for line in fh]


def onetoone_arcs(flines, elines):
  """Return one text-format arc line per distinct (f, e) pair.

  Args:
    flines, elines: parallel lists of token lists. Sentences and tokens are
      paired with zip, so any surplus on the longer side is ignored.

  Returns:
    A list of "0 0 f e weight" strings in first-seen order.
  """
  fecount = defaultdict(int)
  ecount = defaultdict(int)
  for fl, el in zip(flines, elines):
    for f, e in zip(fl, el):
      fecount[f, e] += 1
      ecount[e] += 1

  arcs = []
  for (f, e), val in fecount.items():
    # Use an exact 0 when the pair is the only option, so the weight never
    # prints as "-0.0000".
    cost = 0 if val == ecount[e] else -math.log(val / ecount[e])
    arcs.append("0 0 %s %s %.4f" % (f, e, cost))
  return arcs


def main(argv=None):
  """Command-line entry point; returns the process exit status."""
  argv = sys.argv if argv is None else argv
  if len(argv) < 3:
    print("Usage: onetoone.py FFILE EFILE", file=sys.stderr)
    return 1

  for arc in onetoone_arcs(read_sentences(argv[1]), read_sentences(argv[2])):
    print(arc)
  # NOTE(review): this line has only three fields; it may have lost
  # angle-bracketed symbols during extraction - confirm against the repo.
  print("0 0 0")
  # Final state.
  print("0")
  return 0


if __name__ == "__main__":
  sys.exit(main())
#!/bin/bash
# Builds and renders the example WFSTs: a toy FST, a bigram language model,
# a one-to-one translation model, their composition, and the search graph
# for input.txt. Needs the OpenFst command-line tools, Graphviz `dot`,
# `ps2pdf` and `pdfcrop` on PATH.
#
# -e: stop at the first failing command.
# -u: treat unset variables as errors.
# pipefail: a failing `fstcompile` in a pipeline is not masked by the
# command after the pipe.
set -euo pipefail

# Render NAME.fst to a cropped NAME.pdf.
# usage: render_pdf NAME [fstdraw options...]
render_pdf() {
  local name=$1
  shift
  fstdraw "$@" "$name.fst" "$name.dot"
  dot -Tps "$name.dot" > "$name.ps"
  ps2pdf "$name.ps"
  pdfcrop "$name.pdf"
  mv "$name-crop.pdf" "$name.pdf"
}

# Simple example FST
fstcompile --keep_isymbols --keep_osymbols --isymbols=example.isym --osymbols=example.isym example.txt example.fst
render_pdf example

# Create a bigram language model from the corpus
python bigram.py < corpuse.txt > bigram.txt
python symbols.py 2 < bigram.txt > bigram.isym

fstcompile --keep_isymbols --keep_osymbols --isymbols=bigram.isym --osymbols=bigram.isym bigram.txt bigram.fst
render_pdf bigram --acceptor --show_weight_one --ssymbols=bigram.ssym

# Create a one-to-one translation model
python onetoone.py corpusf.txt corpuse.txt > onetoone.txt
python symbols.py 2 < onetoone.txt > onetoone.isym
python symbols.py 3 < onetoone.txt > onetoone.osym

fstcompile --keep_isymbols --keep_osymbols --isymbols=onetoone.isym --osymbols=onetoone.osym onetoone.txt onetoone.fst
render_pdf onetoone --acceptor --show_weight_one
open onetoone.pdf

# Compose together a translation model and language model.
# onetoone.fst is rebuilt here with the bigram symbols on the output side
# and arcs sorted by output label before composing.
fstcompile --keep_isymbols --keep_osymbols --isymbols=onetoone.isym --osymbols=bigram.isym onetoone.txt | fstarcsort --sort_type=olabel > onetoone.fst
fstcompose onetoone.fst bigram.fst composed.fst
render_pdf composed --show_weight_one
open composed.pdf

# Formulate the input as a WFST
fstcompile --keep_isymbols --keep_osymbols --isymbols=onetoone.isym --osymbols=onetoone.isym input.txt input.fst
render_pdf input --acceptor
open input.pdf

# Compose together into a search graph
fstcompose input.fst composed.fst search.fst
render_pdf search
open search.pdf

# Remove epsilons to make it easier to read
fstrmepsilon search.fst searchrmeps.fst
render_pdf searchrmeps
open searchrmeps.pdf

# Some extra examples of composing
# NOTE(review): t2.txt and t3.ssym are read below but do not appear in the
# repository listing - confirm they exist, or this section aborts.
python symbols.py 2 < t1.txt > t1.sym
python symbols.py 2 < t2.txt > t2.sym
python symbols.py 3 < t2.txt > t3.sym

fstcompile --keep_isymbols --keep_osymbols --isymbols=t1.sym --osymbols=t2.sym t1.txt t1.fst
render_pdf t1
open t1.pdf

fstcompile --keep_isymbols --keep_osymbols --isymbols=t2.sym --osymbols=t3.sym t2.txt t2.fst
render_pdf t2
open t2.pdf

fstcompose t1.fst t2.fst t3.fst
render_pdf t3 --ssymbols=t3.ssym
open t3.pdf
"""Print a symbol table for one label column of a text-format FST.

Usage: python symbols.py COLUMN < fst.txt > table.sym

COLUMN is the zero-based field index of the labels to collect (2 for input
labels, 3 for output labels). Output is one "symbol id" line per symbol,
in id order.
"""
import sys

# Label 0 is reserved for epsilon. The empty string cannot be used as its
# name because it would print the malformed line " 0".
# NOTE(review): "<eps>" follows the OpenFst convention - confirm it matches
# the epsilon spelling the other scripts in this directory emit.
EPSILON = "<eps>"


def collect_symbols(lines, col, epsilon=EPSILON):
  """Return the symbols found in field col, ordered by id.

  Args:
    lines: iterable of text-format FST lines.
    col: zero-based index of the field to read.
    epsilon: name given to id 0.

  Returns:
    A list whose index is the symbol id. Only lines with exactly five
    fields are read; all other lines are ignored.
  """
  wid = {epsilon: 0}
  for line in lines:
    arr = line.split()
    if len(arr) == 5:
      # len(wid) is evaluated before insertion, so a new symbol gets the
      # next free id and a known symbol keeps its id.
      wid.setdefault(arr[col], len(wid))
  return list(wid)


def main(argv=None):
  """Command-line entry point; returns the process exit status."""
  argv = sys.argv if argv is None else argv
  if len(argv) < 2:
    print("Usage: symbols.py COLUMN < fst.txt", file=sys.stderr)
    return 1
  col = int(argv[1])
  for idx, sym in enumerate(collect_symbols(sys.stdin, col)):
    print(sym, idx)
  return 0


if __name__ == "__main__":
  sys.exit(main())