├── UnsupNTS.png ├── utils ├── SARI.pyc ├── parsetrain.py ├── lev.py ├── parseresult.py ├── fk_ts_ds.py ├── supdata-iterators.py ├── SARI.py └── evaluate.py ├── predictions ├── noredund.py ├── gen_lower.sen.src2trg.wgan.unsup.noadvcompl.control1.allclass.denoi.singleclassf.rho1.0.10k.13500.test.noredund └── gen_lower.sen.src2trg.wgan.semisup10k-sel-6-4.noadvcompl.control1.allclass.denoi.singleclassf.rho1.0.10k.10000.test.noredund ├── undreamt ├── train.py ├── undreamt │ ├── devices.py │ ├── wordvecs.py │ ├── generator.py │ ├── trainset.py │ ├── similarity_scorer.py │ ├── attention.py │ ├── encoder.py │ ├── discriminator.py │ ├── decoder.py │ ├── data.py │ └── translator.py ├── README.md ├── translate.py ├── namedentity.py └── LICENSE.txt ├── translate.sh ├── README.md └── train.sh /UnsupNTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saisuryadv/UnsupNTS/HEAD/UnsupNTS.png -------------------------------------------------------------------------------- /utils/SARI.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saisuryadv/UnsupNTS/HEAD/utils/SARI.pyc -------------------------------------------------------------------------------- /predictions/noredund.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | def main(): 4 | parser = argparse.ArgumentParser(description='noredund') 5 | parser.add_argument('-i', '--input', default=sys.stdin.fileno(), help='the input file (defaults to stdin)') 6 | args = parser.parse_args() 7 | fl = open(args.input) 8 | for sent in fl: 9 | lsent = sent.strip().split() 10 | if len(lsent) == 0: 11 | print("") 12 | continue 13 | lst = [lsent[0]] 14 | prev_word = lsent[0] 15 | for word in lsent: 16 | if word==prev_word: 17 | prev_word=word 18 | else: 19 | lst.append(word) 20 | prev_word=word 21 | print(' '.join(lst)) 22 | if __name__ == '__main__': 23 | main() -------------------------------------------------------------------------------- /undreamt/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | import undreamt.train 17 | 18 | 19 | if __name__ == '__main__': 20 | 21 | undreamt.train.main_train() 22 | -------------------------------------------------------------------------------- /undreamt/undreamt/devices.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | 17 | def cpu(x): 18 | return x.cpu() if x is not None else None 19 | 20 | 21 | def gpu(x): 22 | return x.cuda(0) if x is not None else None 23 | 24 | 25 | default = cpu -------------------------------------------------------------------------------- /utils/parsetrain.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.pyplot as plt 3 | 4 | f = open("logs/TSdecembedd/out.wgan.allclass.denoi.back1.singleclassf.rho1.0.10k",'r') 5 | 6 | celoss = [] 7 | discloss = [] 8 | classloss = [] 9 | genloss = [] 10 | for ln in f: 11 | if ln == "\n": 12 | continue 13 | line =ln.strip().split() 14 | if line[0]=='BATCH': 15 | if 'celoss' in line[1] : 16 | val = line[-1] 17 | celoss.append(tuple(float(x) for x in val.split(','))) #(celoss,pplperword) 18 | elif 'disc' in line[3]: 19 | val = line[-4:] 20 | discloss.append(tuple(float(x) for x in val)) #((recsim,norsim,norcom,reccom)) 21 | elif 'class' in line[3]: 22 | val = line[-2:] 23 | classloss.append(tuple(float(x) for x in val)) #(recsim,reccom) 24 | elif 'gener' in line[3]: 25 | val = line[-4:] 26 | genloss.append(tuple(float(x) for x in val)) #(discnorsim,discnorcom,classnorsim,classnorcom) 27 | 28 | # print(celoss) 29 | # print() 30 | plt.plot(list(range(len(celoss))),list(list(zip(*celoss))), linewidth=1.0) 31 | 32 | plt.show() 33 | -------------------------------------------------------------------------------- /utils/lev.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from fuzzywuzzy import fuzz 4 | 5 | def main(): 6 | # Parse command line arguments 7 | parser = argparse.ArgumentParser(description='edit distance') 8 | parser.add_argument('-i', '--input', default=sys.stdin.fileno(), help='the input file (defaults to stdin)') 9 | parser.add_argument('-src','--source',default="",help='source file with which edit distance should be calculated.') 10 | args = parser.parse_args() 11 | infile = open(args.input,'r') 12 | srcfile = open(args.source,'r') 13 | inplines = [ln.strip() for ln in infile] 14 | srclines = [ln.strip() for ln in srcfile] 15 | ratio = 0 16 | n = 0 17 | for i in range(len(inplines)): 18 | try: 19 | ratio+=fuzz.ratio(inplines[i],srclines[i])/100 20 | n+=1 21 | except Exception: 22 | continue 23 | ratio/=n 24 | print("edit distance between {} and {} is: {:.4f}".format(args.input.split("/")[-1],args.source.split("/")[-1],ratio)) 25 | 26 | if __name__ == '__main__': 27 | main() -------------------------------------------------------------------------------- /translate.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | #defining paths 4 | ts=`pwd` 5 | src=en 6 | tgt=sen 7 | tsdata=$ts/tsdata 8 | codepath=$ts/undreamt 9 | ntsevalcode=$ts/utils/evaluate.py 10 | gendir=$ts/predictions 11 | genfile=$gendir/gen_lower.$tgt 12 | model=$ts/modeldir 13 | logdir="$ts/logs/TS.GEN" 14 | file="test" 15 | srcfile=tsdata/$file.en 16 | 17 | #creating new directories 18 | mkdir -p "$gendir" 19 | mkdir -p "$model" 20 | mkdir -p "$logdir" 21 | 22 | 23 | 24 | #Generating simplifications 25 | nlines=( 10000 ) 26 | control_nums=( 1 ) 27 | for ncontrol in "${control_nums[@]}" 28 | do 29 | for nline in "${nlines[@]}" 30 | do 31 | modelnum=$nline 32 | pref="wgan.semisup10k-sel-6-4.noadvcompl.control1.allclass.denoi.singleclassf.rho1.0.10k" 33 | noise=0.0 34 | pref="$pref" 35 | modelfile=$model/model.$pref.it$modelnum.src2trg.pth 36 | echo model.$pref.it$modelnum.src2trg.pth 37 | python3 -u "$codepath/translate.py" "$modelfile" --input "$srcfile" --output "$genfile.src2trg.${pref}.$nline.$file" --noise $noise \ 38 | --batch_size 100 --ncontrol $ncontrol \ 39 | >> "$logdir/out.src2trg.$pref" 40 | 41 | done 42 | done 43 | 44 | 45 | 46 | 47 | 48 | 49 | #Evaluating the Simplifications 50 | nlines=( 10000 ) 51 | control_nums=( 1 ) 52 | for ncontrol in "${control_nums[@]}" 53 | do 54 | for nline in "${nlines[@]}" 55 | do 56 | pref="wgan.semisup10k-sel-6-4.noadvcompl.control1.allclass.denoi.singleclassf.rho1.0.10k" 57 | noise=0.0 58 | modelnum=$nline 59 | modelfile=$modeldir/model.$pref.it$modelnum.src2trg.pth 60 | echo model.$pref.it$modelnum.src2trg.pth 61 | genf=$genfile.src2trg.${pref}.$modelnum.$file 62 | python predictions/noredund.py < "$genf" > "${genf}.noredund" 63 | genf="$genf.noredund" 64 | python utils/lev.py --input "$genf" --source "$srcfile" 65 | python utils/fk_ts_ds.py -i "$genf" -src "$srcfile" 66 | mkdir -p tmp 67 | cp "$genf" tmp/ 68 | python2 "$ntsevalcode" "$srcfile" "$tsdata/references.tsv" tmp 69 | rm -rf tmp 70 | 71 | done 72 | done 73 | 74 | -------------------------------------------------------------------------------- /utils/parseresult.py: -------------------------------------------------------------------------------- 1 | import xlwt 2 | from xlwt import Workbook 3 | 4 | wb = Workbook() 5 | 6 | sheet = wb.add_sheet('Valsheet') 7 | bold = xlwt.easyxf('font: bold 1') 8 | bluebold = xlwt.easyxf('font: bold 1, color blue;') 9 | f = open("result.noclass",'r') 10 | dic = {} 11 | for ln in f: 12 | line = ln.strip() 13 | if line == "": 14 | continue 15 | if line[0] == 'E': 16 | continue 17 | tups = [x for x in line.split() if len(x)>=2 and (x[-2]=='h' or x[-2]=='x')] 18 | if len(tups)==0: 19 | #find the class model type and iteration num 20 | # classmodel,iterationnum,typegen = [('.'.join(x.split('.')[4:-2]),int(x.split('.')[-2]),x.split('.')[-1]) for x in line.split() if x[0]=='g'][0] 21 | classmodel,iterationnum,typegen = [('.'.join(x.split('.')[4:-3]),int(x.split('.')[-3]),x.split('.')[-2]) for x in line.split() if x[0]=='g'][0] 22 | tup = (classmodel,iterationnum,typegen) 23 | else: 24 | tup=(tups[0],0,'baseline') 25 | # print(classmodel,iterationnum) 26 | if dic.get(tup) is None: 27 | dic[(tup)]={} 28 | if line[0]=='e': 29 | #edit distance 30 | dic[tup]['edit'] = float(line.split()[-1]) 31 | if line[0]=='(': 32 | #fk,ts,ds 33 | dic[tup]['fk'] = float(line.split()[-3]) 34 | dic[tup]['ts'] = float(line.split()[-2]) 35 | dic[tup]['ds'] = float(line.split()[-1]) 36 | if line[0]=='S': 37 | dic[tup]['SARI'] = float(line.split()[3]) 38 | if line[0]=='B': 39 | dic[tup]['BLEU'] = float(line.split()[3]) 40 | if line[0]=='i': 41 | dic[tup]['iBLEU'] = float(line.split()[3]) 42 | if line[0]=='f': 43 | dic[tup]['fkBLEU'] = float(line.split()[3]) 44 | if line[0]=='w': 45 | dic[tup]['worddiff'] = float(line.split()[3]) 46 | print(dic) 47 | 48 | dic = dic.items() 49 | for idx,(key,val) in enumerate(list(dic)): 50 | if idx==0: 51 | columntitles = ['modelname','modelnum','gen-type']+[k for k in val] 52 | for i,colname in enumerate(columntitles): 53 | sheet.write(0,i,colname) 54 | columnvalues = [key[0],key[1],key[2]]+[vl for ky,vl in val.items()] 55 | for i,colval in enumerate(columnvalues): 56 | sheet.write(idx+1,i,colval) 57 | 58 | 59 | wb.save("sample2.xls") -------------------------------------------------------------------------------- /undreamt/undreamt/wordvecs.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore", message="numpy.dtype size changed") 3 | warnings.filterwarnings("ignore", message="numpy.ufunc size changed") 4 | import argparse 5 | from undreamt import data 6 | import torch 7 | import torch.nn as nn 8 | 9 | def saveembedds(embedds,comdict,filename): 10 | fl = open(filename,'w') 11 | dim = embedds.weight.data.size()[1] 12 | count = embedds.weight.data.size()[0]-1 13 | header = '{} {}\n'.format(count,dim) 14 | fl.write(header) 15 | for i in range(1,count+1): 16 | word = comdict.id2word[i] 17 | vec = ' '.join(str(x) for x in embedds.weight.data[i]) 18 | pair = '{} {}\n'.format(word,vec) 19 | fl.write(pair) 20 | 21 | 22 | def getembeddings(srcpath,trgpath,compath,cutoff=50000): 23 | ts='/home/15CS10013/important-sai/ts12' 24 | tsdata=ts+'/tsdata' 25 | compath = tsdata+'/fk.lower.vec' 26 | srcpath = tsdata+'/fkdifficpart.lower.vec.id' 27 | trgpath = tsdata+'/fkeasypart.lower.vec.id' 28 | vocabcom = data.read_embeddings(open(compath),vocabonly=True) 29 | vocabsrc = data.read_embeddings(open(srcpath),vocabonly=True) 30 | vocabtrg = data.read_embeddings(open(trgpath),vocabonly=True) 31 | vocabcom = set(vocabcom.id2word[1:]) 32 | vocabsrc = set(vocabsrc.id2word[1:]) 33 | vocabtrg = set(vocabtrg.id2word[1:]) 34 | vocabinter = vocabcom & vocabsrc & vocabtrg 35 | embeddcom,vocabcom = data.read_embeddings(open(compath),vocabulary=vocabinter) 36 | embeddsrc,vocabsrc = data.read_embeddings(open(srcpath),vocabulary=vocabinter) 37 | embeddtrg,vocabtrg = data.read_embeddings(open(trgpath),vocabulary=vocabinter) 38 | saveembedds(embeddsrc,vocabsrc,tsdata+'/fkeasypart.lower.vec.id.com') 39 | saveembedds(embeddtrg,vocabtrg,tsdata+'/fkdifficpart.lower.vec.id.com') 40 | saveembedds(embeddcom,vocabcom,tsdata+'/fk.lower.vec.com') 41 | 42 | # embeddsrccom = nn.Embedding(embeddsrc.weight.data.size(0),embeddsrc.weight.data.size(1)+embeddcom.weight.data.size(1)) 43 | # embeddsrccom.weight.data = torch.cat([embeddsrc.weight.data,embeddcom.weight.data],dim=1) 44 | # embeddtrgcom = nn.Embedding(embeddtrg.weight.data.size(0),embeddtrg.weight.data.size(1)+embeddcom.weight.data.size(1)) 45 | # embeddtrgcom.weight.data = torch.cat([embeddtrg.weight.data,embeddcom.weight.data],dim=1) 46 | return (embeddsrccom,vocabsrc),(embeddtrgcom,vocabtrg) 47 | 48 | if __name__ == '__main__': 49 | getembeddings(None,None,None) -------------------------------------------------------------------------------- /utils/fk_ts_ds.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import torch 4 | from fuzzywuzzy import fuzz 5 | from textstat.textstat import textstat 6 | 7 | import spacy 8 | from collections import Counter 9 | import numpy as np 10 | 11 | nlp = spacy.load('en') 12 | 13 | 14 | def inorder(node): 15 | tags = str(node.dep_) +" " 16 | #print tags, node.text 17 | if node.lefts: 18 | for n in node.lefts: 19 | tags+= inorder(n) 20 | if node.rights: 21 | for n in node.rights: 22 | tags+= inorder(n) 23 | return tags 24 | 25 | def doc_sim(doc1,doc2): 26 | sim = doc1.similarity(doc2) 27 | return sim 28 | 29 | def tree_sim(doc1,doc2): 30 | ino1 = "" 31 | ino2 = "" 32 | for tok in doc1: 33 | if tok.dep_=="ROOT": 34 | ino1 = inorder(tok) 35 | break 36 | for tok in doc2: 37 | if tok.dep_=="ROOT": 38 | ino2 = inorder(tok) 39 | break 40 | ts = fuzz.ratio(ino1,ino2) 41 | return ts 42 | 43 | def sentence_stats(s1,s2): 44 | #s2 should be predictions and s1 should be source 45 | try: 46 | fkdiff = textstat.flesch_reading_ease(s2)-textstat.flesch_reading_ease(s1) 47 | except Exception: 48 | fkdiff = 0.0 49 | doc1 = nlp(s1) 50 | doc2 = nlp(s2) 51 | ts = tree_sim(doc1,doc2)/100 52 | ds = doc_sim(doc1,doc2) 53 | return (torch.FloatTensor([fkdiff,ts,ds])) 54 | 55 | def main(): 56 | # Parse command line arguments 57 | parser = argparse.ArgumentParser(description='edit distance') 58 | parser.add_argument('-i', '--input', default=sys.stdin.fileno(), help='the input file (defaults to stdin)') 59 | parser.add_argument('-src','--source',default="",help='source file with which edit distance should be calculated.') 60 | args = parser.parse_args() 61 | infile = open(args.input,'r') 62 | srcfile = open(args.source,'r') 63 | inplines = [ln.strip() for ln in infile] 64 | srclines = [ln.strip() for ln in srcfile] 65 | stats = torch.FloatTensor(3).fill_(0.0) 66 | for i in range(len(inplines)): 67 | stats+=sentence_stats(srclines[i],inplines[i]) 68 | stats=stats.div(len(inplines)) 69 | print("(fkdiff,ts,ds) between {} and {} are: {:.4f} {:.4f} {:.4f}".format(args.input.split("/")[-1],args.source.split("/")[-1]\ 70 | ,stats[0],stats[1],stats[2])) 71 | 72 | if __name__ == '__main__': 73 | main() -------------------------------------------------------------------------------- /utils/supdata-iterators.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | from random import shuffle 4 | random.seed(7) 5 | 6 | def getrepl(word): 7 | if word =='-LRB-': 8 | return '(' 9 | if word=='-RRB-': 10 | return ')' 11 | return word 12 | 13 | def replace_RB(pair): 14 | lst = [] 15 | for sent in pair: 16 | lst.append(' '.join([getrepl(w) for w in sent.strip().split()])+'\n') 17 | return tuple(lst) 18 | 19 | def getsplit(nsplit,nwiki,fsplit,fen,fsen,outpathen,outpathsen): 20 | ns = 0 21 | fouten = open(outpathen,'w') 22 | foutsen = open(outpathsen,'w') 23 | lst = [] 24 | for pair in yieldsplit(fsplit): 25 | pr = replace_RB(pair) 26 | lst.append(pr) 27 | ns+=1 28 | if ns==nsplit: 29 | break 30 | ns=0 31 | for senten,sentsen in yieldwiki(fen,fsen): 32 | lst.append(replace_RB((senten,sentsen))) 33 | ns+=1 34 | if ns==nwiki: 35 | break 36 | shuffle(lst) 37 | for pair in lst: 38 | fouten.write(pair[0]) 39 | foutsen.write(pair[1]) 40 | return 41 | 42 | def yieldsplit(fsplit): 43 | prevsrc = None 44 | prevtrglst = [] 45 | while True: 46 | ln = fsplit.readline() 47 | ln = ln.strip() 48 | if ln == "": 49 | continue 50 | if len(ln.split(':'))==1: 51 | #src area 52 | if prevsrc is not None: 53 | assert len(prevtrglst) >=1 54 | yield (prevsrc,prevtrglst[random.randint(0,len(prevtrglst)-1)]) 55 | prevsrc = fsplit.readline().strip()+'\n' 56 | prevtrglst = [] 57 | else: 58 | #trg area, get all the sentences 59 | assert prevsrc is not None 60 | splits = [] 61 | while(True): 62 | lln = fsplit.readline().strip() 63 | if lln =="": 64 | break 65 | if lln.split('=')[0]=='category': 66 | continue 67 | else: 68 | splits.append(lln) 69 | #isolating 1-1 split type. 70 | prevtrglst = [ln for ln in prevtrglst if len([w for w in ln.split() if w=='.'])==2] 71 | prevtrglst.append(' '.join(splits)+'\n') 72 | 73 | def yieldwiki(fen,fsen): 74 | while(True): 75 | senten = fen.readline().strip() 76 | sentsen = fsen.readline().strip() 77 | if " " in [senten,sentsen] or senten==sentsen: 78 | continue 79 | yield senten+'\n',sentsen+'\n' 80 | 81 | 82 | 83 | def main(): 84 | fsplit = open('tsdata/benchmark-v1.0/final-complexsimple-meanpreserve-intreeorder-full.txt') 85 | fen = open('tsdata/train.en') 86 | fsen = open('tsdata/train.sen') 87 | outpathen = sys.argv[1] 88 | outpathsen = sys.argv[2] 89 | # getsplit(15000,5000,fsplit,fen,fsen,outpathen,outpathsen) #the first 20k version. 90 | getsplit(16000,4000,fsplit,fen,fsen,outpathen,outpathsen) 91 | if __name__ == '__main__': 92 | main() -------------------------------------------------------------------------------- /undreamt/undreamt/generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | from undreamt import data 17 | 18 | import torch 19 | import torch.nn as nn 20 | import torch.nn.functional as F 21 | import random 22 | 23 | # random.seed(7) 24 | # torch.manual_seed(7) 25 | # torch.cuda.manual_seed_all(7) 26 | 27 | class EmbeddingGenerator(nn.Module): 28 | def __init__(self, hidden_size, embedding_size): 29 | super(EmbeddingGenerator, self).__init__() 30 | self.hidden2embedding = nn.Linear(hidden_size, embedding_size) 31 | self.special_out = nn.Linear(embedding_size, data.SPECIAL_SYMBOLS, bias=False) 32 | self.logsoftmax = nn.LogSoftmax() 33 | 34 | def forward(self, hidden, embeddings): 35 | emb = self.hidden2embedding(hidden) 36 | word_scores = F.linear(emb, embeddings.weight[1:, :]) 37 | special_scores = self.special_out(emb) 38 | scores = torch.cat((special_scores, word_scores), dim=1) 39 | return self.logsoftmax(scores) 40 | 41 | def output_classes(self): 42 | return None 43 | 44 | 45 | class WrappedEmbeddingGenerator(nn.Module): 46 | def __init__(self, embedding_generator, embeddings): 47 | super(WrappedEmbeddingGenerator, self).__init__() 48 | self.embedding_generator = embedding_generator 49 | self.embeddings = embeddings 50 | 51 | def forward(self, hidden): 52 | return self.embedding_generator(hidden, self.embeddings) 53 | 54 | def output_classes(self): 55 | return self.embeddings.weight.data.size()[0] + data.SPECIAL_SYMBOLS - 1 56 | 57 | 58 | class LinearGenerator(nn.Module): 59 | def __init__(self, hidden_size, vocabulary_size, bias=True): 60 | super(LinearGenerator, self).__init__() 61 | self.out = nn.Linear(hidden_size, data.SPECIAL_SYMBOLS + vocabulary_size, bias=bias) 62 | self.logsoftmax = nn.LogSoftmax(dim=1) 63 | 64 | def forward(self, hidden): 65 | return self.logsoftmax(self.out(hidden)) 66 | 67 | def output_classes(self): 68 | return self.out.weight.size()[0] -------------------------------------------------------------------------------- /undreamt/undreamt/trainset.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import random as rnd 4 | # fe = open('fkeasypart4.lower','r') 5 | # fd = open('fkdifficpart4.lower','r') 6 | # fouts2t=open('fksrc2trg.lower','w') 7 | # foutt2s=open('fktrg2src.lower','w') 8 | # rnd.seed(7) 9 | 10 | 11 | def repeatnoise(fe): 12 | fouts2t = [] 13 | for e in fe: 14 | #create fksrc2trg 15 | easy = e.strip().split() 16 | winsize = 1 if rnd.randint(0,1) == 0 else 2 17 | ndups = len(easy)//8 if winsize==1 else len(easy)//11 18 | if ndups !=0 and len(easy)!=0: 19 | idces = set(np.random.choice(len(easy),size=(ndups,),replace=False)) 20 | outsent = [] 21 | for idx,word in enumerate(easy): 22 | wrd = ' '.join(easy[idx:idx+winsize]) 23 | reword = wrd+' '+wrd.split()[0] if idx in idces else word 24 | outsent.append(reword) 25 | fouts2t.append(' '.join(outsent)) 26 | elif ndups ==0: 27 | fouts2t.append(e.strip()) 28 | 29 | return fouts2t 30 | 31 | 32 | def dropnoise(fd): 33 | foutt2s = [] 34 | for d in fd: 35 | #create fktrg2src 36 | diffi = d.strip().split() 37 | winsize = 1 38 | ndups = len(diffi)//8 if winsize==1 else len(diffi)//11 39 | if ndups!=0 and len(diffi)!=0: 40 | idces = set(np.random.choice(len(diffi),size=(ndups,),replace=False)) 41 | outsent = [] 42 | for idx,word in enumerate(diffi): 43 | wrd = ' '.join(diffi[idx:idx+winsize]) 44 | if idx not in idces: 45 | outsent.append(wrd) 46 | foutt2s.append(' '.join(outsent)) 47 | elif ndups ==0: 48 | foutt2s.append(d.strip()) 49 | return foutt2s 50 | 51 | 52 | def hasNumbers(inputString): 53 | return any(char.isdigit() for char in inputString) 54 | 55 | 56 | def wordordernoise(sents,noiseratio): 57 | sents = [sent.strip().split() for sent in sents] 58 | lengths = [len(sent) for sent in sents] 59 | for idx,length in enumerate(lengths): 60 | if length > 5: 61 | for it in range(int(noiseratio*length)): 62 | j = rnd.randint(0, length-2) 63 | sents[idx][j], sents[idx][j+1] = sents[idx][j+1], sents[idx][j] 64 | return [' '.join(sent) for sent in sents] 65 | 66 | 67 | def numberfiltering(sents): 68 | #replace any word with numbers in it as 69 | sents = [sent.strip().split() for sent in sents] 70 | for idx in range(len(sents)): 71 | for pos in range(len(sents[idx])): 72 | if hasNumbers(sents[idx][pos]): 73 | sents[idx][pos] = 'BlahBlah' 74 | # print(sents) 75 | return [' '.join(sent) for sent in sents] 76 | 77 | 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | UnsupNTS: Unsupervised Neural Text Simplification 2 | ============== 3 | 4 | This is the original implementation of the Unsupervised Neural Text Simplification system and their semi-supervised variants mentioned in the ACL 2019 long paper: 5 | 6 | Sai Surya, Abhijit Mishra, Anirban Laha, Parag Jain, and Karthik Sankaranarayanan. **[Unsupervised Neural Text Simplification](https://arxiv.org/pdf/1810.07931.pdf)** arXiv preprint arXiv:1810.07931 (2018). 7 | 8 | 9 | 10 | 11 | Training 12 | -------- 13 | Download `tsdata.zip` from **[link](https://drive.google.com/open?id=1oHDTOX5u4JS8RvnvlogeQaGPvarjKRk-)** and extract 14 | ``` 15 | unzip tsdata.zip 16 | ``` 17 | `tsdata.zip` has 18 | - Unsupervised sets of easy and difficult set of sentences judged on readability ease scores. 19 | - Dict2vec embeddings trained on the above unsupervised sets. 20 | - 10k parallel pairs of difficult and simplified variants. 21 | - Test set and references - eight tab seperated references per each test sentence. 22 | 23 | Train the models using 24 | ``` 25 | bash train.sh 26 | ``` 27 | `train.sh` has 28 | - UNTS system from unsupervised simplification data using the exact same settings described in the paper. 29 | - UNTS-10k system, using additional 10k supervised pairs of mixture of split-rephrase and simplification parallel pairs. 30 | - UNMT system on the unsupervised simplification data. 31 | - ablations on adversarial and separation/classifier losses. 32 | 33 | For more details and additional options, run the above scripts with the `--help` flag. 34 | Alternatively, visit the **[ipynb](https://drive.google.com/file/d/1cVuzsU389WC9-1NliaP6mpBU77ZkgW6v/view?usp=sharing)** in google colaboratory to reproduce the results. To access pretrained models visit **[link](https://drive.google.com/file/d/11U-MnbjkLQXK_z5R6RPsfSZWwmSPoj34/view?usp=sharing)**. The folder `predictions` has the generations from the pretrained models. 35 | 36 | **Note**: Pretrained models were trained with pytorch 0.3.1. 37 | 38 | Generation and Evaluation of Simplifications 39 | -------- 40 | ``` 41 | bash translate.sh 42 | ``` 43 | `translate.sh` is used for 44 | - Generating simplifications of test dataset. 45 | - Computing stand alone metrics such as Flesch readability ease score difference, Tree similarity and Document similarity metrics. 46 | - Computing SARI, BLEU and Word-diff metrics. 47 | 48 | Acknowledgements 49 | -------- 50 | Our code uses functions from https://github.com/artetxem/undreamt and https://github.com/senisioi/NeuralTextSimplification extensively. 51 | 52 | If you use our system for academic research, please cite the following paper: 53 | ``` 54 | @inproceedings{surya-etal-2019-unsupervised, 55 | title = "Unsupervised Neural Text Simplification", 56 | author = "Surya, Sai and 57 | Mishra, Abhijit and 58 | Laha, Anirban and 59 | Jain, Parag and 60 | Sankaranarayanan, Karthik", 61 | booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics", 62 | month = jul, 63 | year = "2019", 64 | address = "Florence, Italy", 65 | publisher = "Association for Computational Linguistics", 66 | url = "https://www.aclweb.org/anthology/P19-1198", 67 | doi = "10.18653/v1/P19-1198", 68 | pages = "2058--2068" 69 | } 70 | ``` 71 | -------------------------------------------------------------------------------- /undreamt/undreamt/similarity_scorer.py: -------------------------------------------------------------------------------- 1 | # Compare semantic (word-embedding based) and syntactic similarity between two sentencces 2 | 3 | #Insall spacy via `pip install spacy' if you get import error. Then run `python -m spacy download en' 4 | #Install fuzzywuzzy using `pip install fuzzywuzzy' 5 | # Install python-Levenshtein using `pip install python-Levenshtein' 6 | # import future # pip install future 7 | # import builtins # pip install future 8 | # import past # pip install future 9 | # import six # pip install six 10 | import spacy 11 | from fuzzywuzzy import fuzz 12 | from collections import Counter 13 | import numpy as np 14 | 15 | nlp = spacy.load('en') 16 | 17 | def find_overlap(sent,original): 18 | # interseclen/reflen 19 | lsent = sent.split() 20 | lorig = original.split() 21 | w1 = Counter(lsent) 22 | w2 = Counter(lorig) 23 | w3 = w1 & w2 24 | overlap = float(sum(w3.values()))/max(1e-8,len(lsent)) 25 | return overlap if overlap<=1 else 1 26 | 27 | def inorder(node): 28 | tags = str(node.dep_) +" " 29 | #print tags, node.text 30 | if node.lefts: 31 | for n in node.lefts: 32 | tags+= inorder(n) 33 | if node.rights: 34 | for n in node.rights: 35 | tags+= inorder(n) 36 | return tags 37 | def doc_sim(doc1,doc2): 38 | sim = doc1.similarity(doc2) 39 | return sim 40 | def tree_sim(doc1,doc2): 41 | ino1 = "" 42 | ino2 = "" 43 | for tok in doc1: 44 | if tok.dep_=="ROOT": 45 | ino1 = inorder(tok) 46 | break 47 | for tok in doc2: 48 | if tok.dep_=="ROOT": 49 | ino2 = inorder(tok) 50 | break 51 | ts = fuzz.ratio(ino1,ino2) 52 | return ts 53 | 54 | 55 | def sentence_stats(s1,s2): 56 | #CALL this function . 57 | #Returns a list: [document_similarity, tree_similarity] 58 | overlap = find_overlap(s1,s2) 59 | doc1 = nlp(s1) 60 | doc2 = nlp(s2) 61 | ts = tree_sim(doc1,doc2) 62 | # if overlap >0.9 and ts==100: 63 | # #even changing a very minor portion would result in a high sentence similarity, which we do not want. 64 | # ds = 0.001 65 | # ts = 0.1 66 | # return [ds,ts] 67 | 68 | ds = doc_sim(doc1,doc2) 69 | return (np.asarray((ds,ts,overlap))) 70 | 71 | 72 | 73 | 74 | # def similarity(s1,s2,lexical=True): 75 | # #we want the model to repeat the input but using different words 76 | # #document similarity: words either verbs or nouns should be synonyms in two sentences. 77 | # #tree similarity: if syntactic structure of the two sentences should be same 78 | # #to achieve lexical paraphrasing -> the document similarity should be high but the overlap should be low and tree similarity should be high 79 | # #to achieve syntactic paraphrasing -> the tree similarity should be low and document similarity should be high. 80 | # #this function returns reward which shall improve the lexical paraphrasing ability of the model. 81 | 82 | # #docsimil-intersection 83 | # #stage-I: doesn't return anything relevant, hence overlap will be low. hence safe to use docsimil as reward 84 | # #stage-II: returns outputs with repeating words. Hence intersection between input and output is a littile considerable. overlap measure 85 | # #compromised, 86 | # #stage-III: complete autoencoding, returns whatver input is given. here we have high tree similarity and document similarity. 87 | 88 | 89 | 90 | 91 | if __name__ =="__main__": 92 | s1 = "I love blue" 93 | s2= "I like blue" 94 | print(rewardfunc(s1,s2)) 95 | -------------------------------------------------------------------------------- /undreamt/undreamt/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | import torch 16 | import torch.nn as nn 17 | import random 18 | # random.seed(7) 19 | # torch.manual_seed(7) 20 | # torch.cuda.manual_seed_all(7) 21 | 22 | class GlobalAttention(nn.Module): 23 | def __init__(self, dim, alignment_function='general'): 24 | super(GlobalAttention, self).__init__() 25 | self.alignment_function = alignment_function 26 | if self.alignment_function == 'general': 27 | self.linear_align = nn.Linear(dim, dim, bias=False) 28 | elif self.alignment_function != 'dot': 29 | raise ValueError('Invalid alignment function: {0}'.format(alignment_function)) 30 | self.softmax = nn.Softmax(dim=1) 31 | self.linear_context = nn.Linear(dim, dim, bias=False) 32 | self.linear_query = nn.Linear(dim, dim, bias=False) 33 | self.tanh = nn.Tanh() 34 | 35 | def forward(self, query, context, mask, pass_weights=False, pass_context=False,detach_encoder=False): 36 | # query: batch*dim 37 | # context: length*batch*dim 38 | # ans: batch*dim 39 | 40 | context_t = context.transpose(0, 1) # batch*length*dim 41 | 42 | # Compute alignment scores 43 | q = query if self.alignment_function == 'dot' else self.linear_align(query) 44 | align = context_t.bmm(q.unsqueeze(2)).squeeze(2) # batch*length 45 | 46 | # Mask alignment scores 47 | if mask is not None: 48 | align.data.masked_fill_(mask, -float('inf')) 49 | 50 | # Compute attention from alignment scores 51 | attention = self.softmax(align) # batch*length 52 | 53 | # Computed weighted context 54 | if not detach_encoder: 55 | weighted_context = attention.unsqueeze(1).bmm(context_t).squeeze(1) # batch*dim\ 56 | weighted_context_pass = weighted_context 57 | else: 58 | weighted_context = attention.unsqueeze(1).bmm(context_t).squeeze(1) 59 | weighted_context_pass = attention.unsqueeze(1).bmm(context_t.detach()).squeeze(1) 60 | 61 | 62 | # Combine context and query 63 | if not pass_context: 64 | if not pass_weights: 65 | return self.tanh(self.linear_context(weighted_context) + self.linear_query(query)) 66 | else: 67 | return self.tanh(self.linear_context(weighted_context) + self.linear_query(query)), attention 68 | else: 69 | if not pass_weights: 70 | return self.tanh(self.linear_context(weighted_context) + self.linear_query(query)), weighted_context_pass 71 | else: 72 | return self.tanh(self.linear_context(weighted_context) + self.linear_query(query)), attention, weighted_context_pass 73 | -------------------------------------------------------------------------------- /undreamt/undreamt/encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | from undreamt import data 17 | 18 | import torch 19 | import torch.nn as nn 20 | from torch.autograd import Variable 21 | import random 22 | # random.seed(7) 23 | # torch.manual_seed(7) 24 | # torch.cuda.manual_seed_all(7) 25 | 26 | class RNNEncoder(nn.Module): 27 | def __init__(self, embedding_size, hidden_size, bidirectional=False, layers=1, dropout=0): 28 | super(RNNEncoder, self).__init__() 29 | if bidirectional and hidden_size % 2 != 0: 30 | raise ValueError('The hidden dimension must be even for bidirectional encoders') 31 | self.directions = 2 if bidirectional else 1 32 | self.bidirectional = bidirectional 33 | self.layers = layers 34 | self.hidden_size = hidden_size // self.directions 35 | self.special_embeddings = nn.Embedding(data.SPECIAL_SYMBOLS+1, embedding_size, padding_idx=0) 36 | self.rnn = nn.GRU(embedding_size, self.hidden_size, bidirectional=bidirectional, num_layers=layers, 37 | dropout=dropout) 38 | 39 | def forward(self, ids, lengths, word_embeddings, hidden, pass_embedds=False): 40 | sorted_lengths = sorted(lengths, reverse=True) 41 | is_sorted = sorted_lengths == lengths 42 | is_varlen = sorted_lengths[0] != sorted_lengths[-1] 43 | if not is_sorted: 44 | true2sorted = sorted(range(len(lengths)), key=lambda x: -lengths[x]) 45 | sorted2true = sorted(range(len(lengths)), key=lambda x: true2sorted[x]) 46 | ids = torch.stack([ids[:, i] for i in true2sorted], dim=1) 47 | lengths = [lengths[i] for i in true2sorted] 48 | embeddings = word_embeddings(data.word_ids(ids)) + self.special_embeddings(data.special_ids(ids)) 49 | passembeddings = embeddings 50 | # print("EMBEDDINGS TENSOR SIZE: ",passembeddings,passembeddings.requires_grad) 51 | # print("EMBEDDINGS ENCODER: ",word_embeddings.weight.requires_grad) 52 | if is_varlen: 53 | embeddings = nn.utils.rnn.pack_padded_sequence(embeddings, lengths) 54 | output, hidden = self.rnn(embeddings, hidden) 55 | if self.bidirectional: 56 | hidden = torch.stack([torch.cat((hidden[2*i], hidden[2*i+1]), dim=1) for i in range(self.layers)]) 57 | if is_varlen: 58 | output = nn.utils.rnn.pad_packed_sequence(output)[0] 59 | if not is_sorted: 60 | hidden = torch.stack([hidden[:, i, :] for i in sorted2true], dim=1) 61 | output = torch.stack([output[:, i, :] for i in sorted2true], dim=1) 62 | if not pass_embedds: 63 | return hidden, output 64 | else: 65 | return hidden,output,passembeddings 66 | 67 | def initial_hidden(self, batch_size): 68 | return Variable(torch.zeros(self.layers*self.directions, batch_size, self.hidden_size), requires_grad=False) -------------------------------------------------------------------------------- /undreamt/undreamt/discriminator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import random 6 | # random.seed(7) 7 | # torch.manual_seed(7) 8 | # torch.cuda.manual_seed_all(7) 9 | 10 | class CNN(nn.Module): 11 | 12 | def __init__(self, args): 13 | super(CNN, self).__init__() 14 | self.args = args 15 | 16 | # V = args.embed_num 17 | D = args.embedd_dim 18 | C = args.class_num 19 | Ci = 1 20 | Co = args.kernel_num 21 | Ks = args.kernel_sizes 22 | 23 | # self.embed = nn.Embedding(V, D) 24 | # self.convs1 = [nn.Conv2d(Ci, Co, (K, D)) for K in Ks] 25 | self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks]) 26 | ''' 27 | self.conv13 = nn.Conv2d(Ci, Co, (3, D)) 28 | self.conv14 = nn.Conv2d(Ci, Co, (4, D)) 29 | self.conv15 = nn.Conv2d(Ci, Co, (5, D)) 30 | ''' 31 | self.add_control = args.add_control 32 | self.control_num = args.control_num 33 | self.dropout = nn.Dropout(args.dropout) 34 | if not self.add_control: 35 | self.fc3 = nn.Linear(len(Ks)*Co, C) 36 | 37 | if C ==2: 38 | self.fc2 = nn.Linear(len(Ks)*Co, C) 39 | self.fc1 = nn.Linear(len(Ks)*Co, C) 40 | self.fc4 = nn.Linear(len(Ks)*Co, C) 41 | else: 42 | self.classf = nn.Linear(len(Ks)*Co, C*self.control_num) 43 | for i in range(1,C*self.control_num+1): 44 | setattr(self,'disc{}'.format(i),nn.Linear(len(Ks)*Co,C)) 45 | # self.discs = [nn.Linear(len(Ks)*Co,C) for i in range(C*control_num)] 46 | 47 | def _train(self, mode): 48 | self.train(mode) 49 | 50 | def conv_and_pool(self, x, conv): 51 | x = F.relu(conv(x)).squeeze(3) # (N, Co, W) 52 | x = F.max_pool1d(x, x.size(2)).squeeze(2) 53 | return x 54 | 55 | def forward(self, x,target,type=None, train=True,ncontrol=0): 56 | # x = self.embed(x) # (N, W, D) 57 | self._train(train) 58 | 59 | 60 | x = x.unsqueeze(1) # (N, Ci, W, D) 61 | # print('input to conv',x.size()) 62 | x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1] # [(N, Co, W), ...]*len(Ks) 63 | 64 | x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] # [(N, Co), ...]*len(Ks) 65 | 66 | x = torch.cat(x, 1) 67 | 68 | ''' 69 | x1 = self.conv_and_pool(x,self.conv13) #(N,Co) 70 | x2 = self.conv_and_pool(x,self.conv14) #(N,Co) 71 | x3 = self.conv_and_pool(x,self.conv15) #(N,Co) 72 | x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co) 73 | ''' 74 | x = self.dropout(x) # (N, len(Ks)*Co) 75 | if not self.add_control: 76 | if type == "discsim": 77 | logit = self.fc1(x) # (N, C) 78 | elif type == "disccom": 79 | logit = self.fc2(x) 80 | elif type == "classsim": 81 | logit = self.fc3(x) 82 | elif type == "classcom": 83 | logit = self.fc4(x) 84 | else: 85 | if type == "discsim": 86 | logit = getattr(self,"disc{}".format(ncontrol))(x) # (N, C) 87 | elif type == "disccom": 88 | logit = getattr(self,"disc{}".format(ncontrol+self.control_num))(x) 89 | elif type == "classsim": 90 | logit = self.classf(x) 91 | 92 | loss = F.cross_entropy(logit, target,size_average=False) 93 | return loss 94 | 95 | 96 | -------------------------------------------------------------------------------- /undreamt/README.md: -------------------------------------------------------------------------------- 1 | **Note**: This is an adaptation to the original UNdreaMT repository, with additional discriminator and seperator losses and controllable simplification framework. 2 | 3 | UNdreaMT: Unsupervised Neural Machine Translation 4 | ============== 5 | 6 | This is an open source implementation of our unsupervised neural machine translation system, described in the following paper: 7 | 8 | Mikel Artetxe, Gorka Labaka, Eneko Agirre, and Kyunghyun Cho. 2018. **[Unsupervised Neural Machine Translation](https://arxiv.org/pdf/1710.11041.pdf)**. In *Proceedings of the Sixth International Conference on Learning Representations (ICLR 2018)*. 9 | 10 | If you use this software for academic research, please cite the paper in question: 11 | ``` 12 | @inproceedings{artetxe2018iclr, 13 | author = {Artetxe, Mikel and Labaka, Gorka and Agirre, Eneko and Cho, Kyunghyun}, 14 | title = {Unsupervised neural machine translation}, 15 | booktitle = {Proceedings of the Sixth International Conference on Learning Representations}, 16 | month = {April}, 17 | year = {2018} 18 | } 19 | ``` 20 | 21 | 22 | Requirements 23 | -------- 24 | - Python 3 25 | - PyTorch (tested with v0.3) 26 | 27 | 28 | Usage 29 | -------- 30 | 31 | The following command trains an unsupervised NMT system from monolingual corpora using the exact same settings described in the paper: 32 | 33 | ``` 34 | python3 train.py --src SRC.MONO.TXT --trg TRG.MONO.TXT --src_embeddings SRC.EMB.TXT --trg_embeddings TRG.EMB.TXT --save MODEL_PREFIX --cuda 35 | ``` 36 | 37 | The data in the above command should be provided as follows: 38 | - `SRC.MONO.TXT` and `TRG.MONO.TXT` are the source and target language monolingual corpora. They should both be pre-processed so atomic symbols (either tokens or BPE units) are separated by whitespaces. For that purpose, we recommend using [Moses](http://www.statmt.org/moses/) to tokenize and truecase the corpora and, optionally, [Subword-NMT](https://github.com/rsennrich/subword-nmt) if you want to use BPE. 39 | - `SRC.EMB.TXT` and `TRG.EMB.TXT` are the source and target language cross-lingual embeddings. In order to obtain them, we recommend training monolingual embeddings in the corpora above using either [word2vec](https://github.com/tmikolov/word2vec) or [fasttext](https://github.com/facebookresearch/fastText), and then map them to a shared space using [VecMap](https://github.com/artetxem/vecmap). Please make sure to cutoff the vocabulary as desired before mapping the embeddings. 40 | - `MODEL_PREFIX` is the prefix of the output model. 41 | 42 | Using the above settings, training takes about 3 days in a single Titan Xp. Once training is done, you can use the resulting model for translation as follows: 43 | 44 | ``` 45 | python3 translate.py MODEL_PREFIX.final.src2trg.pth < INPUT.TXT > OUTPUT.TXT 46 | ``` 47 | 48 | For more details and additional options, run the above scripts with the `--help` flag. 49 | 50 | 51 | FAQ 52 | -------- 53 | 54 | ###### You claim that your unsupervised NMT system is trained on monolingual corpora alone, but it also requires bilingual embeddings... Isn't that cheating? 55 | 56 | Not really, because we also learn the bilingual embeddings from monolingual corpora alone. We use our companion tool [VecMap](https://github.com/artetxem/vecmap) for that. 57 | 58 | 59 | ###### Can I use this software to train a regular NMT system on parallel corpora? 60 | 61 | Yes! You can use the following arguments to make UNdreaMT behave like a regular NMT system: 62 | 63 | ``` 64 | python3 train.py --src2trg SRC.PARALLEL.TXT TRG.PARALLEL.TXT --src_vocabulary SRC.VOCAB.TXT --trg_vocabulary TRG.VOCAB.TXT --embedding_size 300 --learn_encoder_embeddings --disable_denoising --save MODEL_PREFIX --cuda 65 | ``` 66 | 67 | 68 | License 69 | ------- 70 | 71 | Copyright (C) 2018, Mikel Artetxe 72 | 73 | Licensed under the terms of the GNU General Public License, either version 3 or (at your option) any later version. A full copy of the license can be found in LICENSE.txt. 74 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | #defining global hyperparameters 4 | maxitersteps=200000 5 | loginterval=100 6 | saveinterval=2000 7 | cuda="--cuda" 8 | unsup="--unsup" 9 | 10 | #defining paths 11 | ts=`pwd` 12 | tsdata=$ts/tsdata 13 | codepath=$ts/undreamt 14 | model=$ts/modeldir 15 | 16 | #creating new directories 17 | mkdir -p "$model" 18 | mkdir -p "$tsdata" 19 | 20 | 21 | echo "${ts}" 22 | echo "${tsdata}" 23 | 24 | 25 | 26 | #UNTS - Using both separator and discriminator 27 | batchsize=36 28 | lr=0.00012 29 | hidden=600 30 | dropout=0.2 31 | loginterval=100 32 | saveinterval=200 33 | embeddcom="$tsdata/fk.lower.vec" 34 | pref="wgan.unsup.noadvcompl.control1.allclass.denoi.singleclassf.rho1.0.10k" 35 | MONO=( tsdata/fkdifficpart-2m tsdata/fkeasypart-2m ) 36 | PARALLLEL=( tsdata/wiki-split.en.lower tsdata/wiki-split.sen.lower ) 37 | 38 | 39 | 40 | python3 "$codepath/train.py" --src_embeddings "$embeddcom" --trg_embeddings "$embeddcom" --save "$model/model.$pref" \ 41 | --batch $batchsize $cuda --disable_backtranslation --unsup --enable_mgan --add_control --easyprefix "tsdata/fkeasypart-2m" \ 42 | --difficprefix "tsdata/fkdifficpart-2m" --start_save 9000 --stop_save 13000 43 | 44 | 45 | 46 | exit 47 | 48 | 49 | 50 | #UNTS-10k - Using both separator and discriminator with 10k parallel pairs 51 | batchsize=36 52 | lr=0.00012 53 | hidden=600 54 | dropout=0.2 55 | loginterval=100 56 | saveinterval=200 57 | embeddcom="$tsdata/fk.lower.vec" 58 | pref="wgan.semisup10k-sel-6-4.noadvcompl.control1.allclass.denoi.singleclassf.rho1.0.10k" 59 | MONO=( tsdata/fkdifficpart-2m tsdata/fkeasypart-2m ) 60 | PARALLLEL=( tsdata/wiki-split.en.lower tsdata/wiki-split.sen.lower ) 61 | 62 | 63 | python3 "$codepath/train.py" --src_embeddings "$embeddcom" --trg_embeddings "$embeddcom" --save "$model/model.$pref" $cuda\ 64 | --src2trg "${PARALLLEL[0]}" "${PARALLLEL[1]}" --trg2src "${PARALLLEL[1]}" "${PARALLLEL[0]}" --disable_backtranslation \ 65 | --enable_mgan --add_control --easyprefix "tsdata/fkeasypart-2m" --difficprefix "tsdata/fkdifficpart-2m" --start_save 6000 --stop_save 13000 66 | 67 | 68 | 69 | exit 70 | 71 | 72 | 73 | 74 | # UNTS-10k - only with discriminator loss with 10k parallel pairs 75 | batchsize=36 76 | lr=0.00012 77 | hidden=600 78 | dropout=0.2 79 | loginterval=100 80 | saveinterval=200 81 | embeddcom="$tsdata/fk.lower.vec" 82 | pref="wgan.semisup10k-sel-6-4.noadvcompl.control1.noclassf.denoi.singleclassf.rho1.0.10k" 83 | MONO=( tsdata/fkdifficpart-2m tsdata/fkeasypart-2m ) 84 | PARALLLEL=( tsdata/wiki-split.en.lower tsdata/wiki-split.sen.lower ) 85 | 86 | 87 | python3 "$codepath/train.py" --src_embeddings "$embeddcom" --trg_embeddings "$embeddcom" --save "$model/model.$pref" --batch $batchsize $cuda \ 88 | --src2trg "${PARALLLEL[0]}" "${PARALLLEL[1]}" --trg2src "${PARALLLEL[1]}" "${PARALLLEL[0]}" --disable_backtranslation --enable_mgan --add_control \ 89 | --easyprefix "tsdata/fkeasypart-2m" --difficprefix "tsdata/fkdifficpart-2m" --noclassf --start_save 8000 --stop_save 13000 90 | 91 | 92 | exit 93 | 94 | # UNTS-10k - only with separator loss with 10k parallel pairs 95 | batchsize=36 96 | lr=0.00012 97 | hidden=600 98 | dropout=0.2 99 | loginterval=100 100 | saveinterval=200 101 | embeddcom="$tsdata/fk.lower.vec" 102 | pref="wgan.semisup10k-sel-6-4.noadvcompl.control1.nodisc.denoi.singleclassf.rho1.0.10k" 103 | MONO=( tsdata/fkdifficpart-2m tsdata/fkeasypart-2m ) 104 | PARALLLEL=( tsdata/wiki-split.en.lower tsdata/wiki-split.sen.lower ) 105 | 106 | python3 "$codepath/train.py" --src_embeddings "$embeddcom" --trg_embeddings "$embeddcom" --save "$model/model.$pref" \ 107 | --batch $batchsize $cuda --src2trg "${PARALLLEL[0]}" "${PARALLLEL[1]}" --trg2src "${PARALLLEL[1]}" "${PARALLLEL[0]}" \ 108 | --disable_backtranslation --enable_mgan --add_control --easyprefix "tsdata/fkeasypart-2m" --difficprefix "tsdata/fkdifficpart-2m" --nodisc --start_save 6000 --stop_save 13000 109 | 110 | exit 111 | 112 | 113 | # UNMT using backtranslation and denoising - Artetxe et al 2018. 114 | batchsize=32 115 | lr=0.00012 116 | hidden=600 117 | dropout=0.2 118 | loginterval=100 119 | saveinterval=200 120 | embeddcom="tsdata/fk.lower.vec" 121 | pref="wgan.onlyback.denoi.back1.singleclassf.rho1.0.10k" 122 | MONO=( tsdata/fkdifficpart-2m-1.lower tsdata/fkeasypart-2m-1.lower ) 123 | 124 | python3 "$codepath/train.py" --src "${MONO[0]}" --trg "${MONO[1]}" --src_embeddings "$embeddcom" --trg_embeddings "$embeddcom" --save "$model/model.$pref" \ 125 | --batch $batchsize $cuda --unsup --start_save 18000 --stop_save 24000 126 | 127 | exit 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /undreamt/translate.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | import argparse 17 | import sys 18 | import torch 19 | import undreamt 20 | from undreamt.translator import Translator 21 | from undreamt import data 22 | from undreamt.devices import gpu 23 | import random 24 | random.seed(7) 25 | torch.manual_seed(7) 26 | # torch.cuda.manual_seed_all(7) 27 | 28 | def main(): 29 | # Parse command line arguments 30 | parser = argparse.ArgumentParser(description='Translate using a pre-trained model') 31 | parser.add_argument('model', help='a model previously trained with train.py') 32 | parser.add_argument('--batch_size', type=int, default=50, help='the batch size (defaults to 50)') 33 | parser.add_argument('--beam_size', type=int, default=12, help='the beam size (defaults to 12, 0 for greedy search)') 34 | parser.add_argument('--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)') 35 | parser.add_argument('-i', '--input', default=sys.stdin.fileno(), help='the input file (defaults to stdin)') 36 | parser.add_argument('-o', '--output', default=sys.stdout.fileno(), help='the output file (defaults to stdout)') 37 | parser.add_argument('--noise',type=float,default=0.5) 38 | parser.add_argument('--pass_att',action='store_true',default=False) 39 | parser.add_argument('--src_embeddings',default=None,help='common intersection source embeddings') 40 | parser.add_argument('--cutoff', type=int, default=None, help='cutoff for source embeddings above') 41 | parser.add_argument('--cat_embedds',help='use torch.load to load src and trg ') 42 | parser.add_argument('--ncontrol',type=int,default=0,help='control number given while using the decoder') 43 | args = parser.parse_args() 44 | 45 | try: 46 | t = torch.load(args.model) 47 | except Exception: 48 | t = torch.load(args.model,map_location={'cuda:1':'cuda:0'}) 49 | 50 | # Translate sentences 51 | end = False 52 | fin = open(args.input, encoding=args.encoding, errors='surrogateescape') 53 | fout = open(args.output, mode='w', encoding=args.encoding, errors='surrogateescape') 54 | if args.src_embeddings is not None: 55 | encoder_embeddings,src_dictionary = data.read_embeddings(open(args.src_embeddings,'r'),threshold=args.cutoff) 56 | encoder_embeddings = gpu(encoder_embeddings) 57 | t.decoder_embeddings=gpu(t.decoder_embeddings) 58 | t.generator=gpu(t.generator) 59 | t.encoder=gpu(t.encoder) 60 | t.decoder=gpu(t.decoder) 61 | 62 | translator_new = Translator(encoder_embeddings,t.decoder_embeddings,t.generator,src_dictionary,\ 63 | t.trg_dictionary,t.encoder,t.decoder,t.denoising,t.device) 64 | else: 65 | t.device=gpu 66 | t.encoder=gpu(t.encoder) 67 | t.decoder=gpu(t.decoder) 68 | t.encoder_embeddings=gpu(t.encoder_embeddings) 69 | t.decoder_embeddings=gpu(t.decoder_embeddings) 70 | t.generator=gpu(t.generator) 71 | t.src_dictionary = data.Dictionary(t.src_dictionary.id2word[1:]) 72 | t.trg_dictionary = data.Dictionary(t.trg_dictionary.id2word[1:]) 73 | translator_new = Translator(t.encoder_embeddings,t.decoder_embeddings,t.generator,t.src_dictionary,\ 74 | t.trg_dictionary,t.encoder,t.decoder,t.denoising,t.device) 75 | # print (translator_new.denoising) 76 | # exit(0) 77 | while not end: 78 | batch = [] 79 | while len(batch) < args.batch_size and not end: 80 | line = fin.readline() 81 | if not line: 82 | end = True 83 | else: 84 | batch.append(line) 85 | if args.beam_size <= 0 and len(batch) > 0: 86 | for translation in translator_new.greedy(batch, train=False): 87 | print(translation, file=fout) 88 | elif len(batch) > 0: 89 | translations = translator_new.beam_search(batch, train=False, beam_size=12, max_ratio=2,rnk=6,noiseratio=args.noise,pass_att=args.pass_att,ncontrol=args.ncontrol if args.ncontrol!=0 else None) 90 | print(translations) 91 | if args.pass_att: 92 | for translation1,trans2 in translations: 93 | print(translation1,trans2, file=fout) 94 | else: 95 | for translation in translations: 96 | print(translation, file=fout) 97 | fout.flush() 98 | fin.close() 99 | fout.close() 100 | 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /undreamt/undreamt/decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | from undreamt import data 17 | from undreamt.attention import GlobalAttention 18 | 19 | import torch 20 | import torch.nn as nn 21 | from torch.autograd import Variable 22 | import torch.nn.functional as F 23 | from undreamt.devices import gpu 24 | 25 | import random 26 | random.seed(7) 27 | torch.manual_seed(7) 28 | # torch.cuda.manual_seed_all(7) 29 | 30 | 31 | 32 | class RNNAttentionDecoder(nn.Module): 33 | def __init__(self, embedding_size, hidden_size, layers=1, dropout=0, input_feeding=True): 34 | super(RNNAttentionDecoder, self).__init__() 35 | self.layers = layers 36 | self.hidden_size = hidden_size 37 | self.special_embeddings = nn.Embedding(data.SPECIAL_SYMBOLS+1, embedding_size, padding_idx=0) 38 | self.attention = GlobalAttention(hidden_size, alignment_function='general') 39 | self.input_feeding = input_feeding 40 | self.input_size = embedding_size + hidden_size if input_feeding else embedding_size 41 | self.stacked_rnn = StackedGRU(self.input_size, hidden_size, layers=layers, dropout=dropout) 42 | self.dropout = nn.Dropout(dropout) 43 | #supposed to be a dictionary 44 | self.sosembeddings = nn.Embedding(3+1,embedding_size,padding_idx=0) 45 | 46 | def forward(self, ids, lengths, word_embeddings, hidden, context, context_mask, prev_output, generator,\ 47 | att_embeddings=None,pass_att=False,pass_context=False,detach_encoder=False,ncontrol = None): 48 | if ncontrol is None: 49 | embeddings = word_embeddings(data.word_ids(ids)) + self.special_embeddings(data.special_ids(ids)) 50 | else: 51 | embeddings = word_embeddings(data.word_ids(ids)) + self.special_embeddings(data.special_ids_nosos(ids)) + \ 52 | self.sosembeddings(data.sos_ids(ids).div(3).mul(ncontrol)) 53 | output = prev_output 54 | scores = [] 55 | find_cosine= True if att_embeddings is not None else False 56 | cosineloss = Variable(gpu(torch.FloatTensor(1).fill_(0))) 57 | att_scores=[] 58 | att_contexts=[] 59 | for emb in embeddings.split(1): 60 | if self.input_feeding: 61 | input = torch.cat([emb.squeeze(0), output], 1) 62 | else: 63 | input = emb.squeeze(0) 64 | output, hidden = self.stacked_rnn(input, hidden) 65 | output,att_weights,weighted_context = self.attention(output, context, context_mask,pass_weights=True,pass_context=True,detach_encoder=detach_encoder) 66 | output = self.dropout(output) 67 | score = generator(output) 68 | if pass_context: 69 | # print('weighted_context size:',weighted_context.size()) 70 | att_contexts.append(weighted_context) 71 | if find_cosine: 72 | # print("att_weights:",att_weights.requires_grad) 73 | att_embeddings = att_embeddings.detach() 74 | # print("att_embeddings:",att_embeddings.requires_grad) 75 | weighted_embedd = att_weights.unsqueeze(1).bmm(att_embeddings.transpose(0,1)).squeeze(1) 76 | # print("score: ",score.exp()) 77 | # print("special_embeddings: ",self.special_embeddings.weight.size()) 78 | # print("word_embeddings: ",word_embeddings.weight.size()) 79 | weighted_predembedd = score.exp().unsqueeze(1).matmul(torch.cat([self.special_embeddings.weight[1:],word_embeddings.weight[1:]])).squeeze(1) 80 | # print("weighted_predembedd: ",weighted_predembedd.size()) 81 | att_cosine = torch.sum(F.cosine_similarity(weighted_embedd,weighted_predembedd)) 82 | cosineloss+=att_cosine 83 | att_scores.append(att_weights) 84 | scores.append(score) 85 | 86 | if not pass_context: 87 | if not pass_att: 88 | if not find_cosine: 89 | return torch.stack(scores), hidden, output 90 | else: 91 | return torch.stack(scores), hidden, output, cosineloss 92 | else: 93 | att_scores = torch.stack(att_scores) 94 | if not find_cosine: 95 | return torch.stack(scores), hidden, output, att_scores 96 | else: 97 | return torch.stack(scores), hidden, output, cosineloss, att_scores 98 | else: 99 | att_contexts = torch.stack(att_contexts) 100 | # print('att_contexts size',att_contexts.size()) 101 | if not pass_att: 102 | if not find_cosine: 103 | return torch.stack(scores), hidden, output, att_contexts 104 | else: 105 | return torch.stack(scores), hidden, output, cosineloss, att_contexts 106 | else: 107 | att_scores = torch.stack(att_scores) 108 | if not find_cosine: 109 | return torch.stack(scores), hidden, output, att_scores, att_contexts 110 | else: 111 | return torch.stack(scores), hidden, output, cosineloss, att_scores, att_contexts 112 | 113 | def initial_output(self, batch_size): 114 | return Variable(torch.zeros(batch_size, self.hidden_size), requires_grad=False) 115 | 116 | 117 | # Based on OpenNMT-py 118 | class StackedGRU(nn.Module): 119 | def __init__(self, input_size, hidden_size, layers, dropout): 120 | super(StackedGRU, self).__init__() 121 | self.dropout = nn.Dropout(dropout) 122 | self.num_layers = layers 123 | self.layers = nn.ModuleList() 124 | for i in range(layers): 125 | self.layers.append(nn.GRUCell(input_size, hidden_size)) 126 | input_size = hidden_size 127 | 128 | def forward(self, input, hidden): 129 | h_1 = [] 130 | for i, layer in enumerate(self.layers): 131 | h_1_i = layer(input, hidden[i]) 132 | input = h_1_i 133 | if i + 1 != self.num_layers: 134 | input = self.dropout(input) 135 | h_1 += [h_1_i] 136 | h_1 = torch.stack(h_1) 137 | return input, h_1 -------------------------------------------------------------------------------- /undreamt/namedentity.py: -------------------------------------------------------------------------------- 1 | 2 | import warnings 3 | warnings.filterwarnings("ignore", message="numpy.dtype size changed") 4 | warnings.filterwarnings("ignore", message="numpy.ufunc size changed") 5 | import argparse 6 | import nltk 7 | import undreamt 8 | from undreamt import data 9 | 10 | def extract_entity_names(t): 11 | entity_names = [] 12 | 13 | if hasattr(t, 'label') and t.label: 14 | if t.label() == 'NE': 15 | entity_names.append(' '.join([child[0] for child in t])) 16 | else: 17 | for child in t: 18 | entity_names.extend(extract_entity_names(child)) 19 | 20 | return entity_names 21 | 22 | 23 | 24 | 25 | # def main(): 26 | # parser = argparse.ArgumentParser() 27 | # parser.add_argument('--input',default='gendirprod/gen_lower.sen.src2trg.learndec.back3.newvocab.fk4.semilater.20.test',\ 28 | # help='sequence of sentences for which named entities are to be identified and replaced with token') 29 | # parser.add_argument('--output',default='gendirprod/gen_lower.sen.src2trg.learndec.back3.newvocab.fk4.semilater.20.test.named') 30 | # args=parser.parse_args() 31 | # nlp = spacy.load('en_core_web_sm') 32 | # doc = nlp('San Francisco considers banning sidewalk delivery robots') 33 | # outfl = open(args.output,'w') 34 | # for ln in open(args.input,'r'): 35 | # line = [] 36 | # doc = nlp(ln.strip()) 37 | # for word in doc: 38 | # if word.ent_type_ != "": 39 | # line.append("") 40 | # else: 41 | # line.append(word.text) 42 | # outfl.write(' '.join(line)+'\n') 43 | 44 | 45 | # def main(): 46 | # parser = argparse.ArgumentParser() 47 | # parser.add_argument('--input',default='gendirprod/gen_lower.sen.src2trg.learndec.back3.newvocab.fk4.semilater.20.test',\ 48 | # help='sequence of sentences for which named entities are to be identified and replaced with token') 49 | # parser.add_argument('--output',default='gendirprod/gen_lower.sen.src2trg.learndec.back3.newvocab.fk4.semilater.20.test.named') 50 | # args=parser.parse_args() 51 | # outfl = open(args.output,'w') 52 | # sentences = [ln.strip() for ln in open(args.input,'r')] 53 | # tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences] 54 | # tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences] 55 | # chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True) 56 | # entity_names = [] 57 | # for tree in chunked_sentences: 58 | # # Print results per sentence 59 | # # print extract_entity_names(tree) 60 | 61 | # entity_names.extend(extract_entity_names(tree)) 62 | 63 | # # Print all entity names 64 | # #print entity_names 65 | # print(entity_names) 66 | # # Print unique entity names 67 | # print(set(entity_names)) 68 | 69 | 70 | def main(): 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--inputsrc',default='/home/15CS10013/important-sai/ts12/tsdata/test.en.lower',\ 73 | help='sequence of sentences for which named entities are to be identified and replaced with token') 74 | parser.add_argument('--inputgen',default='/home/15CS10013/important-sai/ts12/gendirprod/gen_lower.sen.src2trg.learndec.back3.newvocab.fk4.semilater.20.test',\ 75 | help='sequence of sentences for which named entities are to be identified and replaced with token') 76 | parser.add_argument('--output',default='/home/15CS10013/important-sai/ts12/gendirprod/gen_lower.sen.src2trg.learndec.back3.newvocab.fk4.semilater.20.test') 77 | args=parser.parse_args() 78 | outposgen = open(args.output+'.pos','w') 79 | outoovsrc = open(args.inputsrc+'.oov','w') 80 | outoovgen = open(args.inputgen+'.oov','w') 81 | sentencesgen = [ln.strip() for ln in open(args.inputgen,'r')] 82 | tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentencesgen] 83 | tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences] 84 | # chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True) 85 | # entity_names = [] 86 | # for tree in chunked_sentences: 87 | # # Print results per sentence 88 | # # print extract_entity_names(tree) 89 | 90 | # entity_names.extend(extract_entity_names(tree)) 91 | 92 | # # Print all entity names 93 | # #print entity_names 94 | # print(entity_names) 95 | # Print unique entity names 96 | # print(set(entity_names)) 97 | ts='/home/15CS10013/important-sai/ts12' 98 | tsdata=ts+'/tsdata' 99 | vocabgen = data.read_embeddings(open(tsdata+'/fkeasypart.lower.vec'),vocabonly=True,threshold=50000) 100 | vocabsrc = data.read_embeddings(open(tsdata+'/fkdifficpart.lower.vec.id.com'),vocabonly=True,threshold=50000) 101 | sentencessrc = [ln.strip() for ln in open(args.inputsrc,'r')] 102 | for sentence in tagged_sentences: 103 | print(sentence,file=outposgen) 104 | for sentence in sentencessrc: 105 | print([(word1,word2) for word1,word2 in zip(sentence.split(),vocabsrc.ids2sentence(vocabsrc.sentence2ids(sentence)).split())] ,file=outoovsrc) 106 | for sentence in sentencessrc: 107 | print( [(word1,word2) for word1,word2 in zip(sentence.split(),vocabgen.ids2sentence(vocabgen.sentence2ids(sentence)).split())],file=outoovgen) 108 | 109 | 110 | # def saveembedds(embedds,comdict,filename): 111 | # fl = open(filename,'w') 112 | # dim = embedds.weight.data.size()[1] 113 | # count = embedds.weight.data.size()[0]-1 114 | # header = '{} {}\n'.format(count,dim) 115 | # fl.write(header) 116 | # for i in range(1,count+1): 117 | # word = comdict.id2word[i] 118 | # vec = ' '.join(str(x) for x in embedds.weight.data[i]) 119 | # pair = '{} {}\n'.format(word,vec) 120 | # fl.write(pair) 121 | 122 | # def main(): 123 | # parser = argparse.ArgumentParser() 124 | # parser.add_argument('--inputsrc',default='/home/15CS10013/important-sai/ts12/tsdata/test.en.lower',\ 125 | # help='sequence of sentences for which named entities are to be identified and replaced with token') 126 | # parser.add_argument('--inputgen',default='/home/15CS10013/important-sai/ts12/gendirprod/gen_lower.sen.src2trg.learndec.back3.newvocab.fk4.semilater.20.test',\ 127 | # help='sequence of sentences for which named entities are to be identified and replaced with token') 128 | # parser.add_argument('--output',default='/home/15CS10013/important-sai/ts12/gendirprod/gen_lower.sen.src2trg.learndec.back3.newvocab.fk4.semilater.20.test') 129 | # args=parser.parse_args() 130 | # ts='/home/15CS10013/important-sai/ts12' 131 | # tsdata=ts+'/tsdata' 132 | # vocabtrg = data.read_embeddings(open(tsdata+'/fkeasypart.lower.vec.id','r'),vocabonly=True,threshold=50000) 133 | # vocabsrc = data.read_embeddings(open(tsdata+'/fkdifficpart.lower.vec.id','r'),vocabonly=True,threshold=50000) 134 | # commonvocab = [x for x in list(set(vocabtrg.id2word)&set(vocabsrc.id2word)) if x is not None] 135 | # embeddstrgcom,trgcomdict = data.read_embeddings(open(tsdata+'/fkeasypart.lower.vec.id','r'),vocabulary=commonvocab,threshold=50000) 136 | # embeddssrccom,srccomdict = data.read_embeddings(open(tsdata+'/fkdifficpart.lower.vec.id','r'),vocabulary=commonvocab,threshold=50000) 137 | # print(len(trgcomdict.id2word),len(srccomdict.id2word)) 138 | # saveembedds(embeddstrgcom,trgcomdict,tsdata+'/fkeasypart.lower.vec.id.com') 139 | # saveembedds(embeddssrccom,srccomdict,tsdata+'/fkdifficpart.lower.vec.id.com') 140 | 141 | if __name__ == '__main__': 142 | main() 143 | -------------------------------------------------------------------------------- /utils/SARI.py: -------------------------------------------------------------------------------- 1 | # ======================================================= 2 | # SARI -- Text Simplification Tunable Evaluation Metric 3 | # ======================================================= 4 | # 5 | # Author: Wei Xu (UPenn xwe@cis.upenn.edu) 6 | # 7 | # A Python implementation of the SARI metric for text simplification 8 | # evaluation in the following paper 9 | # 10 | # "Optimizing Statistical Machine Translation for Text Simplification" 11 | # Wei Xu, Courtney Napoles, Ellie Pavlick, Quanze Chen and Chris Callison-Burch 12 | # In Transactions of the Association for Computational Linguistics (TACL) 2015 13 | # 14 | # There is also a Java implementation of the SARI metric 15 | # that is integrated into the Joshua MT Decoder. It can 16 | # be used for tuning Joshua models for a real end-to-end 17 | # text simplification model. 18 | # 19 | 20 | from __future__ import division 21 | from collections import Counter 22 | import sys 23 | import logging 24 | logging.basicConfig(format = u'[LINE:%(lineno)d]# %(levelname)-8s [%(asctime)s] %(message)s', level = logging.NOTSET) 25 | 26 | 27 | 28 | def ReadInFile (filename): 29 | 30 | with open(filename) as f: 31 | lines = f.readlines() 32 | lines = [x.strip() for x in lines] 33 | return lines 34 | 35 | 36 | def SARIngram(sgrams, cgrams, rgramslist, numref): 37 | 38 | 39 | rgramsall = [rgram for rgrams in rgramslist for rgram in rgrams] 40 | rgramcounter = Counter(rgramsall) 41 | 42 | sgramcounter = Counter(sgrams) 43 | sgramcounter_rep = Counter() 44 | for sgram, scount in sgramcounter.items(): 45 | sgramcounter_rep[sgram] = scount * numref 46 | 47 | cgramcounter = Counter(cgrams) 48 | cgramcounter_rep = Counter() 49 | for cgram, ccount in cgramcounter.items(): 50 | cgramcounter_rep[cgram] = ccount * numref 51 | 52 | 53 | # KEEP 54 | keepgramcounter_rep = sgramcounter_rep & cgramcounter_rep 55 | keepgramcountergood_rep = keepgramcounter_rep & rgramcounter 56 | keepgramcounterall_rep = sgramcounter_rep & rgramcounter 57 | 58 | keeptmpscore1 = 0 59 | keeptmpscore2 = 0 60 | for keepgram in keepgramcountergood_rep: 61 | keeptmpscore1 += keepgramcountergood_rep[keepgram] / keepgramcounter_rep[keepgram] 62 | keeptmpscore2 += keepgramcountergood_rep[keepgram] / keepgramcounterall_rep[keepgram] 63 | #print "KEEP", keepgram, keepscore, cgramcounter[keepgram], sgramcounter[keepgram], rgramcounter[keepgram] 64 | keepscore_precision = 0 65 | if len(keepgramcounter_rep) > 0: 66 | keepscore_precision = keeptmpscore1 / len(keepgramcounter_rep) 67 | keepscore_recall = 0 68 | if len(keepgramcounterall_rep) > 0: 69 | keepscore_recall = keeptmpscore2 / len(keepgramcounterall_rep) 70 | keepscore = 0 71 | if keepscore_precision > 0 or keepscore_recall > 0: 72 | keepscore = 2 * keepscore_precision * keepscore_recall / (keepscore_precision + keepscore_recall) 73 | 74 | 75 | # DELETION 76 | delgramcounter_rep = sgramcounter_rep - cgramcounter_rep 77 | delgramcountergood_rep = delgramcounter_rep - rgramcounter 78 | delgramcounterall_rep = sgramcounter_rep - rgramcounter 79 | 80 | deltmpscore1 = 0 81 | deltmpscore2 = 0 82 | for delgram in delgramcountergood_rep: 83 | deltmpscore1 += delgramcountergood_rep[delgram] / delgramcounter_rep[delgram] 84 | deltmpscore2 += delgramcountergood_rep[delgram] / delgramcounterall_rep[delgram] 85 | delscore_precision = 0 86 | if len(delgramcounter_rep) > 0: 87 | delscore_precision = deltmpscore1 / len(delgramcounter_rep) 88 | delscore_recall = 0 89 | if len(delgramcounterall_rep) > 0: 90 | delscore_recall = deltmpscore1 / len(delgramcounterall_rep) 91 | delscore = 0 92 | if delscore_precision > 0 or delscore_recall > 0: 93 | delscore = 2 * delscore_precision * delscore_recall / (delscore_precision + delscore_recall) 94 | 95 | 96 | # ADDITION 97 | addgramcounter = set(cgramcounter) - set(sgramcounter) 98 | addgramcountergood = set(addgramcounter) & set(rgramcounter) 99 | addgramcounterall = set(rgramcounter) - set(sgramcounter) 100 | 101 | addtmpscore = 0 102 | for addgram in addgramcountergood: 103 | addtmpscore += 1 104 | 105 | addscore_precision = 0 106 | addscore_recall = 0 107 | if len(addgramcounter) > 0: 108 | addscore_precision = addtmpscore / len(addgramcounter) 109 | if len(addgramcounterall) > 0: 110 | addscore_recall = addtmpscore / len(addgramcounterall) 111 | addscore = 0 112 | if addscore_precision > 0 or addscore_recall > 0: 113 | addscore = 2 * addscore_precision * addscore_recall / (addscore_precision + addscore_recall) 114 | 115 | 116 | return (keepscore, delscore_precision, addscore) 117 | 118 | 119 | def SARIsent (ssent, csent, rsents) : 120 | numref = len(rsents) 121 | 122 | s1grams = ssent.lower().split(" ") 123 | c1grams = csent.lower().split(" ") 124 | s2grams = [] 125 | c2grams = [] 126 | s3grams = [] 127 | c3grams = [] 128 | s4grams = [] 129 | c4grams = [] 130 | 131 | r1gramslist = [] 132 | r2gramslist = [] 133 | r3gramslist = [] 134 | r4gramslist = [] 135 | for rsent in rsents: 136 | r1grams = rsent.lower().split(" ") 137 | r2grams = [] 138 | r3grams = [] 139 | r4grams = [] 140 | r1gramslist.append(r1grams) 141 | for i in range(0, len(r1grams)-1) : 142 | if i < len(r1grams) - 1: 143 | r2gram = r1grams[i] + " " + r1grams[i+1] 144 | r2grams.append(r2gram) 145 | if i < len(r1grams)-2: 146 | r3gram = r1grams[i] + " " + r1grams[i+1] + " " + r1grams[i+2] 147 | r3grams.append(r3gram) 148 | if i < len(r1grams)-3: 149 | r4gram = r1grams[i] + " " + r1grams[i+1] + " " + r1grams[i+2] + " " + r1grams[i+3] 150 | r4grams.append(r4gram) 151 | r2gramslist.append(r2grams) 152 | r3gramslist.append(r3grams) 153 | r4gramslist.append(r4grams) 154 | 155 | for i in range(0, len(s1grams)-1) : 156 | if i < len(s1grams) - 1: 157 | s2gram = s1grams[i] + " " + s1grams[i+1] 158 | s2grams.append(s2gram) 159 | if i < len(s1grams)-2: 160 | s3gram = s1grams[i] + " " + s1grams[i+1] + " " + s1grams[i+2] 161 | s3grams.append(s3gram) 162 | if i < len(s1grams)-3: 163 | s4gram = s1grams[i] + " " + s1grams[i+1] + " " + s1grams[i+2] + " " + s1grams[i+3] 164 | s4grams.append(s4gram) 165 | 166 | for i in range(0, len(c1grams)-1) : 167 | if i < len(c1grams) - 1: 168 | c2gram = c1grams[i] + " " + c1grams[i+1] 169 | c2grams.append(c2gram) 170 | if i < len(c1grams)-2: 171 | c3gram = c1grams[i] + " " + c1grams[i+1] + " " + c1grams[i+2] 172 | c3grams.append(c3gram) 173 | if i < len(c1grams)-3: 174 | c4gram = c1grams[i] + " " + c1grams[i+1] + " " + c1grams[i+2] + " " + c1grams[i+3] 175 | c4grams.append(c4gram) 176 | 177 | 178 | (keep1score, del1score, add1score) = SARIngram(s1grams, c1grams, r1gramslist, numref) 179 | (keep2score, del2score, add2score) = SARIngram(s2grams, c2grams, r2gramslist, numref) 180 | (keep3score, del3score, add3score) = SARIngram(s3grams, c3grams, r3gramslist, numref) 181 | (keep4score, del4score, add4score) = SARIngram(s4grams, c4grams, r4gramslist, numref) 182 | 183 | avgkeepscore = sum([keep1score,keep2score,keep3score,keep4score])/4 184 | avgdelscore = sum([del1score,del2score,del3score,del4score])/4 185 | avgaddscore = sum([add1score,add2score,add3score,add4score])/4 186 | finalscore = ( avgkeepscore + avgdelscore + avgaddscore ) / 3 187 | 188 | return finalscore 189 | 190 | 191 | if __name__ == '__main__': 192 | print "Not implemented..." 193 | -------------------------------------------------------------------------------- /utils/evaluate.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import codecs 4 | import logging 5 | from itertools import izip 6 | from SARI import SARIsent 7 | from nltk.translate.bleu_score import * 8 | import numpy as np 9 | smooth = SmoothingFunction() 10 | from nltk import word_tokenize 11 | from textstat.textstat import textstat 12 | 13 | logging.basicConfig(format = u'[LINE:%(lineno)d]# %(levelname)-8s [%(asctime)s] %(message)s', level = logging.NOTSET) 14 | 15 | def files_in_folder(mypath): 16 | return [ os.path.join(mypath,f) for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath,f)) ] 17 | 18 | def folders_in_folder(mypath): 19 | return [ os.path.join(mypath,f) for f in os.listdir(mypath) if os.path.isdir(os.path.join(mypath,f)) ] 20 | 21 | def files_in_folder_only(mypath): 22 | return [ f for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath,f)) ] 23 | 24 | def remove_features(sent): 25 | tokens = sent.split(" ") 26 | return " ".join([token.split("|")[0] for token in tokens]) 27 | 28 | def remove_underscores(sent): 29 | return sent.replace("_", " ") 30 | 31 | def replace_parant(sent): 32 | sent = sent.replace("-lrb-", "(").replace("-rrb-", ")") 33 | return sent.replace("(", "-lrb-").replace(")", "-rrb-") 34 | 35 | def lowstrip(sent): 36 | return sent.lower().strip() 37 | 38 | def normalize(sent): 39 | return replace_parant(lowstrip(sent)) 40 | 41 | def as_is(sent): 42 | return sent 43 | 44 | def get_hypothesis(filename): 45 | hypothesis = '-' 46 | if "_h1" in filename: 47 | hypothesis = '1' 48 | elif "_h2" in filename: 49 | hypothesis = '2' 50 | elif "_h3" in filename: 51 | hypothesis = '3' 52 | elif "_h4" in filename: 53 | hypothesis = '4' 54 | return hypothesis 55 | 56 | def mean(numbers): 57 | return float(sum(numbers)) / max(len(numbers), 1) 58 | 59 | def print_scores(pairs, whichone = ''): 60 | # replace filenames by hypothesis name for csv pretty print 61 | for k,v in pairs: 62 | hypothesis = get_hypothesis(k) 63 | print "\t".join( [whichone, "{:10.2f}".format(v), k, hypothesis] ) 64 | 65 | def SARI_file(source, preds, refs, preprocess,pass_indiv=False): 66 | files = [codecs.open(fis, "r", 'utf-8') for fis in [source, preds, refs]] 67 | scores = [] 68 | for src, pred, ref in izip(*files): 69 | references = [preprocess(r) for r in ref.split('\t')] 70 | scores.append(SARIsent(preprocess(src), preprocess(pred), references)) 71 | for fis in files: 72 | fis.close() 73 | if not pass_indiv : 74 | return mean(scores) 75 | else: 76 | return mean(scores),scores 77 | 78 | 79 | # BLEU doesn't need the source 80 | def BLEU_file(source, preds, refs, preprocess=as_is,pass_indiv=False): 81 | files = [codecs.open(fis, "r", 'utf-8') for fis in [preds, refs]] 82 | scores = [] 83 | references = [] 84 | hypothese = [] 85 | for pred, ref in izip(*files): 86 | references.append([word_tokenize(preprocess(r)) for r in ref.split('\t')]) 87 | hypothese.append(word_tokenize(preprocess(pred))) 88 | for fis in files: 89 | fis.close() 90 | # Smoothing method 3: NIST geometric sequence smoothing 91 | if not pass_indiv: 92 | return corpus_bleu(references, hypothese, smoothing_function=smooth.method3) 93 | else: 94 | return corpus_bleu(references, hypothese, smoothing_function=smooth.method3),[corpus_bleu([ref],[hypo], smoothing_function=smooth.method3) for ref,hypo in zip(references,hypothese)] 95 | 96 | def iBLEU_file(source, preds, refs, preprocess=as_is,pass_indiv=False): 97 | files = [codecs.open(fis, "r", 'utf-8') for fis in [source, preds, refs]] 98 | scores = [] 99 | references = [] 100 | hypothese = [] 101 | ibleu = 0 102 | n = 0 103 | for src, pred, ref in izip(*files): 104 | n+=1 105 | references = [word_tokenize(preprocess(r)) for r in ref.split('\t')] 106 | hypothese = word_tokenize(preprocess(pred)) 107 | source = word_tokenize(preprocess(src)) 108 | ibleu+=0.9*corpus_bleu([references],[hypothese], smoothing_function=smooth.method3)-0.1*corpus_bleu([source],[hypothese], smoothing_function=smooth.method3) 109 | 110 | ibleu/=n 111 | for fis in files: 112 | fis.close() 113 | # Smoothing method 3: NIST geometric sequence smoothing 114 | return ibleu 115 | 116 | 117 | def fkBLEU_file(source, preds, refs, preprocess=as_is,pass_indiv=False): 118 | files = [codecs.open(fis, "r", 'utf-8') for fis in [source, preds, refs]] 119 | scores = [] 120 | references = [] 121 | hypothese = [] 122 | fkbleu = 0 123 | n = 0 124 | for src, pred, ref in izip(*files): 125 | references = [word_tokenize(preprocess(r)) for r in ref.split('\t')] 126 | hypothese = word_tokenize(preprocess(pred)) 127 | source = word_tokenize(preprocess(src)) 128 | ibleu=0.9*corpus_bleu([references],[hypothese], smoothing_function=smooth.method3)-0.1*corpus_bleu([source],[hypothese], smoothing_function=smooth.method3) 129 | try: 130 | fkdiff = textstat.flesch_reading_ease(' '.join(hypothese))-textstat.flesch_reading_ease(' '.join(source)) 131 | n+=1 132 | fkdiff= 1/(1+np.exp(-fkdiff)) 133 | fkbleu+=fkdiff*ibleu 134 | except Exception: 135 | continue 136 | fkbleu/=n 137 | for fis in files: 138 | fis.close() 139 | # Smoothing method 3: NIST geometric sequence smoothing 140 | return ibleu 141 | 142 | 143 | def worddiff_file(source, preds, refs, preprocess=as_is,pass_indiv=False): 144 | files = [codecs.open(fis, "r", 'utf-8') for fis in [source, preds]] 145 | scores = [] 146 | references = [] 147 | hypothese = [] 148 | worddiff = 0 149 | n = 0 150 | for src, pred in izip(*files): 151 | source = word_tokenize(preprocess(src)) 152 | hypothese = word_tokenize(preprocess(pred)) 153 | n+=1 154 | worddiff+=len(source)-len(hypothese) 155 | # print(worddiff) 156 | # print(n) 157 | worddiff/=float(n) 158 | for fis in files: 159 | fis.close() 160 | # Smoothing method 3: NIST geometric sequence smoothing 161 | return worddiff/100.0 162 | 163 | def score(source, refs, fold, METRIC_file, preprocess=as_is,pass_indiv=False): 164 | new_files = files_in_folder(fold) 165 | data = [] 166 | for fis in new_files: 167 | # ignore log files 168 | if ".log" in os.path.basename(fis): 169 | continue 170 | logging.info("Processing "+os.path.basename(fis)) 171 | if not pass_indiv: 172 | score = METRIC_file(source, fis, refs, preprocess,pass_indiv=False) 173 | val = 100*score 174 | else: 175 | score,scorearr = METRIC_file(source, fis, refs, preprocess,pass_indiv=True) 176 | val = 100*score 177 | valarr = [100*scoreelem for scoreelem in scorearr] 178 | logging.info("Done "+str(val)) 179 | data.append((os.path.basename(fis), val)) 180 | data.sort(key=lambda tup: tup[1]) 181 | data.reverse() 182 | if not pass_indiv: 183 | return data 184 | else: 185 | return data,valarr 186 | 187 | if __name__ == '__main__': 188 | try: 189 | revbleu = None 190 | source = sys.argv[1] 191 | logging.info("Source: " + source) 192 | refs = sys.argv[2] 193 | logging.info("References in tsv format: " + refs) 194 | fold = sys.argv[3] 195 | logging.info("Directory of predictions: " + fold) 196 | if(len(sys.argv)==5): 197 | revbleu = True 198 | except: 199 | logging.error("Input parameters must be: " + sys.argv[0] 200 | + " SOURCE_FILE REFS_TSV (paste -d \"\t\" * > reference.tsv) DIRECTORY_OF_PREDICTIONS") 201 | sys.exit(1) 202 | 203 | ''' 204 | SARI can become very unstable to small changes in the data. 205 | The newsela turk references have all the parantheses replaced 206 | with -lrb- and -rrb-. Our output, however, contains the actual 207 | parantheses '(', ')', thus we prefer to apply a preprocessing 208 | step to normalize the text. 209 | ''' 210 | 211 | sari_test,sariarr = score(source, refs, fold, SARI_file, normalize,pass_indiv=True) if not revbleu else None 212 | bleu_test,bleuarr = score(source, refs, fold, BLEU_file, lowstrip,pass_indiv=True) 213 | #find IBLEU = 0.9*BLEU(fold,refs)-0.1*BLEU(fold,source) 214 | # ibleu = score(source,refs,fold,iBLEU_file,lowstrip,pass_indiv=False) 215 | # fkbleu = score(source,refs,fold,fkBLEU_file,lowstrip,pass_indiv=False) 216 | worddiff =score(source,refs,fold,worddiff_file,lowstrip,pass_indiv=False) 217 | whichone = os.path.basename(os.path.abspath(os.path.join(fold, '..'))) + \ 218 | '\t' + \ 219 | os.path.basename(refs).replace('.ref', '').replace("test_0_", "") 220 | print_scores(sari_test, "SARI\t" + whichone) 221 | print_scores(bleu_test, "BLEU\t" + whichone) 222 | # print_scores(ibleu, "iBLEU\t" + whichone) 223 | # print_scores(fkbleu,"fkBLEU\t"+whichone) 224 | print_scores(worddiff,"worddiff\t"+whichone) 225 | # print('SARI individual scores') 226 | # print('\n'.join([str(score) for score in sariarr])) 227 | # print('BLEU individual scores') 228 | # print('\n'.join([str(score) for score in bleuarr])) -------------------------------------------------------------------------------- /undreamt/undreamt/data.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | import collections 17 | import numpy as np 18 | import torch 19 | import torch.nn as nn 20 | import nltk 21 | import random 22 | SPECIAL_SYMBOLS = 4 23 | PAD, OOV, EOS, SOS = 0, 1, 2, 3 24 | 25 | # random.seed(7) 26 | # torch.manual_seed(7) 27 | # torch.cuda.manual_seed_all(7) 28 | 29 | def hasNumbers(inputString): 30 | return any(char.isdigit() for char in inputString) 31 | 32 | 33 | 34 | class Dictionary: 35 | def __init__(self, words): 36 | self.id2word = [None] + words 37 | self.word2id = {word: 1 + i for i, word in enumerate(words)} 38 | 39 | def sentence2ids(self, sentence, eos=False, sos=False,testing=False): 40 | tokens = tokenize(sentence) 41 | if not testing: 42 | ids = [SPECIAL_SYMBOLS + self.word2id[word] - 1 if word in self.word2id else OOV for word in tokens] 43 | if testing: 44 | tagged = nltk.pos_tag(tokens) 45 | # print(tagged) 46 | ftag = (tagged[0][1] =='NNP' or tagged[0][1] =='NNPS' ) 47 | 48 | ids = [SPECIAL_SYMBOLS + self.word2id[(word.lower() if i==0 and not ftag else word )] - 1 if ((word.lower() if i==0 and not ftag else word ) in self.word2id and not hasNumbers(word)) else OOV for i,word in enumerate(tokens)] 49 | 50 | if eos: 51 | ids = ids + [EOS] 52 | if sos: 53 | ids = [SOS] + ids 54 | return ids 55 | 56 | def sentences2ids(self, sentences, eos=False, sos=False,testing=False): 57 | ids = [self.sentence2ids(sentence, eos=eos, sos=sos,testing=testing) for sentence in sentences] 58 | lengths = [len(s) for s in ids] 59 | ids = [s + [PAD]*(max(lengths)-len(s)) for s in ids] # Padding 60 | ids = [[ids[i][j] for i in range(len(ids))] for j in range(max(lengths))] # batch*len -> len*batch 61 | return ids, lengths 62 | 63 | def ids2sentence(self, ids,translation_att=None,sentence=None,pass_att=False,testing=False): 64 | if sentence is not None: 65 | sentence = sentence.strip().split() 66 | try: 67 | if not pass_att: 68 | return ' '.join([sentence[findOOV(self,testing,sentence,translation_att[idx][0] if translation_att[idx][0]!=len(sentence) else translation_att[idx][0]-1)]\ 69 | if i == OOV else self.id2word[i - SPECIAL_SYMBOLS + 1] for idx,i in enumerate(ids) if i != EOS and i != PAD and i != SOS]) 70 | else: 71 | return ' '.join([sentence[findOOV(self,testing,sentence,translation_att[idx][0] if translation_att[idx][0]!=len(sentence) else translation_att[idx][0]-1)]\ 72 | if i == OOV else self.id2word[i - SPECIAL_SYMBOLS + 1] for idx,i in enumerate(ids) if i != EOS and i != PAD and i != SOS]),\ 73 | ' '.join([sentence[translation_att[idx][0] if translation_att[idx][0]!=len(sentence) else translation_att[idx][1]]\ 74 | for idx,i in enumerate(ids) if i != EOS and i != PAD and i != SOS]) 75 | except IndexError: 76 | print(translation_att,sentence) 77 | exit(1) 78 | return ' '.join(['' if i == OOV else self.id2word[i - SPECIAL_SYMBOLS + 1] for i in ids if i != EOS and i != PAD and i != SOS]) 79 | 80 | def ids2sentences(self, ids,translations_att=None,sentences=None,pass_att=False,testing=False): 81 | if translations_att is None: 82 | return [self.ids2sentence(i,pass_att=pass_att,testing=testing) for i in ids] 83 | else: 84 | return [self.ids2sentence(i,translation_att=translations_att[idx],sentence=sentences[idx],pass_att=pass_att,testing=testing) for idx,i in enumerate(ids)] 85 | 86 | def size(self): 87 | return len(self.id2word) - 1 88 | 89 | 90 | def special_ids(ids): 91 | return ids * (ids < SPECIAL_SYMBOLS).long() 92 | 93 | def special_ids_nosos(ids): 94 | return ids * (ids < SPECIAL_SYMBOLS-1).long() 95 | def sos_ids(ids): 96 | return ids * (ids == 3).long() 97 | 98 | 99 | def word_ids(ids): 100 | return (ids - SPECIAL_SYMBOLS + 1) * (ids >= SPECIAL_SYMBOLS).long() 101 | 102 | def findOOV(dic,testing, sentence, hintidx): 103 | ids = dic.sentence2ids(' '.join(sentence),testing=testing) 104 | # print(sentence,hintidx) 105 | windowsize=5 106 | for off in range(windowsize): 107 | low = hintidx-off 108 | high = hintidx+off 109 | if low < 0 and high >= len(sentence): 110 | break 111 | if high=0 and ids[low] == OOV: 114 | return low 115 | return hintidx 116 | 117 | 118 | class CorpusReader: 119 | def __init__(self, src_file, trg_file=None, max_sentence_length=80, cache_size=1000): 120 | self.src_file = src_file 121 | self.trg_file = trg_file 122 | self.epoch = 1 123 | self.pending = set() 124 | self.length2pending = collections.defaultdict(set) 125 | self.next = 0 126 | self.cache = [] 127 | self.cache_size = cache_size 128 | self.max_sentence_length = max_sentence_length 129 | 130 | def _fill_cache(self): 131 | self.next = 0 132 | self.cache = [self.cache[i] for i in self.pending] 133 | self.pending = set() 134 | self.length2pending = collections.defaultdict(set) 135 | while len(self.cache) < self.cache_size: 136 | src = self.src_file.readline() 137 | trg = self.trg_file.readline() if self.trg_file is not None else src 138 | src_length = len(tokenize(src)) 139 | trg_length = len(tokenize(trg)) 140 | if src == '' and trg == '': 141 | print("this should not happen consecutively") 142 | self.epoch += 1 143 | self.src_file.seek(0) 144 | if self.trg_file is not None: 145 | self.trg_file.seek(0) 146 | elif 0 < src_length <= self.max_sentence_length and 0 < trg_length <= self.max_sentence_length: 147 | self.cache.append(((src_length, trg_length), src.strip(), trg.strip())) 148 | for i in range(self.cache_size): 149 | self.pending.add(i) 150 | self.length2pending[self.cache[i][0]].add(i) 151 | 152 | def _remove(self, index): 153 | length = self.cache[index][0] 154 | self.pending.remove(index) 155 | self.length2pending[length].remove(index) 156 | 157 | def _score_length(self, src, trg, src_min, src_max, trg_min, trg_max): 158 | return max(abs(src - src_min), 159 | abs(src - src_max), 160 | abs(trg - trg_min), 161 | abs(trg - trg_max)) 162 | 163 | def next_batch(self, size, noop=False): 164 | if size > self.cache_size: 165 | raise ValueError('Cache size smaller than twice the batch size') 166 | 167 | if len(self.pending) < self.cache_size / 2: 168 | self._fill_cache() 169 | 170 | indices = [self.next] 171 | length = self.cache[self.next][0] 172 | target_length = length 173 | src_min = src_max = length[0] 174 | trg_min = trg_max = length[1] 175 | self._remove(self.next) 176 | while len(indices) < size: 177 | try: 178 | index = self.length2pending[target_length].pop() 179 | self.pending.remove(index) 180 | indices.append(index) 181 | except KeyError: 182 | candidates = [(self._score_length(k[0], k[1], src_min, src_max, trg_min, trg_max), k) for k, v in self.length2pending.items() if len(v) > 0] 183 | target_length = min(candidates)[1] 184 | src_min = min(src_min, target_length[0]) 185 | src_max = max(src_max, target_length[0]) 186 | trg_min = min(trg_min, target_length[1]) 187 | trg_max = max(trg_max, target_length[1]) 188 | 189 | indices = sorted(indices, key=lambda i: self.cache[i][0], reverse=True) 190 | 191 | for i in range(self.next, self.cache_size): 192 | if i in self.pending: 193 | self.next = i 194 | break 195 | 196 | return [self.cache[i][1] for i in indices], [self.cache[i][2] for i in indices] 197 | 198 | 199 | class BacktranslatorCorpusReader: 200 | def __init__(self, corpus, translator,ncontrol=None): 201 | self.corpus = corpus 202 | self.translator = translator 203 | self.epoch = corpus.epoch 204 | self.ncontrol = ncontrol 205 | 206 | def next_batch(self, size, noop=False): 207 | src, trg = self.corpus.next_batch(size) 208 | if not noop: 209 | src = self.translator.greedy(trg, train=False, no_noise=False, ncontrol=self.ncontrol) 210 | else: 211 | src=trg 212 | # _,src = self.translator.beam_search(trg, train=False, beam_size=12, max_ratio=1.1,rnk=6) 213 | # print("EXCEPTION SOLVE: ",src[0]) 214 | self.epoch = self.corpus.epoch 215 | return src, trg 216 | 217 | 218 | class MganCorpusReader: 219 | def __init__(self, *corpuses): 220 | self.corpuses = corpuses 221 | self.epoch = corpuses[0].epoch 222 | 223 | def next_batch(self, size,noop=False): 224 | simcorpus = self.corpuses[0] 225 | comcorpus = self.corpuses[1] 226 | simsrc,_ = simcorpus.next_batch(size//4) 227 | comsrc,_ = comcorpus.next_batch(size//4) 228 | self.epoch = simcorpus.epoch 229 | return (simsrc,simsrc),(comsrc,comsrc) 230 | 231 | class MulBackCorpusReader: 232 | def __init__(self, *corpuses,control_num=3): 233 | self.corpuses = corpuses 234 | self.epoch = corpuses[0].epoch 235 | self.control_num = control_num 236 | 237 | def next_batch(self, size,noop=False): 238 | src = [] 239 | trg = [] 240 | rem = size%self.control_num 241 | for idx,corpus in enumerate(self.corpuses): 242 | p,q= corpus.next_batch((size//self.control_num)+(rem if idx ==0 else 0),noop=noop) 243 | src+=p 244 | trg+=q 245 | # print(size,self.control_num,len(src),p) 246 | self.epoch = self.corpuses[0].epoch 247 | return src,trg 248 | 249 | class MulCorpusReader: 250 | def __init__(self, *corpuses,control_num=3): 251 | self.corpuses = corpuses 252 | self.epoch = corpuses[0].epoch 253 | self.control_num = control_num 254 | 255 | def next_batch(self, size,noop=False): 256 | src = [] 257 | rem = size%self.control_num 258 | for idx,corpus in enumerate(self.corpuses): 259 | p= corpus.next_batch((size//self.control_num)+(rem if idx ==0 else 0),noop=noop) 260 | src+=p[0] 261 | # print(size,self.control_num,len(src),p) 262 | self.epoch = self.corpuses[0].epoch 263 | return src,src 264 | 265 | 266 | def read_embeddings(file, threshold=0, vocabulary=None,vocabonly=False): 267 | header = file.readline().split(' ') 268 | count = int(header[0]) if threshold <= 0 else min(threshold, int(header[0])) 269 | dim = int(header[1]) 270 | words = [] 271 | if not vocabonly: 272 | matrix = np.empty((count+1, dim)) if vocabulary is None else [np.zeros(dim)] 273 | for i in range(count): 274 | word, vec = file.readline().split(' ', 1) 275 | if vocabulary is None: 276 | words.append(word) 277 | if not vocabonly: 278 | matrix[i+1] = np.fromstring(vec, sep=' ') 279 | elif word in vocabulary: 280 | words.append(word) 281 | if not vocabonly: 282 | matrix.append(np.fromstring(vec, sep=' ')) 283 | if vocabulary is not None: 284 | if not vocabonly: 285 | matrix = np.array(matrix) 286 | if not vocabonly: 287 | embeddings = nn.Embedding(matrix.shape[0], dim, padding_idx=0) 288 | embeddings.weight.data.copy_(torch.from_numpy(matrix)) 289 | return embeddings, Dictionary(words) 290 | else: 291 | return Dictionary(words) 292 | 293 | 294 | def random_embeddings(vocabulary_size, embedding_size): 295 | return nn.Embedding(vocabulary_size + 1, embedding_size) 296 | 297 | 298 | def tokenize(sentence): 299 | return sentence.strip().split() -------------------------------------------------------------------------------- /undreamt/undreamt/translator.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018 Mikel Artetxe 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | 16 | from undreamt import data, devices 17 | 18 | import random 19 | import torch 20 | import torch.nn as nn 21 | from torch.autograd import Variable 22 | from undreamt.trainset import repeatnoise, dropnoise, wordordernoise, numberfiltering 23 | import random 24 | 25 | random.seed(7) 26 | torch.manual_seed(7) 27 | # torch.cuda.manual_seed_all(7) 28 | 29 | class Translator: 30 | def __init__(self, encoder_embeddings, decoder_embeddings, generator, src_dictionary, trg_dictionary, encoder, 31 | decoder, denoising=True, device=devices.default,repeatnoise=None,psencoder_embeddings=None): 32 | self.encoder_embeddings = encoder_embeddings 33 | self.decoder_embeddings = decoder_embeddings 34 | self.generator = generator 35 | self.src_dictionary = src_dictionary 36 | self.trg_dictionary = trg_dictionary 37 | self.encoder = encoder 38 | self.decoder = decoder 39 | self.denoising = denoising 40 | self.device = device 41 | weight = device(torch.ones(generator.output_classes())) 42 | weight[data.PAD] = 0 43 | self.criterion = nn.NLLLoss(weight, size_average=False,reduce=False) 44 | self.criterionred = nn.NLLLoss(weight, size_average=False,reduce=True) 45 | self.repeatnoise=repeatnoise 46 | self.psencoder_embeddings=psencoder_embeddings 47 | def _train(self, mode): 48 | self.encoder_embeddings.train(mode) 49 | self.decoder_embeddings.train(mode) 50 | self.generator.train(mode) 51 | self.encoder.train(mode) 52 | self.decoder.train(mode) 53 | self.criterion.train(mode) 54 | 55 | def encode(self, sentences, train=False, backbool=False, verbose = False, noiseratio=0.5, pass_embedds=False, no_noise=False,word_embeddings=None,testing=False): 56 | self._train(train) 57 | if noiseratio==0.0: 58 | no_noise=True 59 | # print("SOURCE: {}".format(sentences[0])) if verbose else print("",end='') 60 | if self.denoising and not no_noise and self.repeatnoise is not None: 61 | sentences = repeatnoise(sentences) if self.repeatnoise else dropnoise(sentences) 62 | 63 | if self.denoising and not no_noise: # Add order noise 64 | sentences = wordordernoise(sentences,noiseratio) 65 | 66 | passsents = sentences.copy() 67 | 68 | print("SOURCEnoised: {}".format(sentences[0])) if verbose else print("",end='') 69 | ids, lengths = self.src_dictionary.sentences2ids(sentences, sos=False, eos=True,testing=testing) 70 | # print(self.src_dictionary.ids2sentences(ids)) 71 | 72 | 73 | varids = self.device(Variable(torch.LongTensor(ids), requires_grad=False)) 74 | hidden = self.device(self.encoder.initial_hidden(len(sentences))) 75 | if not pass_embedds: 76 | hidden, context = self.encoder(ids=varids, lengths=lengths, word_embeddings=self.encoder_embeddings if word_embeddings is None else word_embeddings, hidden=hidden) 77 | return hidden, context, lengths, passsents 78 | else: 79 | hidden, context, passembeddings = self.encoder(ids=varids, lengths=lengths, word_embeddings=self.encoder_embeddings if word_embeddings is None else word_embeddings, hidden=hidden, pass_embedds=True) 80 | return hidden, context, lengths, passembeddings, passsents 81 | 82 | def mask(self, lengths): 83 | batch_size = len(lengths) 84 | max_length = max(lengths) 85 | if max_length == min(lengths): 86 | return None 87 | mask = torch.ByteTensor(batch_size, max_length).fill_(0) 88 | for i in range(batch_size): 89 | for j in range(lengths[i], max_length): 90 | mask[i, j] = 1 91 | return self.device(mask) 92 | 93 | def greedy(self, sentences, max_ratio=2, train=False,pass_att=False,no_noise=False,encodings=None,pass_context=False\ 94 | ,detach_encoder=False,ncontrol=None): 95 | self._train(train) 96 | input_lengths = [len(data.tokenize(sentence)) for sentence in sentences] 97 | if encodings is not None: 98 | (hidden,context,context_lengths,sentences) = encodings 99 | else: 100 | hidden, context, context_lengths, sentences = self.encode(sentences, train,no_noise=no_noise) 101 | context_mask = self.mask(context_lengths) 102 | translations = [[] for sentence in sentences] 103 | translations_att = [[] for sentence in sentences] 104 | prev_words = len(sentences)*[data.SOS] 105 | pending = set(range(len(sentences))) 106 | output = self.device(self.decoder.initial_output(len(sentences))) 107 | context_list = [] 108 | # print("SENTENCES GIVEN TO SCORE: {}".format(sentences[0])) 109 | 110 | while len(pending) > 0: 111 | # print(pending) 112 | var = self.device(Variable(torch.LongTensor([prev_words]), requires_grad=False)) 113 | logprobs, hidden, output, att_scores,att_contexts = self.decoder(var, len(sentences)*[1], self.decoder_embeddings, hidden, context, context_mask, output, self.generator\ 114 | , pass_att=True, pass_context=True,detach_encoder=detach_encoder,ncontrol=ncontrol) 115 | postmask = torch.ByteTensor([0 if i in pending else 1 for i in range(var.data.size()[0])]).unsqueeze(0).unsqueeze(2) 116 | att_contexts.masked_fill_(self.device(Variable(postmask,requires_grad=False)),0) 117 | context_list.append(att_contexts) 118 | if logprobs.size()[1]==1: 119 | prev_words = [logprobs.max(dim=2)[1].squeeze().data.cpu().numpy().tolist()] 120 | else: 121 | prev_words = logprobs.max(dim=2)[1].squeeze().data.cpu().numpy().tolist() 122 | # prev_words = 123 | # print('att_scores {}'.format(att_scores.size())) 124 | prev_words_att = att_scores.topk(dim=2,k=2)[1].squeeze().data.cpu().numpy().tolist() 125 | # print("att_scores IN GREEDY FUNCTION {} {}".format(att_scores,prev_words_att)) 126 | for i in pending.copy(): 127 | if prev_words[i] == data.EOS: 128 | pending.discard(i) 129 | else: 130 | translations[i].append(prev_words[i]) 131 | translations_att[i].append(prev_words_att[i]) 132 | if len(translations[i]) >= max_ratio*input_lengths[i]: 133 | pending.discard(i) 134 | if not pass_context: 135 | return self.trg_dictionary.ids2sentences(translations,translations_att=translations_att,sentences=sentences,pass_att=pass_att) 136 | else: 137 | # print(translations) 138 | # print("simpreds",max([len(x) for x in translations])) 139 | return self.trg_dictionary.ids2sentences(translations,translations_att=translations_att,sentences=sentences,pass_att=pass_att), torch.cat(context_list) 140 | def score(self, src, trg, train=False,backbool=False,reduce=True,verbose=False,find_cosine=False,find_preds=False,pass_att=False,word_embeddings=None\ 141 | , pssrc =None,pass_context=False,pass_encodings=False,no_noise=False,ncontrol=None,encodeonly=False,inp_encodings=None): 142 | self._train(train) 143 | 144 | # Check batch sizes 145 | if len(src) != len(trg): 146 | raise Exception('Sentence and hypothesis lengths do not match') 147 | 148 | # Encode 149 | if inp_encodings is None: 150 | if not find_cosine: 151 | hiddensrc, context, context_lengths, src = self.encode(src, train, backbool=backbool, verbose=verbose, no_noise=no_noise) 152 | if pass_encodings: 153 | encodings = (hiddensrc, context, context_lengths, src) 154 | if encodeonly: 155 | return encodings 156 | if word_embeddings is not None or pssrc is not None: 157 | hiddenpssrc, pscontext, pscontext_lengths, pssrc = self.encode(src if pssrc is None else pssrc,train,backbool=backbool,verbose=verbose\ 158 | , word_embeddings=word_embeddings,no_noise=True) 159 | else: 160 | hiddenpssrc = None 161 | passembeddings=None 162 | else: 163 | hiddensrc, context, context_lengths, passembeddings, src = self.encode(src, train, backbool=backbool, verbose=verbose,pass_embedds=True) 164 | if word_embeddings is not None or pssrc is not None: 165 | hiddenpssrc, pscontext, pscontext_lengths,passembeddings, pssrc = self.encode(src if pssrc is None else pssrc,train,backbool=backbool,verbose=verbose\ 166 | , word_embeddings=word_embeddings,no_noise=True) 167 | else: 168 | hiddenpssrc = None 169 | else: 170 | hiddensrc, context, context_lengths, src = inp_encodings 171 | hiddenpssrc = None 172 | passembeddings = None 173 | encodings = inp_encodings 174 | # hiddentrg, contexttrg, contexttrg_lengths = self.encode(trg, train, backbool=backbool, verbose=verbose) if backbool else (None,None,None)#2xbatchsizex600 175 | context_mask = self.mask(context_lengths) 176 | # print("hiddensrc, context, context_lengths: {} {} {}".format(hiddensrc.size(),context.size(),len(context_lengths))) 177 | # Decode 178 | initial_output = self.device(self.decoder.initial_output(len(src))) 179 | input_ids, lengths = self.trg_dictionary.sentences2ids(trg, eos=False, sos=True) 180 | input_ids_var = self.device(Variable(torch.LongTensor(input_ids), requires_grad=False)) 181 | if pass_context: 182 | if find_cosine: 183 | logprobs, hiddendec, _,cosineloss,att_contexts = self.decoder(input_ids_var, lengths, self.decoder_embeddings, hiddensrc, context, context_mask,\ 184 | initial_output, self.generator,att_embeddings=passembeddings,pass_context=pass_context,ncontrol=ncontrol) 185 | else: 186 | logprobs, hiddendec, _,att_contexts = self.decoder(input_ids_var, lengths, self.decoder_embeddings, hiddensrc, context, context_mask,\ 187 | initial_output, self.generator,att_embeddings=passembeddings,pass_context=pass_context,ncontrol=ncontrol) 188 | # print('att_contexts true size',att_contexts.size()) 189 | if context_mask is not None: 190 | # print('you are right') 191 | att_contexts.masked_fill_(Variable(context_mask.transpose(0,1).unsqueeze(2),requires_grad=False),0) 192 | else: 193 | if not pass_att: 194 | if find_cosine: 195 | logprobs, hiddendec, _,cosineloss = self.decoder(input_ids_var, lengths, self.decoder_embeddings, hiddensrc, context, context_mask,\ 196 | initial_output, self.generator,att_embeddings=passembeddings,ncontrol=ncontrol) 197 | else: 198 | logprobs, hiddendec, _ = self.decoder(input_ids_var, lengths, self.decoder_embeddings, hiddensrc, context, context_mask,\ 199 | initial_output, self.generator,att_embeddings=passembeddings,ncontrol=ncontrol) 200 | else: 201 | 202 | if find_cosine: 203 | logprobs, hiddendec, _,cosineloss,att_scores = self.decoder(input_ids_var, lengths, self.decoder_embeddings, hiddensrc, context, context_mask,\ 204 | initial_output, self.generator,att_embeddings=passembeddings,pass_att=pass_att,ncontrol=ncontrol) 205 | else: 206 | logprobs, hiddendec, _,att_scores = self.decoder(input_ids_var, lengths, self.decoder_embeddings, hiddensrc, context, context_mask,\ 207 | initial_output, self.generator,att_embeddings=passembeddings,pass_att=pass_att,ncontrol=ncontrol) 208 | 209 | 210 | # Compute loss 211 | output_ids, lengths = self.trg_dictionary.sentences2ids(trg, eos=True, sos=False) 212 | output_ids_var = self.device(Variable(torch.LongTensor(output_ids), requires_grad=False)) 213 | #dimension of logprobs is (sentencelen,batchsize,vocabsize) 214 | if reduce: 215 | #sum the losses. 216 | loss = self.criterionred(logprobs.view(-1, logprobs.size()[-1]), output_ids_var.view(-1)) 217 | else: 218 | #the loss that will be returned will be (batchsize,sentencelen) 219 | loss = self.criterion(logprobs.view(logprobs.size()[1],logprobs.size()[2],logprobs.size()[0]),\ 220 | output_ids_var.view(output_ids_var.size()[1],output_ids_var.size()[0])) 221 | if pass_context: 222 | if not pass_encodings: 223 | encodings=None 224 | if find_preds: 225 | return (loss,(self.greedy(src[0:2],pass_att=False,ncontrol=ncontrol),),hiddensrc,hiddenpssrc,att_contexts,encodings) 226 | else: 227 | return (loss,hiddensrc,hiddenpssrc,att_contexts,encodings) 228 | if pssrc is None: 229 | if find_preds: 230 | if not find_cosine: 231 | return (loss, (self.greedy(src[0:2],pass_att=False,ncontrol=ncontrol)),hiddensrc,hiddenpssrc) if train else loss 232 | else: 233 | return (loss, cosineloss, (self.greedy(src[0:2],pass_att=False,ncontrol=ncontrol)),hiddensrc,hiddenpssrc) if train else loss 234 | else: 235 | if not find_cosine: 236 | return (loss,hiddensrc,hiddenpssrc) if train else loss 237 | else: 238 | return (loss, cosineloss,hiddensrc,hiddenpssrc) if train else loss 239 | else: 240 | if find_preds: 241 | if not find_cosine: 242 | return (loss, (self.greedy(src[0:2],pass_att=False,ncontrol=ncontrol)),context,pscontext) if train else loss 243 | else: 244 | return (loss, cosineloss, (self.greedy(src[0:2],pass_att=False,ncontrol=ncontrol)),context,pscontext) if train else loss 245 | else: 246 | if not find_cosine: 247 | return (loss,context,pscontext) if train else loss 248 | else: 249 | return (loss, cosineloss,context,pscontext) if train else loss 250 | 251 | 252 | def beam_search(self, sentences, beam_size=12, max_ratio=2, train=False,rnk=2,noiseratio=0.5,pass_att=False,ncontrol=0): 253 | self._train(train) 254 | batch_size = len(sentences) 255 | input_lengths = [len(data.tokenize(sentence)) for sentence in sentences] 256 | hidden, context, context_lengths, sentences = self.encode(sentences, train,noiseratio=noiseratio,testing=True) 257 | translations = [[] for sentence in sentences] 258 | pending = set(range(batch_size)) 259 | 260 | hidden = hidden.repeat(1, beam_size, 1) 261 | context = context.repeat(1, beam_size, 1) 262 | context_lengths *= beam_size 263 | context_mask = self.mask(context_lengths) 264 | ones = beam_size*batch_size*[1] 265 | prev_words = beam_size*batch_size*[data.SOS] 266 | output = self.device(self.decoder.initial_output(beam_size*batch_size)) 267 | 268 | translation_scores = batch_size*[-float('inf')] 269 | hypotheses = batch_size*[(0.0, [])] + (beam_size-1)*batch_size*[(-float('inf'), [])] # (score, translation) 270 | 271 | while len(pending) > 0: 272 | # Each iteration should update: prev_words, hidden, output 273 | var = self.device(Variable(torch.LongTensor([prev_words]), requires_grad=False)) 274 | logprobs, hidden, output, att_scores = self.decoder(var, ones, self.decoder_embeddings, hidden, context, context_mask, output, self.generator,pass_att=True,ncontrol=ncontrol) 275 | prev_words = logprobs.max(dim=2)[1].squeeze().data.cpu().numpy().tolist() 276 | prev_words_att = att_scores.topk(dim=2,k=2)[1].squeeze().data.cpu().numpy().tolist() 277 | word_scores, words = logprobs.topk(k=beam_size+1, dim=2, sorted=False) 278 | word_scores = word_scores.squeeze(0).data.cpu().numpy().tolist() # (beam_size*batch_size) * (beam_size+1) 279 | words = words.squeeze(0).data.cpu().numpy().tolist() 280 | 281 | for sentence_index in pending.copy(): 282 | #consider a particular source for which beamsize best half translations have been extracted. 283 | #now next best beamsize translations should be found. 284 | #candidates which aren't finished will be found. 285 | candidates = [] # (score, index, word) 286 | for rank in range(beam_size): 287 | index = sentence_index + rank*batch_size 288 | for i in range(beam_size + 1): 289 | word = words[index][i] 290 | word_att = prev_words_att[index] 291 | score = hypotheses[index][0] + word_scores[index][i] 292 | if word != data.EOS: 293 | candidates.append((score, index, word, word_att)) 294 | elif score > translation_scores[sentence_index]: 295 | translations[sentence_index] = hypotheses[index][1] + [(word,word_att)] 296 | translation_scores[sentence_index] = score 297 | best = [] # score, word, translation, hidden, output 298 | #beamsize best translations are inserted into best. 299 | for score, current_index, word, word_att in sorted(candidates, reverse=True)[:beam_size]: 300 | translation = hypotheses[current_index][1] + [(word,word_att)] 301 | best.append((score, word, word_att, translation, hidden[:, current_index, :].data, output[current_index].data)) 302 | #update hypotheses based on best array 303 | for rank, (score, word, word_att, translation, h, o) in enumerate(best): 304 | next_index = sentence_index + rank*batch_size 305 | hypotheses[next_index] = (score, translation) 306 | prev_words[next_index] = word 307 | hidden[:, next_index, :] = h 308 | output[next_index, :] = o 309 | if len(hypotheses[sentence_index][1]) >= max_ratio*input_lengths[sentence_index] or translation_scores[sentence_index] > hypotheses[sentence_index][0]: 310 | pending.discard(sentence_index) 311 | if len(translations[sentence_index]) == 0: 312 | translations[sentence_index] = hypotheses[sentence_index][1] 313 | translation_scores[sentence_index] = hypotheses[sentence_index][0] 314 | translations_att = [[translations[i][j][1] for j in range(len(translations[i])) ] for i in range(len(translations))] 315 | translations = [[translations[i][j][0] for j in range(len(translations[i])) ] for i in range(len(translations))] 316 | return self.trg_dictionary.ids2sentences(translations,translations_att=translations_att,sentences=sentences,pass_att=pass_att,testing=True) 317 | -------------------------------------------------------------------------------- /predictions/gen_lower.sen.src2trg.wgan.unsup.noadvcompl.control1.allclass.denoi.singleclassf.rho1.0.10k.13500.test.noredund: -------------------------------------------------------------------------------- 1 | one side of the armed wars is composed mainly of the Sudanese military and the Janjaweed , a Sudanese militia group was mostly from the Afro-Arab Abbala tribes of the northern Rizeigat . 2 | Jeddah is the principal gateway to Mecca , Islam 's holiest city , which slain Muslims are required to visit at least once in their lifetime . 3 | the Great Dark Spot is thought to represent a hole in the carbon cloud deck of Neptune . 4 | his next work , Saturday , follows an most much day in the life of a successful neurosurgeon . 5 | the spider , the trick character , spun a black cord , 6 | there he died six weeks later , on 13 January 888 . 7 | they are been akin to the coastal peoples of Papua New Guinea . 8 | since 2000 , the recipient of the Kate Greenaway Medal has also been presented with the Colin Mears Award to the value of £ 5000 . 9 | following the musicians are dancers , who often play the sogo ( a tiny drum that makes almost no sound ) and tend to have more happy even — . 10 | the spacecraft consists of two main elements : 11 | Alessandro " ( " ) Mazzola ( born 8 November 1942 ) is an former football player . 12 | it was originally thought that the debris thrown up by the collision filled in the smaller crater . 13 | Graham attended Wheaton College from 1939 to 1943 , when he graduated with a BA in jerusalem . 14 | however . 15 | many species had finally by the end of the 19th century , with European settlement . 16 | in 1987 Wexler was inducted into the Rock and Roll Hall of Fame . 17 | in its pure form , dextromethorphan occurs as a white powder . 18 | admission is to Tsinghua is extremely easy . 19 | today NRC is formed as an independent house , private foundation . 20 | it is situated at the coast of the Baltic Sea , where it lies the city of Stralsund . 21 | he was also named 1982 " Sportsman of the Year " by Sports Illustrated . 22 | almost is a British sport believed to derived from the same origins as many sports . 23 | for example , King Bhumibol was born on Monday , so on his birthday throughout Thailand will be decorated with yellow color . 24 | both names became defunct in 2007 when they were merged into The National Museum of Scotland . 25 | however , Tagore notably numerous styles , including craftwork from northern New Ireland , Haida carved from the west coast of Canada ( British ) . 26 | on October 14 , 1960 , Presidential John candidate F. Kennedy proposed the concept of what became the Peace Corps on the steps of Michigan . 27 | she performed for President Reagan in 1988 's Great 's Great at the White House series , which aired on the Public Broadcasting Service . 28 | Perry Saturn ( with Terri ) defeated Eddie Guerrero ( with Chyna ) to win the WWF European Championship ( 8:10 ) Saturn a Guerrero drop . 29 | she remained in the United States until 1927 when she when her husband returned to France . 30 | Despina was discovered in late July , 1989 from the images taken by the Voyager 2 probe . 31 | the first Italian Grand motor racing championship took place on 4 September 1921 at Brescia . 32 | he also completed two collections of short stories : The Ribbajack & Other Curious Yarns and Seven Strange and Ghostly Tales . 33 | at the Voyager 2 images Ophelia appears as an slender object , the major axis lying towards Uranus . 34 | the British decided to help him and take the land by force . 35 | some towns on the Eyre Highway in the north-west corner of Western Australia , between the South Australian border almost as far as Caiguna , do not follow official Western time . 36 | in 19th stars 37 | the other cities on the Palos Verdes Peninsula include Rancho Palos Verdes , Rolling Hills Estates and Rolling Hills . 38 | fear that Drek will destroy the galaxy , Clank asks Ratchet to help him to find the famous villain Captain Qwark , in an effort to stop Drek . 39 | it is not actually a true louse . 40 | he did a call a user-centered design in product in development cycle and also works towards dance while design as a mainstream record . 41 | it is if possible that the other warriors who may have reported you , and the successor who blocked you , are part of a murder against someone half a world away they 've met in person . 42 | working Group I : Assesses live aspects of the climate system and climate change . 43 | the island chain forms part of the Hebrides , separated from the Scottish mainland and from the Inner Hebrides by the stormy waters of the Minch , the Sea of the Hebrides . 44 | Orton and his wife welcomed Alanna Marie Orton on July 12 , 2008 . 45 | a minor planet lanes are number-name used by the Minor Planet Center , a branch of the IAU . 46 | by early September on 30 , wind winds began to decrease increase and a trend began . 47 | each entry has a intersection ( a cheese of data ) which is a copy of the origin in some backing store . 48 | as a result . 49 | Mariel of Redwall is a fantasy novel by Brian Jacques , published in 1991 . 50 | Ryan Prosser ( born 10 July , 1988 ) is a professional rugby union player for Bristol Rugby in the Guinness Premiership . 51 | like previous earlier reports , it consists of four reports , three of them from its working groups . 52 | their niece Hélène Langevin-Joliot is a professor of nuclear physics at the University of Paris , and their grandson Pierre Joliot , who was named after Pierre 53 | this stamp remained the standard letter stamp for the remainder of Victoria 's reign , and vast amounts were printed . 54 | the International Fight League was an American mixed martial arts ( MMA ) promotion ever as the world 's first MMA league . 55 | 56 | aside from this , Cameron has often worked in Christian-themed productions , among them the post-Rapture films Left Behind : The Movie Left Williams . 57 | this was the area east of the mouth of the Vistula River , later sometimes called " Prussia proper " . 58 | after he returned to return to Yerevan to teach at the local Conservatory and later he was appointed artistic director of the Armenian Philarmonic Orchestra . 59 | the story of Christmas is based on the testament accounts given in the Gospel of Matthew , and are 60 | Weelkes was later to find himself in trouble with the Chichester Cathedral government for his heavy drinking and immoderate performances . 61 | so far the ' celebrity ' episodes have included Vic Reeves , Nancy Sorrell , Gaby Roslin , Scott Mills , Mark . 62 | it was discovered by Stephen P. Synnott in images from the Voyager 1 space probe taken on March 5 , 1979 while planet around Jupiter . 63 | Gomaespuma was a Spanish radio show , hosted by Juan Luis Cano and Guillermo Fesser . 64 | on 16 June 2009 , the official release date of The Resistance was announced on the band 's website . 65 | he is also a member of another Jungiery 183 Club . 66 | the 67 | in return , Rollo vows to Charles , converted to Christianity , and he began to defend the northern region of France against the northern groups . 68 | it is derived from Voice of America ( VoA ) Special English . 69 | Disney received a jaguar Oscar monte and seven , ones , presented to him by 10-year-old child actress Shirley Temple . 70 | it was the first asteroid to be discovered by a spacecraft . 71 | Hinterrhein is an district in the canton of Graubünden , Switzerland . 72 | it continues as the Bohemian Switzerland in the Czech Republic . 73 | this leads to consumer confusion when 220 ( 1,048,576 ) is referenced as 1 MB ( megabyte ) instead of 1 . 74 | the incident has been the subject of numerous reports as to study in scholarship . 75 | they are castrated so that the animal may be more handle or may put on weight more quickly . 76 | seventh sons have strong " 77 | Benchmarking conducted by PassMark Software the highlights the 2009 version 's 52 second base time , 32 second line , and 7 MB memory . 78 | Volterra is a town in the Tuscany region of Italy . 79 | although the taste of 80 | the tongue is sticky because of the presence of glycoprotein-rich tooth , which both lubricates movement in and out of the snout and helps to catch ants and drinking , which . 81 | the same tram had derailed on 30 May 2006 at Starr Gate loop during previous trials . 82 | there are statues of Sir Alf Ramsey and Sir Bobby Robson , both former Ipswich Town and England managers , outside the ground . 83 | take the square root of the opinion . 84 | volunteers provided food , masks , water , children 's toys , massages , and a live rock band performance for those at the stadium . 85 | Vouvray-sur-Huisne is a commune in the Sarthe department in the region of Pays-de-la-Loire in northwestern France . 86 | if there are no strong land use . 87 | it is also a starting point for people wanting to explore Cooktown , Cape York , Peninsula and the Atherton Tableland . 88 | drugs often trying pain but are not usually dangerous . 89 | none of the authors , authors , they sponsor , survivors , vandals , or anyone else with Wikipedia with in any 90 | George Frideric Handel also served as Kapellmeister for George , Elector of Hanover ( who eventually became George I of Great ) . 91 | their eyes are quite small , and their visual beautiful is poor . 92 | they are identified as weapons of meat in only by chitin . 93 | Oregano is an important sauce in Greek cuisine . 94 | tickets can be retailed for National Rail services , the Docklands Light Railway and on Oyster card . 95 | these works he produced and published himself , while his much larger paintings were mostly commissioned work . 96 | the historical method are the techniques and rules by which ruled which use primary sources and other evidence to research and then to write history . 97 | the sheer weight of the continental icecap sitting on top of Lake Vostok is believed to develop to the high oxygen estate . 98 | as of 2000 , the population was 89,148 . 99 | Aliteracy ( sometimes spelled alliteracy ) is the state of being able to read but being concerned in doing so . 100 | Mifepristone is a drug compound used as a drug . 101 | it will then rid itself and sink and back to the river bed in order to keep its food and wait for its next meal . 102 | though , research has shown children are less likely to report a crime if it involves someone that he or she knows , and trust , and cares . 103 | today , Landis ' father has become a owner of his son of his son and also himself as one of Floyd 's biggest fans . 104 | shortly after advance Category 4 status , the outer rainfall of the hurricane became curves . 105 | the price for a certain type of labor is the wage . 106 | Convinced that the grounds were haunted , they decided to publish their findings in a book An Adventure ( 1911 ) under the script of Elizabeth Frances and Frances Lamont . 107 | he settled in London , he spent himself to teaching . 108 | Brunstad has several fast food restaurants , a cafeteria-style restaurant , coffee bar , and its own grocery store . 109 | he left a troops of 11,000 troops to garrison to the newly region . 110 | in 1438 Trevi passed under the lands rule of the Church as part of the embassy of Perugia , and its thenceforth its history merged with that of the States of the Church , then ( 1860 ) with the united Kingdom of Italy . 111 | the depression moved across inland on the 20th as a small of rainfall of , and the next day over Brazil , where it caused heavy rains and flooding . 112 | the New York City Housing Authority Police Department was a law agency in New York City that existed from 1952 to 1995 . 113 | the current lineup of the band ( Flynn ( vocals , guitar ) , Duce ( bass ) , Phil Demmel ( guitar ) , and Dave McClain ( drums ) . 114 | save Countries with a small Muslim population are more likely than Muslim-majority countries of the Greater Middle East to use mosques as a way to promote civic . 115 | the characters are foul-mouthed extension of their earlier characters Pete and Dud . 116 | Johan was also the original bassist of the Swedish power band HammerFall , but quit before the band ever released a studio album . 117 | in 1998 , Culver ran for Iowa Secretary of State and was fought . 118 | in 1990 , Mark Messier took the Hart over Ray Bourque by a margin of two votes , the difference being a single vote . 119 | Shade sets the main plot of the novel in motion when he dies that law , and pushes a chain of events that leads to the destruction of his colony 's . 120 | the female equivalent is a daughter . 121 | he was diagnosed with 1943 lung cancer in April 1999 . 122 | prior to the arrival of the storm . 123 | the form of chess played is speed chess in which each competitor has a total of twelve minutes for the whole game . 124 | the Amazon Basin is the part of South America drained by the Amazon River and its tributaries . 125 | the two former were two were later were charged with mutiny and blessed for their roles in the 1979 coup and the 1980 Gwangju massacre . 126 | Moderate to severe damage extended up the Atlantic coastline and far as inland as West Virginia . 127 | because the owner tends to be unaware , these computer are ate to compared . 128 | the wave traveled across the Atlantic , and organized into a tropical depression off the northern coast of Haiti on September 13 . 129 | for example , the stylebook of the Associated Press is updated annually . 130 | the four contain texts are the Gospel of Matthew , Gospel of Mark , Gospel of Luke and Gospel of John , probably written between AD 65 and 100 ( see the Hebrews ) . 131 | since the end of the 19th century Eschelbronn is well known for its furniture industry . 132 | the upper half also is also the coat of arms of the former district Oberbarnim . 133 | unlike the clouds on Earth , however , which are composed of crystals of ice , 134 | their cup is not limited until they reach legal adulthood . 135 | Development Stable releases are rare , but there are often Subversion heard Subversion which are stable enough to use . 136 | finally 1482 in the Order dispatched him to Florence , the ‘ city of his destiny ’ . 137 | in the Soviet years , the Bolsheviks demolished two of Rostov 's principal landmarks ( St Alexander Nevsky Cathedral ( 1908 ) and St Nakhichevan in Nakhichevan ) . 138 | he died on May 29 , 1518 in Madrid , Spain and was buried in the church of San Benito d 'Alcantara . 139 | this was also in the Miller-Urey attack by Stanley L. Miller and Harold C. in 1953 . 140 | Cogeneration ( also combined heat and power , 141 | on the occasion the male " den master " will also allow a second male into the den ; the reason for this is unclear . 142 | a Wikipedia mr. is a JavaScript and / or a CSS '' that can be enabled simply by check an option in your Wikipedia values . 143 | Below are some useful links to allow your involvement . 144 | he served as the prime minister of Egypt between 1945 and 1946 and again from 1946 and 1948 . 145 | she was left behind ( except for this vary ; when the rest of the Nicoleños were moved to the mainland . 146 | James I appointed him Gentleman a Gentleman of the Chapel Royal , where he served as an assistant from at least 1615 until his death . 147 | Chauvin was trying to receive his award and initially showed that he may not accept it . 148 | later , Esperanto speakers began to see the language and the culture that had grown up around it as ends in themselves , even Esperanto is adopted by the United 149 | Dry air usually air up around the southern surface of the western reef upstream of the deep rainfall by early September on 12 . 150 | Calvin Baker is an American writer . 151 | Eva Anna Paula Braun , died Eva Anna Paula Hitler 6 152 | each version of the License is given a distinctive version number . 153 | most IRC servers do not require users to register an account but a user will have set to set a nickname before being connected . 154 | that same year he also received a physics certificate , becoming the youngest certificated aircraft trainer in New York . 155 | SummerSlam ( 2009 ) is an official professional wrestling premiere event produced by World Wrestling Entertainment ( WWE ) which will take place on August Los in Los . 156 | usually portrayed as being bald , with long centimeters , he is said to be an version of the Southern Polestar . 157 | a few animals have curved response , changing color in changing labs , either edges ; brownish , 158 | Val Venis defeated Rikishi in a cage match to retain the WWF Intercontinental Championship ( 14:10 ) Venis pinned Rikishi after hit Rikishi with a TV camera . 159 | this closely resembles the Unix of Unix of each programs each thing well and well and working together over universal . 160 | he came from a musical family . 161 | the largest population of Mennonites are in Canada , Democratic Republic of Congo and the United States . 162 | Naas is a major " Dublin Suburb town , with many people living in Naas and working in Dublin . 163 | Acanthopholis 's armour consisted of oval plates set almost carbon into the skin , with spikes from the neck and shoulder area , along the spine . 164 | Origin Irmo was chartered on Christmas in 1890 in response to the opening of the Columbia , Newberry and Laurens Railroad . 165 | though , bills proposed by the Law Commission , and reverse bills , start in the House of Lords . 166 | in the years before his final release in 1474 . 167 | you may add a passage of up to five words as a Front-Cover Text , and a passage of up to 25 words as a Back-Cover Text , to the end of the list of Cover Texts in the Modified Version . 168 | he is interred in the Restvale Cemetery in Alsip , Illinois . 169 | Bone bone is the rigid tissue found in the hollow interior of bones . 170 | `` nebulae are usually blue because the rig is more less for blue light than red ) this is the same random process that gives us blue and red sunsets . 171 | Monteux is a commune of the Vaucluse département in southern France , in the area Provence-Alpes-Côte d 'Azur . 172 | MacGruber starts asking for simple objects to make something to ask the bomb , but he is later drunk by something ) usually have his personal life ) that makes him out of time . 173 | this was completed complete when Messiaen died , 174 | Shi ' Muslims consider Karbala to be one of their holiest cities after Mecca , Medina , Jerusalem and Najaf . 175 | the PAD called for the revolt of the government of Thaksin Shinawatra , Samak Sundaravej and Somchai Wongsawat , whom the PAD accused of being versa for Thaksin . 176 | however travel through very remote areas , on isolated tracks , requires advance and a suitable , a vehicle , usually is a four wheel drive ) . 177 | while at Kahn he was chief architect for the Fisher Building in 1928 . 178 | he lets himself because he has to leave for rehearsal , and he and Dr. Schön leave . 179 | Britpop emerged from the British music scene of the early 1990s and was composed by bands influenced by British guitar pop music of the 1960s and 1970s . 180 | this was absorbed into brigades being formed for XI Brigade . 181 | the Sheppard line currently has fewer users than the other two subway lines , and shorter trains are run . 182 | it has a capacity of 98,772 , making it the largest stadium in Europe , and the eleventh largest in the world . 183 | in December , 1967 , Ten was honored as one of the Righteous Among the Nations by the State of Israel . 184 | some articles are quite lengthy and rich in content while others are shorter ) ( stubs ) and of lesser quality . 185 | about 95 species are currently accepted . 186 | Eugowra is said to be named after the Indigenous Australian word meaning " The place where the sand wash down the hill " . 187 | terms such as " for clothing " and " movie " for " moving picture " are oft-heard terms in English . 188 | Jurisdiction draws its material from public as public law , conflict of laws of law and the powers of the main and legislative branches of government . 189 | he followed this with several other pieces about Hiawatha : The of Minnehaha , Overture to The Song of Hiawatha and Hiawatha 's Departure . 190 | the capital of the state is Aracaju , pop ) pop . 191 | despite this , Farrenc was paid less than her male than male than for nearly a decade . 192 | Gumbasia was created in a style Vorkapich taught called Kinesthetic Film Principles . 193 | the lawyer , Brandon ( Waise Lee ) , became his idol , and MK Sun grew up to be a lawyer . 194 | ISBN 1-876429-14-3 is an historic township located near Cowra in the central west of New South Wales , Australia in Cabonne Shire . 195 | military career Donaldson enlisted in the Australian Army on 18 June . 196 | deposits from California , Europe and China were also dug along the Peel River and up the mountain slopes . 197 | before the arrival of the pocket cpu , it was the most commonly used in tool and science . 198 | the Kindle 2 features 16-level grayscale display , improved battery life , 20 percent faster page-refreshing , a text-to-speech option to read the text ? 199 | 200 | thirty defencemen are in the Hall of Fame , more than any other position , while only 35 players have been inducted . 201 | alternative views on the subject have been proposed throughout the centuries ( see below ) , but all were rejected by mainstream Christian bodies . 202 | the album , however , was banned from many record stores worldwide . 203 | the legs are wide at the top , and narrow at the ankle . 204 | in late 2004 , Suleman made complaints by cutting Howard cutting 205 | the company opened twice as many Canadian links as McDonald 's " 's Wendy 's Tim 's Tim Hortons by March " , Ottawa " Ottawa , Journal . 206 | Plot Captain Caleb Holt ( Kirk Cameron ) is a sailor in Albany , 207 | he won the election held on 2 March 2008 with 71.25 % of the popular vote . 208 | the plant is considered a living fossil . 209 | in 1990 , she was the only female descent allowed to perform in Saudi Arabia . 210 | Orchestration Stravinsky first conceived of writing the ballet in 1913 . 211 | protests across the nation were survived . 212 | Offenbach 's numerous operas , such as Orpheus in the Underworld , and La belle Hélène , were extremely popular in both France and the English-speaking world during the 1850s and 1860s . 213 | Roof tiles dating back to the Tang Dynasty with this symbol have been found west of the ancient city of Chang '' ( Xian ) . 214 | Jeanne Marie-Madeleine Demessieux ( February 13 , 1921 November 11 , 215 | by most accounts , the instrument was nearly to control . 216 | Santa Maria Maggiore St. ( St. the Greater ) , the earliest extant church in Assisi . 217 | Radar : Radar observed indicate a pure iron-nickel music . 218 | Railway Gazette International is a monthly business journal covering the railway , metro , light rail and tram industries worldwide . 219 | he was appointed Companion of Honour ( CH ) in 1988 . 220 | Loèche ports the weapons of Onyx 221 | a matchbook is a small simple header ( matchcover ) beneath a circular of matches and having a coarse striking surface on the floor . 222 | she was among the first doctors to object to lunch smoking around children , and drug use in pregnant women . 223 | Defiantly , she resigned to never to leave the Commune , and pleased the judges to sentence her to death . 224 | OEL manga series Graystripe 's Trilogy 's Trilogy is a three volume original English-language manga series following Graystripe , between the time that he was taken by Twolegs in Dawn in he . 225 | Samovar & Porter ( 1994 ) , p . 226 | he was also famous for his prints , book covers , posters , and garden pottery clothes . 227 | during his childhood suffered from collapsed lungs twice , she had pneumonia 4-5 times a year , a penalty , and had a tonsillar cell . 228 | Dr. David Lindenmeyer Australian ( Australian National University ) has argued that the for nest . 229 | the Montreal Canadiens are a professional ice hockey team based in Montreal , Quebec , Canada . 230 | small value 231 | the term gribble was originally assigned to the wood-boring species , most the first species described from Norway by Rathke in 1799 , Limnoria . 232 | the wounds faced by a club are generally known as bludgeoning or blunt-force injuries . 233 | thereafter the county 's plan was conducted at Duns or Lauder until Greenlaw became the county town in 1596 . 234 | no skater has yet accomplished a triple Axel in competition . 235 | from the telephone exchange . 236 | however , even to those who enter the prayer hall of a mosque without the intention of praying of there are still rules that apply . 237 | it is described as pointed in the face and about the size of a rabbit . 238 | Computer performance is broken by the amount of useful accomplished by a computer system compared to the time and resources used . 239 | some of the largest basin in the world can be found along the Volga . 240 | the crosier symbol the monastery of the region . 241 | Human skin dishes can range from very dark brown to very pale pink . 242 | dale from ShoreBank , a community development bank in Chicago , helped Yunus with the official production of the bank under a Ford Foundation . 243 | Bremer reported plans to put Saddam on trial , but claimed that the details of such a trial had not yet been determined . 244 | representatives of the Professional Hockey Writers ' Association vote for the All-Star Team at the end of the regular season . 245 | Tajikistan , Turkmenistan and Uzbekistan border Afghanistan to the north , Iran to the west , Pakistan to the south and the People 's Republic of to the east . 246 | Nupedia was founded on March 9 , 2000 , under the ownership of Bomis , Inc , a web portal company . 247 | notable features of the design include key-dependent S-boxes and a highly complex key schedule . 248 | Iain Grieve ( born 19 February , 1987 in Jwaneng , Botswana ) is a rugby union for Bristol in the Guinness Premiership . 249 | other nearby settlements include Pont-Bellanger and Beaumesnil . 250 | the code model was written by proposed by Murray by Gell-Mann and George Zweig in 1964 . 251 | the fourth ring is decorated with golden sepals and was added in 1938 39 when the column was moved to its present location . 252 | West Berlin had its own postal government , separate from West Germany 's , which issued its own postage stamps until 1990 . 253 | the Primavera is a painting by the Italian Renaissance painter Sandro , c 1482 . 254 | New South 's largest city and capital is Sydney . 255 | the rope is most often canucks , but other foods , such as worn , vinyl dealer or knots , are also sometimes used . 256 | the name survives as a brand for a related tv digital television channel , digital radio station , and website which have survived the demise of the magazine . 257 | at four-and-a-half years old he was left to fend for himself on the streets of northern 258 | stands were eventually added behind each set of goals during the 1980s and 1990s as the ground began to be demolished . 259 | a town may be may be described as a market town or as the market rights even if it no longer holds a market , provided the right to so still exists . 260 | a facade on the eastern attack was built later . 261 | events Europe July 29 — Battle of Stiklestad ( Norway ( Olav ) : 262 | others have claimed that Tresca was eliminated by the NKVD as revenge as for some of the Stalin regime of the Soviet Union . 263 | this resulted in both Montenegro and Serbia becoming independent countries . 264 | Use HTML 265 | Schuschnigg finally responded publicly that reports of riots were false . 266 | Addiscombe is a suburb in the London Borough of Croydon , England . 267 | depending on the context , another closely-related meaning of constituent is that of a citizen resided in the area is governed , represented or served by a politician . 268 | Prunk is a member of Institute of European History in Mainz , and a senior fellow of the Center for European Integration in Bonn . 269 | everyone also had a cameo appearance in the 2003 French film Taxi 3 as a passenger . 270 | instead , the crew bugs a trailer with a porch arm attached to the " drive " and shot the scene while riding up Templin north of Santa Clarita . 271 | the conference papers were published the next year in a bookMicroeconomic Foundations of Employment and Inflation Theory by Phelps et al . 272 | Wario Land The Wario Land series is a platforming series that started with Wario Land : Super Mario Land , a spin-off of the Super Mario Land series . 273 | Frédéric Chopin 's Opus 57 is a berceuse for solo piano . 274 | these attacks may have been seen in origin rather than physical . 275 | a legend has stated that " it was quinine 's eruption that gave fresh residents to sneak into the Gold Coast , Nigeria and other parts of west " . 276 | in fact , have shown evidence of hydrated bone and silicates , which indicates rather a rocky surface . 277 | she became the editor of her husband 's works for Breitkopf und Härtel . 278 | Mercury is similar in appearance to the 279 | Geography The town lies in the Limmat valley between Baden and Zürich . 280 | these must provide an excellent habitat for chinkara , grass deer and blue bull . 281 | after the Sena dynasty , Dhaka was stripped , ruled by the Turkish and Afghan governors from the Delhi Sultanate before the arrival of the Mughals in 1608 . 282 | the Prime Minister stays in office only as long as he or she retained the support of the lower house . 283 | for Rowling , this scene is important because it shows Harry 's bravery , and voiced by Cedric 's corpse , he also he also selflessness . 284 | on June 1 , 1972 , he and fellow RAF members Jan-Carl Raspe and Holger Meins were arrested after a lengthy shootout in Frankfurt . 285 | together they formed New Music Manchester , a group committed to music . 286 | the compact and heavy hurricane caused extreme damage in the upper Florida Keys , as a storm of about 18 to 20 feet affected the region . 287 | it is now the site of Meher Baba 's goddess ( tomb-shrine ( 288 | the collapsed dome of the main church has been restored entirely . 289 | in 2005 , Meissner became the second American woman to land the triple jump in national competition . 290 | Salem is a city in Essex County , Massachusetts , United States . 291 | seven species of pipefish and nine species of seahorse have been recorded . 292 | Saint Martin is a tropical island in the northeast Caribbean , about 300 km ( 186 miles ) east of Puerto Rico . 293 | therefore , these PDFs can not be distributed without feel if they contain images . 294 | in April 1862 , Ben was arrested on the orders of Police Inspector Sir Frederick Pottinger for competing in an armed robbery whilst in the company of Frank Gardiner . 295 | Heavy rain fell across portions of Britain on October 5 , causing resulting flood of flood waters . 296 | version 2009.1 provides a USB user to create a Live USB , where the user 's chain and personal data can be saved if . 297 | in exact relation to the parties ' three strength in the Federal Assembly , the seats were distributed as follows : Free Democratic Party ( FDP ) 's Party ) . 298 | a fee is the price one pays as gifts for services , although the honorarium paid to a doctor , lawyer , who , or other member of a learned . 299 | Ohio State 's library system contains fourteen library located on its Columbus campus . 300 | in other times , both Iceland 301 | the singles from the album included " the Way " , " The Zephyr " , n't Ca Stop " . 302 | 303 | the body color varies from medium brown to gold-ish to beige-white ; and is usually is marked with dark brown spots , on many on the limbs . 304 | the Britannica was primarily a Scottish enterprise , as symbolised by its thistle logo , the chancel shield of Scotland . 305 | the area covered by the warning issued on September was extended southwest as Jose submarines , before being canceled soon after evening on September . 306 | in August 2003 , the San Diego Union Tribune alleged that U.S. Marine pilots and their armies confirmed the use of Mark 77 firebombs on Iraqi during the initial stages of combat . 307 | the latter provided with the sort of information of it later provided by intertitles , and can help know ... what the film may have been like . 308 | that is because real estate , businesses and other assets in the underground islands of the Third 309 | he stepped from Sydney Cove several times before being shot dead in 1796 . 310 | Ned and Dan advanced to the police camp , orders them to surrender . 311 | before the second game got 2018 , the press agreed that the " midget-in-a-cake " appearance had not been up to Veeck 's usual debut standard . 312 | in a short video made the charity Equality Now Joss confirmed that " Fray is not done , Fray is coming back . 313 | a mutant is a type of fictional character that appears in comic books published by marvel comics . 314 | the SAT Reasoning Test ( formerly Scholastic Aptitude Test and Scholastic Assessment Test ) is a same test for college award in the United States . 315 | civil riots in northern Italy spawns the medieval musical form of Geisslerlieder , penitential songs sung by wisdom bands of Flagellants . 316 | some reports read that various factors increase the population of both illness and agony . 317 | his sentence was transportation to Australia for seven years . 318 | Waugh writes that Charles had been " in search of love in those days when he when first met Sebastian , finding " that low door in the wall of levels . 319 | her boss friendship with the Russian goddess Grigori Rasputin was also an important factor in her life . 320 | the term dorsal refers to fruit structures that are either situated toward or grow off that side of an animal . 321 | the term " protein " itself was discovered by 322 | after the Jerilderie raid , the gang laid low for 16 months when capture . 323 | Barneville-la-Bertran is a commune in the Calvados department in the Basse-Normandie region in northwestern France . 324 | color ranges from orange to pale yellow . 325 | in 1963 an extension was added , curves north from Union station , below University Avenue and Queen 's Park to Bloor near Bloor , where it turned west to St. at George and Bloor Streets . 326 | before 1980 , a section of the Commonwealth Railways Central Australian line passed along the western side of the Simpson Desert . 327 | it is located on an old fork trail which led west through the mountains to Unalakleet . 328 | people with cardiomyopathy are often at risk of arrhythmia or sudden death or both . 329 | as the largest in Mesoamerica , it includes a vast and varied landscape , from the plateau regions of the Sierra Madre to the fertile plains of Yucatán . 330 | Google subsequently made the comic available on Google Books and their site and mentioned it on its official blog along with an reason for the early release . 331 | you may register a lineage with the college , where they are even are determined and require and official proof before being altered . 332 | the book , Political Economy , was published in 1985 , but had limited classrooms adoption . 333 | he toured with the IPO in the spring of 1990 for their national performance in the Soviet Union , with concerts in Moscow and Leningrad , and toured with the IPO again in 1994 , performing in China and India . 334 | 1805 Wars 335 | it has long been the economic centre of northern Nigeria , and a centre for the production and export and of groundnuts . 336 | a majority of South Indians speak one of the five Dravidian languages — Kannada , Malayalam , Tamil , Telugu and Tulu . 337 | Meteora earned the band multiple awards and honors . 338 | after a brief shootout , the WWF cavalry turned around and attacked Kane and Jericho . 339 | most of the songs were written by Richard M. Sherman and Robert B. Sherman . 340 | in the 5th century Slavs started to move into the area . 341 | from 342 | Winchester is a city in Scott County , Illinois , United States . 343 | Arzashkun seems to be the Assyrian form of an Armenian name ending in -ka formed from a name Arzash , which recalled the name Arsene , Arsissa , applied by the part of Lake . 344 | out of 16,421 participants in the national casting , she was chosen among the 15 candidates to appear on the TV show . 345 | its episodes were broadcast on the ABC network from its debut on September 21 , 1993 to March 1 , 2005 . 346 | the latter device can then be designed and used in less standards . 347 | Gimnasia hired first famous Colombian trainer Francisco Maturana , and then Julio César Falcioni , but both had limited success . 348 | Brighton is a city in Washington County , Iowa , United States . 349 | ; she appeared in several music videos , including " It Girl " by John Oates and " Just Lose " by Eminem . 350 | on June 24 1979 ( the 750th anniversary of the village ) , Glinde received its town charter . 351 | Pauline returned in the Game Boy remake of Donkey Kong in 1994 , and later Mario vs. Donkey Kong 2 : March of the Minis in 2006 , although the character is now described as " 's friend " . 352 | the stem is unusual sauce and stretches to many times its normal diameter during breast birth . 353 | his real date of birth was never recorded , but it is believed to be a date between 1935 and 1939 . 354 | this minute measure indicates how much of a new drug or other substance ) . 355 | although the name suggests that they are located in the Bernese Oberland region of the canton of Bern , portions of the Bernese Alps are in the adjacent canton of Valais , Fribourg and Vaud . 356 | there he had one daughter , later baptized as Mary Ann Fisher Power , to Ann ( e ) Power . 357 | during an interview , Edward Gorey mentioned that Bawden was one of his favorite artists . 358 | the string can toss in different modes just as a guitar string can produce different notes , and every mode appears as a different vector : 359 | Gable also earned an Academy Award election when he portrayed Fletcher Christian in 1935 's Mutiny on the Bounty . 360 | -------------------------------------------------------------------------------- /undreamt/LICENSE.txt: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /predictions/gen_lower.sen.src2trg.wgan.semisup10k-sel-6-4.noadvcompl.control1.allclass.denoi.singleclassf.rho1.0.10k.10000.test.noredund: -------------------------------------------------------------------------------- 1 | one side of the armed conflicts is composed mainly of the Sudanese military and the Janjaweed , a Sudanese regiment group recruited mostly from the Afro-Arab Abbala tribes in the northern Rizeigat . 2 | Jeddah is the principal gateway to Mecca , Islam 's holiest city , which germans Muslims are required to visit at least once in their lifetime . 3 | the Great Dark Spot is thought to represent a hole in the asphalt cloud deck of Neptune . 4 | his next work , Saturday , follows an happy day in the life of a successful neurosurgeon . 5 | the giant , the recurring character , spun a black cord and , it to the ball , ball , away to the east , pulling on the cord with all his strength . 6 | he died six weeks later on 13 on January 888 . 7 | they are likely to the coastal peoples of Papua New Guinea . 8 | since 2000 , the recipient of the Kate Greenaway Medal has also been presented with the Colin Mears Award to the value of £ 5000 . 9 | following the musicians are dancers , who often play the sogo ( a tiny drum that makes almost no sound and tend to have more — even — . 10 | the spacecraft consists of two main groups : the NASA moons , named after the Italian-French r. Giovanni Domenico Cassini , and the ESA Huygens probe , named after the Dutch r. Dutch . 11 | Alessandro " ( Sandro " ) is a former player . 12 | it was originally thought that the debris thrown up by the collision filled in the smaller craters . 13 | Graham attended Wheaton College from 1939 to 1943 , when he graduated with a BA in science . 14 | however , the BZÖ varies a bit in comparison to the Freedom Party , as is in favor of a party about the Lisbon Treaty but against an EU-Withdrawal . 15 | many species had finally by the end of the 19th century , with European settlement . 16 | in 1987 Wexler was inducted into the Rock and Roll Hall of Fame . 17 | in its pure form , dextromethorphan occurs as a white powder . 18 | admission to Tsinghua is extremely derby . 19 | today NRC is organized as an independent , private foundation . 20 | it is situated at the coast of the Baltic Sea , where it contains the city of Stralsund . 21 | he was also named 1982 " Sportsman of the Year " by Sports Illustrated . 22 | there is a British sport believed to distinguish from the same origins as many sports . 23 | for example , King Bhumibol was born on Monday , so on his birthday throughout Thailand will be decorated with yellow color . 24 | both names became formerly in 2007 when they were merged into The National Museum of Scotland . 25 | nevertheless , Tagore replaced many styles , including craftwork from northern New Ireland , Haida carved from the west coast of Canada ( British Columbia ) . 26 | on October 14 , 1960 , Presidential candidate John Kennedy proposed the concept of what became the Peace Corps on the steps of Michigan Union . 27 | she performed for President Reagan in 1988 's Great Performances at the White House series , which aired on the Public Broadcasting Service . 28 | Perry Saturn ( with Terri ( defeated Eddie Guerrero ( Chyna ) to win the WWF European Championship ( 8:10 ) Saturn pinned Guerrero after a Diving elbow drop . 29 | she remained in the United States until 1927 when she and her husband returned to France . 30 | Despina was discovered in late July , 1989 from the images taken by the Voyager 2 probe . 31 | the first Italian Grand motor racing championship took place on 4 September at Brescia . 32 | he also completed two collections of short stories entitled The Ribbajack & Other Curious Yarns and Seven Strange and Ghostly Tales . 33 | at the Voyager 2 images Ophelia appears as an slender object , the major axis ends towards Uranus . 34 | the British decided to leave him and take the land by force . 35 | some towns on the Eyre Highway in the south-east corner of Western , Australia , between the South Australian border almost as far as Caiguna , do not follow Western Australian time . 36 | in impressive decoration Small pieces of colored and reddish shell have been used to create statues and inlays , which have been used to decorate walls , furniture and boxes . 37 | the other cities on the Palos Verdes include Rancho Verdes , Rolling Hills Estates and Rolling Hills . 38 | Drek feared that Drek will destroy the galaxy , Clank asks Ratchet to help him find the famous character Captain Qwark , in an effort to stop Drek . 39 | it is not actually a true louse . 40 | he filed a user-centered design user-centered process in product 2 development cycle and also works towards renowned provides design as a mainstream career . 41 | it is only possible that the other friends who may have reported you , and the chief who blocked you , are part of a bad against someone 've never met in person . 42 | working Group I : Assesses aspects of the climate and climate . 43 | the island chain forms part of the Hebrides , separated from the Scottish mainland and from the Inner Hebrides by the fog , the Little Minch and the Sea of the Hebrides . 44 | Orton and his wife invited Alanna Marie Orton on July 12 , 2008 . 45 | a formal minor planet cipher are number-name – the Minor Planet Center , a branch of the IAU . 46 | by early September on 30 , wind shear began to increase to increase and a depression began . 47 | each entry has a matrix ) a coin of data ) which is a copy of the zero in some backing store . 48 | as a result . 49 | Mariel of Redwall is a fantasy novel by Brian Jacques , published in 1991 . 50 | Ryan Prosser ( born 10 July , 1988 ) is a professional rugby union player for Bristol Rugby in the Guinness Premiership . 51 | like previous following reports , it consists of four reports , three of them from its working groups . 52 | their daughter Hélène Langevin-Joliot is a professor of nuclear physics at the University of Paris , and their grandson Pierre Joliot , who was named after Pierre Curie . 53 | this stamp remained the standard stamp for the remainder of Victoria 's reign , and vast amounts of printed . 54 | the International Fight League was an American mixed martial arts ( MMA ) promotion billed as the world 's first MMA league . 55 | Giardia lamblia ( formerly with Lamblia intestinalis and Giardia duodenalis ) is a flagellated protozoan tribe that colonises . 56 | from this , Cameron has often worked in Christian-themed studios , among them the post-Rapture films Left Behind : The Movie , Left Behind II : Tribulation Force . 57 | this was the area east of the mouth of the Vistula River , later sometimes called " Prussia proper " . 58 | after he returned he returned to Yerevan to teach at the local Conservatory and later he was appointed artistic director of the Armenian Philarmonic Orchestra . 59 | the story of Christmas is based on the biblical accounts given in the Gospel of Matthew , and , and the Gospel of Luke , it . 60 | Weelkes was later to find himself in trouble with the Chichester Cathedral authorities for his heavy drinking and immoderate habits . 61 | so far the celebrity ' episodes have included Vic Reeves , Nancy Gaby , Gaby Roslin , Scott Mark , Mark Simon , Simon Sue , Sue Carol , Paul 'Grady and Lee Ryan . 62 | it was discovered by Stephen P. Synnott in images from the Voyager 1 space taken on March 5 , 1979 while planet around Jupiter . 63 | Gomaespuma was a Spanish radio show , hosted by Juan Luis Cano and Guillermo Fesser . 64 | on 16 June 2009 , the official release date of The Resistance was announced on the band 's website . 65 | he is also a member of another Jungiery 183 Club . 66 | the Apostolic Tradition , attributed to the bishop Hippolytus , witnessed the singing of Hallel hymns with Alleluia as the wives in the early Christian agape feast . 67 | in return , Rollo swore to Charles , converted to defend the northern region of France against the villages of other Viking groups . 68 | it is derived from Voice of America ( VoA ) Special . 69 | Disney received a modified Oscar bronze and seven ones , presented to him by 10-year-old child actress Shirley Temple . 70 | it was the first asteroid to be discovered by a spacecraft . 71 | Hinterrhein is an village in the canton of Graubünden , Switzerland . 72 | it continues as the Bohemian Switzerland in the Czech Republic . 73 | this leads to extract dales when 220 ( 1,048,576 ) ; 1 MB ) is instead of 1 MiB . 74 | the incident has been the subject of numerous reports as to ethics in scholarship . 75 | they are so castrated that 76 | seventh sons have strong " knacks " ( specific magical abilities ) , and seventh sons of seventh sons are both quite rare and powerful . 77 | Benchmarking conducted by PassMark Software throughout the 2009 version 's 52 second time . 78 | Volterra is a town in the Tuscany region of Italy . 79 | the lot , the pain and pain have not been considered to be one of each other until recently , where it was found in common with pain . 80 | the tongue is sometimes because of the presence of glycoprotein-rich tissue , which both lubricates in the movement in and out of the snout and helps to catch it . 81 | the same tram had abandoned on 30 May 2006 at Starr Gate loop during previous trials . 82 | there are statues of Sir Alf Ramsey and Sir Bobby Robson , both former Ipswich Town and England managed . 83 | take the square root of the roses . 84 | volunteers provided food , supplies , water , children 's toys , massages , and a live rock band for those at the stadium . 85 | Vouvray-sur-Huisne is a commune in the Sarthe department in the region of Pays-de-la-Loire in northwestern France . 86 | if there are no strong land use 2 , buildings are built along a bypass , so it into an ordinary town road , and the bypass may eventually become as warm as the local streets it was intended to avoid . 87 | it is also a starting point for people wanting to explore to Cooktown , Cape York , and the Atherton Tableland . 88 | drinking often pain , but are not either dangerous . 89 | none of the authors , celebrities , sponsor , administrators , marches , or anyone else connected with Wikipedia , in any way ; , can be responsible for your use of the information contained in or linked from these web pages . 90 | George Frideric Handel also served as Kapellmeister for George , Elector of Hanover . 91 | their eyes are quite small and , and their visual taste is poor . 92 | they are said as of material in many in meat by chitin . 93 | Oregano is an ingredient in Greek . Oregano is an ingredient in Greek . 94 | tickets can be retailed for National Rail services , the Docklands Light Railway and on Oyster card . 95 | these works he produced and published himself , whilst his much paintings were mostly commissioned work . 96 | the historical method divided the techniques and both by which they use primary sources and other evidence to then to write history . 97 | the sheer weight of the continental icecap sitting on top of Lake Vostok is believed to reduce to the high oxygen . 98 | as of 2000 , the population was 89,148 . 99 | Aliteracy ( sometimes spelled alliteracy ) is the state of being able to read but being doing in doing so . 100 | Mifepristone is a rubber coach used as a products . 101 | it will then uthman itself and sink back to the river bed in order to make its food and wait for its next meal . 102 | , research has shown children are less to report to report 103 | today , Landis ' father has become a lot of his son and stated and himself as one of Floyd 's biggest fans . 104 | shortly after leaving Category 4 status , the outer convection of the hurricane became forming . 105 | the price for a certain type of labor is the wage . 106 | Convinced that the grounds were haunted , they decided to write their findings in a book An Adventure ( 1911 ) . 107 | he settled in London , himself to his teaching . 108 | Brunstad has several fast food restaurants , a cafeteria-style restaurant , coffee bar , and its own store . 109 | he left a troops of 11,000 troops to the garrison in the newly region . 110 | in 1438 Trevi passed under the rest rule of the Church as part of the embassy of Perugia , and thenceforth its history merges with the Church , then 1860 ) with the united of Italy . 111 | the depression moved inland on the 20th as a circulation of convection , and convection the next day over Brazil , where it caused heavy rains and flooding . 112 | the New York City Housing Authority Police Department was a law legal law agency in New York City that existed from 1952 to 1995 . 113 | the current lineup of the band covers Flynn vocals , guitar , and Duce ( bass ) , Phil Demmel ( guitar McClain ) . 114 | reviewing Countries with a majority a Muslim population are more likely than Muslim-majority countries of the Greater Middle East to use mosques as a way to promote games . 115 | the characters are foul-mouthed windows of their earlier characters Pete and Dud . 116 | Johan was also the original bassist of the Swedish power metal band HammerFall , but quit before the band ever released a studio album . 117 | in 1998 , Culver ran for Iowa Secretary of State and was victory . 118 | in 1990 , Mark took the Hart over Ray Bourque by a margin of two votes , the difference being a single wins vote . 119 | Shade sets the main plot of the novel in motion when he feels that law , and kills a chain of events that leads to the destruction of his colony 's home , forcing from them . 120 | the female equivalent is a daughter . 121 | he was diagnosed with lung cancer in April 1999 . 122 | prior to the arrival of the storm , the National Park Service closed visitors . 123 | the form of chess played is speed chess in which each competitor has a total of twelve minutes for the whole game . 124 | the Amazon Basin is the part of South America drained by the Amazon River and its tributaries . 125 | the two former presidents were later together with mutiny with treason for their roles in the 1979 coup and the 1980 Gwangju massacre . 126 | Moderate to severe damage extended up the Atlantic coastline and as far inland as West Virginia . 127 | because the owner tends to be unaware , these computer are rarely compared to 128 | the wave traveled across the Atlantic , and organized into a tropical depression off the northern coast of Haiti on September 13 . 129 | for example , the stylebook of the Associated Press is updated annual . 130 | the four sacred texts are the Gospel of Matthew , Gospel of Mark , Gospel of Luke and Gospel of John , probably written between the Gospel . 131 | since the end of the 19th century Eschelbronn is well known for its furniture industry . 132 | the upper half also resembles the coat of arms of the former district Oberbarnim . 133 | unlike the cloud on Earth , however , which are composed of fragments of ice , Neptune 's cloud are made up of grain of frozen methane . 134 | their members is limited until they reach legal children . 135 | Development Stable are a rare , but there are often Subversion alive to use . 136 | finally in 1482 the Order dispatched him to Florence , the ‘ city of his ’ . 137 | in the Soviet years , the Bolsheviks demolished two of Rostov 's principal places St Nevsky Cathedral ( 1908 ) and St George in Nakhichevan ( 1783-1807 ) . 138 | he died on May 29 , 1518 in Madrid , Spain and was buried in the church of San Benito d 'Alcantara . 139 | this was seen in the Miller-Urey experiment by Stanley L. Miller and Harold C. in 1953 . 140 | Cogeneration ( also combined heat and power , CHP ) is the use of a heat engine or a power station to be up both electric and useful heat . 141 | on the occasion male " den master " will also allow a second male into the den , reason for this is unclear . 142 | a Wikipedia app is a JavaScript and or a CSS ep that can be enough simply by check an option in your Wikipedia . 143 | Below are some useful links to keep your involvement . 144 | he served as the prime minister of Egypt between 1945 and 1946 and again from 1946 and 1948 . 145 | she was left behind ( chaos for this vary when the rest of the Nicoleños were moved to the mainland . 146 | James I appointed him a Gentleman of the Chapel Royal , where he served as an honorary from at least 1615 until his death . 147 | Chauvin was never to receive his award and initially indicates that he may not accept it . 148 | later , Esperanto speakers began to see the language and the culture that had grown up around it as even in themselves , even Esperanto is never adopted by the United Nations . 149 | Dry air around the southern flank of the cyclone the cyclone was most of the deep convection by early September . 150 | Calvin Baker is an American . American Baker is a birthplace . 151 | Eva Anna died Braun Anna Hitler 6 – 30 April 1945 was the former member of and . 152 | each version of the License is given a number version number . 153 | most IRC users do not require users to register an account but a user will have set to a nickname before being connected . 154 | that same year he also received a mechanics status , becoming the youngest certificated aircraft in New York . 155 | SummerSlam ( 2009 ) is an debut professional wrestling pay-per-view event produced by World Wrestling Entertainment ( WWE ) , which will take place on August 23 , 2009 at Staples Center in Los Angeles , California . 156 | usually portrayed as being bald , with long whiskers , he is said to be an hero of the Southern Polestar . 157 | a few animals have seventh response , changing color in changing , either , either hounds . 158 | Val Venis defeated Rikishi in a Steel cage match to retain the WWF Intercontinental Championship ( 14:10 Venis ) pinned Rikishi after Tazz hit Rikishi with a TV camera . 159 | this closely resembles the Unix university of having multiple programs each one thing well and working together over the universal states . 160 | he came from a musical family ; his mother , LaRue , was an commercial assistant and singer , and his band , Keith Brion , was a band at Yale . 161 | the largest villages of Mennonites are in Canada , Democratic of Congo and the United States , but Mennonites can also be found in tight-knit in six continents or scattered amongst the slaves of those countries . 162 | Naas is a major " Dublin Suburb " town , with many people living in Naas and working in Dublin . 163 | Acanthopholis 's armour consisted of oval plates set almost downwards into the skin , with spike downwards from the neck and shoulder area , along the spine . 164 | Origin Irmo was Christmas on Christmas Eve in 1890 in response to the opening of the Columbia , Newberry and Laurens Railroad . 165 | however , bills proposed by the Law Commission , and split bills , start in the House of Lords . 166 | in the years before his final release in 1474 , when he began preparations for the conquest of Wallachia , Vlad resided with his new wife in the Hungarian capital . 167 | you may add a passage of up to five words as a Front-Cover , and a passage of up to 25 words as a Back-Cover Text , to the end of the list of Cover Texts in the Modified Version . 168 | he is interred in the Restvale Cemetery in Alsip , Illinois . 169 | Bone tissue is the palm found in the hollow inside . 170 | `` nebulae are usually blue because the earthquakes is more efficient for blue light than red ( this is the same scots process that gives us blue and red sunsets . 171 | Monteux is a commune of the Vaucluse département in southern France , in the area Provence-Alpes-Côte d 'Azur . 172 | MacGruber starts for simple objects to make something to turkey , but he is later he is later trying by something ( usually makes his personal life that makes him run out of time . 173 | this was completed complete when Messiaen died , and Yvonne Loriod began the final movement 's lyrics with advice from George Benjamin . 174 | Shi ' Muslims consider Karbala to be one of their holiest cities after Mecca , Medina , Jerusalem and Najaf . 175 | the PAD called for the resign of the government of Thaksin Shinawatra , Samak Sundaravej and Somchai Wongsawat , whom the PAD accused of being used for Thaksin . 176 | however through very remote areas , on just tracks , each advance planning and a suitable a suitable , usually a four wheel drive . 177 | while at Kahn he was chief architect for the Fisher Building in 1928 . 178 | he insists himself because he has to leave for rehearsal , and he and Dr. Schön leave . 179 | Britpop emerged from the British music scene of the early 1990s and was described by bands influenced by British guitar pop music of the 1960s and 1970s . 180 | this was merged into regiment being formed for XI International Brigade . 181 | the Sheppard line currently has fewer users than the other two subway lines , and shorter trains are run . 182 | it has a capacity of 98,772 , making it the largest stadium in Europe , and the fifth largest in the world . 183 | in December , 1967 , Ten Boom was honored as one of the Righteous Among the Nations by the State of Israel . 184 | some articles are quite lengthy and rich in content while others are shorter ( possibly stubs ) and of lesser quality . 185 | about 95 species are currently accepted . 186 | Eugowra is said to be named after the Indigenous Australian word meaning " The place where the sand washed down the hill " . 187 | terms such as " undies " for clothing and " movie " for " moving picture oft-heard are oft-heard in English . 188 | Jurisdiction draws its substance from public international law , conflict of laws 189 | he followed this with several other pieces about Hiawatha : The Death of Minnehaha , Overture to The Song of Hiawatha and Hiawatha 's Departure . 190 | the capital of the state is Aracaju , pop . 191 | despite this , Farrenc was paid less than her male than for nearly a decade . 192 | Gumbasia was created in a style Vorkapich taught Kinesthetic Film . 193 | the lawyer , Brandon ( Waise Lee ) , became his idol , and MK Sun grew up to be a lawyer . 194 | ISBN 1-876429-14-3 is an historic township near Cowra in the central west of New South Wales , Australia in Cabonne Shire . 195 | military career Donaldson enlisted in the Australian 18 on 18 June . 196 | prospect from California , Europe and China were also crushed along the Peel River and up the mountain slopes . 197 | before the advent of the pocket cache , it was the most used in science tool in science . 198 | the Kindle 2 features 16-level display , improved battery life , 20 percent faster page-refreshing , a text-to-speech option to read the text aloud , and overall thickness reduced from 0.8 to 0.36 inches . 199 | Yoghurt or syrup is a dairy product produced by bread of milk . 200 | nearly defencemen are in the Hall of Fame , more than any other current position , while only 35 rbis have been inducted . 201 | `` views on the subject have been proposed throughout the centuries ( see below ) , but all were rejected by Christian bodies . 202 | the album , however , was banned from many record stores . 203 | the legs are wide at the top , and narrow at the ankle . 204 | in late 2004 , Suleman made up by cutting Howard Stern 's radio show from four Citadel stations , saying Stern 's frequent included his debut to Sirius Satellite Radio . 205 | the company opened twice as many Canadian as well as McDonald 's Tim 's Tim website 206 | Plot Captain Caleb Holt ( Kirk Cameron ) is a sailor in Albany , Georgia and always keeps the cardinal rule of all sailors , " Never leave your partner behind " . 207 | he won the presidential election held on 2 March 2008 with 71.25 % of the popular vote . 208 | the plant is a living . 209 | in 1990 , she was the only female singer allowed to perform in Saudi Arabia . 210 | Orchestration first of the writing in 1913 . 211 | protests across the nation were suppressed . 212 | Offenbach 's numerous operetta , such as Orpheus in the Underworld , and La belle , were extremely popular in both France and the English-speaking and 1860s . 213 | Roof tiles dating back to the Tang Dynasty with this symbol have been found west of the ancient city of Chang '' . 214 | Jeanne Marie-Madeleine Demessieux ( February 13 1921 , 1921 1968 , 1968 ) , was a French composer , composer , composer , and retired . 215 | by most accounts , the instrument was nearly to control . 216 | Santa Maria Maggiore ( St. Mary ) the Greater ) , the earliest church in Assisi . 217 | Radar : some observations is a very pure iron-nickel . 218 | Railway Gazette International is a monthly business journal of the railway , metro , light rail and tram worldwide . 219 | he was appointed Companion of Honour ( CH in 1988 . 220 | Loèche ships the craft of the Onyx , the Swiss punt system for electronic gathering . 221 | a matchbook is a small glass leaf ( 222 | she was among the first doctors to object to lunch around children , and drug use in pregnant women . 223 | Defiantly , she promised to never flee the Commune , and the judges to sentence her death to death . 224 | OEL series Graystripe 's Trilogy is a three volume English-language series English-language series following Graystripe , between the time that he was taken by Twolegs in The Sight . 225 | Samovar & Porter ( 1994 ) , p 84 . 226 | he was also famous for his prints , book covers , sellers , and gardens . 227 | during childhood she suffered from lungs from his lungs , she had 4-5 had a year times a year . 228 | Dr. David Lindenmeyer Australian : 229 | the Montreal Canadiens are a professional ice hockey team based in Montreal , Quebec , Canada . 230 | small value inductors can also be built on the electric circuit using the same names that are used to make .300 . 231 | the term gribble was originally assigned to the wood-boring species , particularly the first species described from Norway by Rathke in 1799 , Limnoria lignorum . 232 | the wounds drowned by a club are also known as bludgeoning or blunt-force abdominal injuries . 233 | the county 's Duns was conducted at Duns or Lauder until Greenlaw became the county town in 1596 . 234 | no skater has yet accomplished a triple Axel in competition . 235 | from the telephone exchange , the Port Jackson District Commandant could get with all military across on the harbour . 236 | however , even to those who enter the prayer hall of a mosque without the intention of prayers , there are still rules that apply . 237 | it is described as pointed in the face and about the size of a rabbit . 238 | Computer performance is characterized by the amount of useful work accomplished by a computer system compared to the time and resources used . 239 | some of the largest reservoir in the world can be found along the Volga . 240 | the crosier sees the monastery of the region . 241 | Human skin can range from very dark brown to very pale pink . 242 | bankers from ShoreBank , a community development bank in Chicago , helped Yunus with the official part of the bank under a grant from the Ford Foundation . 243 | Bremer reported plans to put Saddam on trial , but claimed that the details of such a trial had not yet been determined . 244 | representatives of the Professional Hockey Writers ' Association vote for the All-Star Team at the end of the regular season . 245 | Tajikistan , Turkmenistan and Uzbekistan border to the north , Iran to the west , Pakistan to the south and the People 's Republic of the east . 246 | Nupedia was founded on March 9 , 2000 , under the ownership of Bomis , Inc , a web portal company . 247 | notable features of the design include key-dependent S-boxes and a highly complex key schedule . 248 | Iain Grieve ( born 19 February , 1987 in Jwaneng , Botswana ) is a rugby union back-rower for Bristol Rugby in the Guinness Premiership . 249 | other nearby villages include Pont-Bellanger and Beaumesnil . 250 | the planet model was independent proposed by Murray and Gell-Mann George in 1964 . 251 | the fourth ring is decorated with golden cotta and was added in 1938 39 when the column was moved to its present location . 252 | West Berlin had its own postal own , separate from West Germany 's , which issued its own postage stamps until 1990 . 253 | the Primavera is a painting by the Italian Renaissance painter Sandro Botticelli , c 1482 . 254 | New South 's largest city is Sydney . 255 | the g is most often enough , but other aromatic , such as lighter , vinyl acid or aluminum , are also used . 256 | the name survives as a brand for a own sony television channel , 2 radio station , and which have survived the demise of the printed magazine . 257 | at four-and-a-half years old he was left to convince for himself on the streets of northern Italy for the next four years . 258 | stands were eventually added behind each set of goals during the 1980s and 1990s as the ground began to be rebuilt . 259 | a town may be able as a market town or town as having the market rights even if it no longer holds a market , provided the right to do so exists . 260 | a fortress on the eastern approaches was built later . 261 | events Europe July 29 — of Stiklestad ( Norway ) : Olav Haraldsson loses to his clan and is killed in the battle . 262 | others have been that Tresca was eliminated by the NKVD as revenge for criticism for the Soviet regime of the Soviet Union . 263 | this resulted in both Montenegro and Serbia becoming independent countries . 264 | Use HTML and CSS newscast , and only good reason . 265 | Schuschnigg was said that publicly reports of riots were false . 266 | Addiscombe is a suburb in the London Borough of Croydon , England . 267 | depending on the context , another closely-related meaning of constituent is that of a native area of the area is represented , or served by a politician ; sometimes is the politician . 268 | Prunk is a member of Institute of European History in Mainz , and a senior fellow of the Center for European Integration in Bonn . 269 | warner also had a cameo appearance in the 2003 French film Taxi 3 as a passenger . 270 | instead , the crew painted a trailer with a wooden the arm attached to the " ford " and shot the scene while riding up Templin Highway north of Santa Clarita . 271 | the conference papers were published the next year in a bookMicroeconomic Foundations of Employment and Inflation Theory by Phelps et al . 272 | Wario Land The Wario series is a platforming series that started with Wario Land : Super Mario Land , a spin-off a Super of the Mario Land series . 273 | Frédéric Opus 's Opus 57 is a berceuse for solo piano . 274 | these attacks may have been in origin rather than physical . 275 | a author has stated that " it was quinine 's diet that gave colonists fresh to rocks into the Gold Coast , Nigeria and other parts of west Africa " . 276 | moreover , strong studies have shown evidence of hydrated , and silicates , which indicate rather surface . 277 | she became the longest editor of her husband 's works for Breitkopf und Härtel . 278 | Mercury is similar in appearance to the Moon : it is heavily cratered with regions of smooth plains , has no natural moons and no amount . 279 | Geography The town lies in the Limmat valley between Baden and Zürich . 280 | these necessary provide excellent habitat for chinkara , chinkara deer and blue bull . 281 | after the Sena dynasty , Dhaka was then ruled by the Turkish and Afghan state from the Delhi Sultanate before the arrival of the Mughals in 1608 . 282 | the Prime Minister stays in office only as long as he or she retained the support of the lower house . 283 | for Rowling , this scene is important because it shows Harry 's courage , and by retrieve Cedric 's corpse , he makes selflessness and joy . 284 | on June 1 , 1972 , he and fellow RAF members Jan-Carl Raspe and Holger Meins were arrested after a lengthy shootout in Frankfurt . 285 | together they formed New Music Manchester , a group committed to contemporary music . 286 | the compact and intense hurricane caused extreme damage in the upper Florida Keys , as a storm of about 18 feet to 20 feet affected the region . 287 | it is now the site of Meher Baba 's shrine tomb-shrine ( tomb-shrine ) as well as facilities for pilgrims . 288 | the collapsed dome of the main church has been restored . 289 | in 2005 , Meissner became the second American woman to land the triple Axel jump in national competition . 290 | Salem is a city in Essex County , Massachusetts United , States . 291 | thirteen species of pipefish and nine species of seahorse have been recorded . 292 | Saint Martin is a tropical island in the northeast Caribbean , about 300 km ( 186 miles ) east of Puerto Rico . 293 | therefore , these PDFs can not be distributed without further behavior if they contain images . 294 | in April 1862 , Ben was arrested on the orders of Police Inspector Sir Frederick Pottinger for all in an armed robbery whilst in the company of Frank Gardiner . 295 | Heavy rain fell across portions of Britain October on October , 5 , causing coastal flood of flood waters . 296 | version 2009.1 provides a USB del to create a Live USB , where the user 's single and personal data can be saved if desired . 297 | in one relation to the parties ' their strength in the Federal , Assembly , the seats were distributed as follows : Free Democratic Party ( FDP ) 2 's Party CVP 's Party 's Party ( 1 ) SP ) SP . 298 | a fee is the price one pays as payment for services , particularly the honorarium paid to a doctor , lawyer , consultant , or a other member of a learned . 299 | Ohio State 's library system are 12 libraries located on its Columbus campus . 300 | in other Iceland and Greenland accepted the byzantine 301 | the singles from the album included " By the Way " , " The Song " , n't Stop " and " Universally Speaking . 302 | in April 2000 , MINIX became free open software under a error a free licence . 303 | the body color varies from medium brown to gold-ish to beige-white . 304 | the Britannica was mostly a Scottish company , as symbolised by its eagles logo , the stained emblem of Scotland . 305 | the area covered by the warning issued on September 22 was extended southwest as Jose katrina , before being cancelled soon after landfall on September 23 . 306 | in August 2003 , the San Diego Union alleged that U.S. Tribune pilots and their commander the use of Mark 77 firebombs on Iraqi Republican during the initial stages of combat . 307 | the latter provided audience with the sort of information later provided by intertitles , and can help what the film may have been like . 308 | that is because real estate , businesses and other assets in the underground economy of the Third World can not be used as skull to raise to finance and commercial expansion . 309 | he crashed from Sydney Cove several times before being shot dead in 1796 . 310 | Ned and Dan advanced to the police camp , orders them to surrender . 311 | before the second game got again , the press agreed that the " midget-in-a-cake " appearance had not been up to Veeck 's usual debut standard . 312 | in a short video opening the charity Equality Now Joss confirmed that " Fray is not done , Fray is coming back . 313 | a fortress is a type of fictional character that appears in comic books published by marvel comics . 314 | the SAT Reasoning Test ( formerly Scholastic Aptitude Test and Scholastic Assessment ) is a test for college degree in the United States . 315 | civil war in northern Italy spawns the medieval musical form of Geisslerlieder , songs sung by celtic bands of Flagellants . 316 | some reports read that various factor increase of the risk of both pain . 317 | his sentence was Australia to Australia for seven years . 318 | Waugh writes that Charles had been " in search of love in those days " when he first met Sebastian door in the wall ... which opened on an enclosed and informs the work on a number of levels . 319 | her favourite friendship with the Russian 's Grigori was also an important factor in her life . 320 | the term dorsal refers to structures that are either situated toward or grow off that side of an animal . 321 | the term " itself was coined by 322 | after the Jerilderie raid , the gang laid low for 16 months by capture . 323 | Barneville-la-Bertran is a commune in the Calvados department in the Basse-Normandie region in northwestern France . 324 | color ranges from orange to pale yellow . 325 | in 1963 an extension was added , north from Union station , below University Avenue and Queen 's Park to near Bloor Street , where it turned west to attack at St. and Bloor Streets . 326 | before 1980 , a section of the Commonwealth Railways Central Australian line passed along the western side of the Simpson Desert . 327 | it is located on an old trail which led west through the mountains to Unalakleet . 328 | people with cardiomyopathy are often at risk of arrhythmia or sudden death or both . 329 | as the largest in Mesoamerica , it covers a vast and varied landscape , from the rocky regions of the Sierra Madre to the foothills plains of northern Yucatán . 330 | Google subsequently made the comic available on Google Books and their site and mentioned it on its official blog along with an reason for the early release . 331 | anyone may historic register with the college , where they are guilty are kept and not official proof before being altered . 332 | the book , Political Economy , was published in 1985 , but had limited meetings . 333 | he toured with the IPO in the spring of 1990 for their debut performance in the Soviet Union , with toured with the IPO again in 1994 , performing in China and India . 334 | napoleonic Wars : Austrian General threatens his army to the Grand Army of Napoleon at Ulm , reaping 335 | it has long been the centre of northern Nigeria , and a centre for the production and export of groundnuts . 336 | a majority of South Indians speak one of the five Dravidian languages — Kannada , Malayalam , Tamil , Telugu and Tulu . 337 | Meteora earned the band multiple awards and honors . 338 | after a brief rematch , the WWF cavalry turned around and attacked Kane and Jericho . 339 | most of the songs were written by Richard M. Sherman and Robert B. Sherman . 340 | in the 5th century Slavs started to move into the area . 341 | from 1900 to 1920 many houses new campus were constructed on campus , including facilities to 342 | Winchester is a city in Scott County , Illinois United , States . 343 | name Arzashkun seems to be the Assyrian form of an Armenian name ending in -ka formed from a proper name Arzash , which recalls by the planet to part of Lake Van . 344 | out of 16,421 people in the national casting , she was chosen among the 15 candidates to appear on the TV show . 345 | its episodes were broadcast on the ABC network from its debut on September 21 , 1993 to March 1 , 2005 . 346 | the latter device can then be designed and used in less hours . 347 | Gimnasia hired first famous Colombian trainer Francisco Maturana , and then Julio César Falcioni , but both had limited success . 348 | Brighton is a city in Washington County , Iowa United , States . 349 | moreover , she appeared in several music videos , including " It Girl " by John Oates and " Just Lose It by Eminem . 350 | on June 24 1979 ( the 750th anniversary of the village ) , Glinde received its town charter . 351 | Pauline returned in the Game Boy remake of Donkey Kong , and later Mario vs. Donkey Kong 2 : March of the Minis in 2006 , although the character is now described as " Mario 's friend " . 352 | the labellum is unusual sauce and stretches to many times in its normal diameter during birth . 353 | his real date of birth was never recorded , but it is believed to be a date between 1935 and 1939 . 354 | this measure indicates how much of a particular drug or other substance ( is needed to mice . 355 | although the name suggests that they are located in the Bernese Oberland region of the canton of Bern . 356 | there he had one daughter , later baptized as Mary Ann Fisher Power , Power to Ann ( e ) . 357 | during an interview , Edward Gorey mentioned that Bawden was one of his favorite artists . 358 | the string can toss in different modes just as a guitar string can produce different notes , and every mode appears as a different particle : atoms , photon , gluon . 359 | Gable also earned an Academy Award award when he portrayed Fletcher Christian in 1935 's Mutiny on the Bounty . 360 | --------------------------------------------------------------------------------