├── toolkit ├── __init__.py └── rnn.py ├── cleaner_files ├── Xoutput-n_analyses-10.txt ├── Xoutput-n_analyses-12.txt ├── Xoutput-n_analyses-13.txt ├── Xoutput-n_analyses-10.txt.rules.sed ├── Xoutput-n_analyses-12.txt.rules.sed ├── Xoutput-n_analyses-13.txt.rules.sed ├── Xoutput-n_analyses-11.txt ├── Xoutput-n_analyses-09.txt ├── Xoutput-n_analyses-11.txt.rules ├── Xoutput-n_analyses-09.txt.rules ├── Xoutput-n_analyses-11.txt.rules.sed ├── Xoutput-n_analyses-09.txt.rules.sed ├── Xoutput-n_analyses-07.txt ├── Xoutput-n_analyses-07.txt.rules ├── Xoutput-n_analyses-07.txt.rules.sed ├── Xoutput-n_analyses-05.txt.rules ├── Xoutput-n_analyses-05.txt.rules.sed ├── Xoutput-n_analyses-06.txt.rules ├── Xoutput-n_analyses-06.txt.rules.sed ├── Xoutput-n_analyses-05.txt ├── Xoutput-n_analyses-08.txt ├── Xoutput-n_analyses-08.txt.rules ├── Xoutput-n_analyses-08.txt.rules.sed ├── Xoutput-n_analyses-06.txt └── Xoutput-n_analyses-03.txt.rules ├── requirements.txt ├── .dockerignore ├── environment-variables ├── scripts ├── turkish-lowercase.py ├── lowercase-root-surface-forms.py ├── strip-sentences-with-inconsistent-morph-analysis.sh ├── cleaning-script.awk ├── run-configurations-for-three-integration_modes.sh ├── TRUBA │ ├── job-creator.sh │ └── helper-script-to-run-the-experiment-set-TRUBA.sh ├── helper-script-to-run-the-experiment-set-small-sizes-sgd-parameter-sweep.sh ├── helper-script-to-run-the-experiment-set-small-sizes-adam-parameter-sweep.sh ├── helper-script-to-run-the-experiment-set.sh ├── rules-for-cleaning.txt ├── helper-script-to-run-the-experiment-set-small-sizes.sh ├── run_this_after_cleaner_gui.sh ├── inspect_results.py └── inspect_results.ipynb ├── LICENSE.md ├── dynetsaver.py ├── docker └── my_silent.cfg ├── dataset ├── eng.testa.86 ├── eng.testb.118 ├── gungor.ner.test.small ├── gungor.ner.dev.small ├── eng.train.208 └── gungor.ner.train.small ├── Dockerfile ├── README.md ├── crf.py ├── main_form.ui ├── main_form.py ├── control_experiments.py ├── eval.py ├── 
cleaner_gui.py └── evaluation └── conlleval /toolkit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-10.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-12.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-13.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-10.txt.rules.sed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-12.txt.rules.sed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-13.txt.rules.sed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.12.0 2 | virtualenvwrapper==4.7.2 3 | pymongo==3.4.0 4 | sacred==0.7.0 -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | dataset 2 | docs 3 | evaluation/temp 4 | models 5 | Xoutput* 6 | *.pyc 7 | *.bak 8 | l_mkl_2018.1.163.tgz 
-------------------------------------------------------------------------------- /environment-variables: -------------------------------------------------------------------------------- 1 | export LD_PRELOAD=/opt/intel/mkl/lib/intel64/libmkl_def.so:/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_intel_thread.so:/opt/intel/lib/intel64_lin/libiomp5.so -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-11.txt: -------------------------------------------------------------------------------- 1 | 1 1 0 12 Xlı+Adj^DB+Noun+Ness+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Ness+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Ness+A3sg+Pnon+Acc Xlı+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+P3sg+Nom Xlı+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Acc Xlılığ+Noun+A3sg+P3sg+Nom Xlılığ+Noun+A3sg+Pnon+Acc Xlılığı+Noun+A3sg+Pnon+Nom Xlılıg+Noun+A3sg+P3sg+Nom Xlılıg+Noun+A3sg+Pnon+Acc Xlılık+Noun+A3sg+P3sg+Nom Xlılık+Noun+A3sg+Pnon+Acc 2 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-09.txt: -------------------------------------------------------------------------------- 1 | 1 2 0 10 Xla+Verb^DB+Verb+Recip+Pos+Past+A1pl Xla+Verb+Recip+Pos+Past+A1pl Xla+Verb+Recip+Pos^DB+Adj+PastPart+Pnon Xla+Verb+Recip+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Past+A1pl X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Adj+PastPart+Pnon X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom X+Adj^DB+Verb+Become+Pos+Past+A1pl X+Adj^DB+Verb+Become+Pos^DB+Adj+PastPart+Pnon X+Adj^DB+Verb+Become+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom 2 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-11.txt.rules: 
-------------------------------------------------------------------------------- 1 | 1 Xlı+Adj^DB+Noun+Ness+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Ness+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Ness+A3sg+Pnon+Acc Xlı+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+P3sg+Nom Xlı+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Acc Xlılığ+Noun+A3sg+P3sg+Nom Xlılığ+Noun+A3sg+Pnon+Acc Xlılığı+Noun+A3sg+Pnon+Nom Xlılıg+Noun+A3sg+P3sg+Nom Xlılıg+Noun+A3sg+Pnon+Acc Xlılık+Noun+A3sg+P3sg+Nom Xlılık+Noun+A3sg+Pnon+Acc Xlı+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+P3sg+Nom -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-09.txt.rules: -------------------------------------------------------------------------------- 1 | 1 Xla+Verb^DB+Verb+Recip+Pos+Past+A1pl Xla+Verb+Recip+Pos+Past+A1pl Xla+Verb+Recip+Pos^DB+Adj+PastPart+Pnon Xla+Verb+Recip+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Past+A1pl X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Adj+PastPart+Pnon X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom X+Adj^DB+Verb+Become+Pos+Past+A1pl X+Adj^DB+Verb+Become+Pos^DB+Adj+PastPart+Pnon X+Adj^DB+Verb+Become+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom Xla+Verb+Recip+Pos+Past+A1pl -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-11.txt.rules.sed: -------------------------------------------------------------------------------- 1 | s/^\(.\+\) \(\(.\+\)lı+Adj^DB+Noun+Ness+A3sg+P3sg+Nom\)\( \3+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Ness+A3sg+P3sg+Nom \3+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Ness+A3sg+Pnon+Acc \3lı+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+P3sg+Nom \3lı+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Acc \3lılığ+Noun+A3sg+P3sg+Nom \3lılığ+Noun+A3sg+Pnon+Acc \3lılığı+Noun+A3sg+Pnon+Nom \3lılıg+Noun+A3sg+P3sg+Nom \3lılıg+Noun+A3sg+Pnon+Acc \3lılık+Noun+A3sg+P3sg+Nom \3lılık+Noun+A3sg+Pnon+Acc .\+\)$/\1 
#!/usr/bin/env python
# coding=utf8
"""Lowercase UTF-8 text from stdin using Turkish casing rules.

Each input line is stripped of surrounding whitespace, lowercased with the
Turkish dotted/dotless "i" handled explicitly, and written to stdout as
UTF-8, one output line per input line.
"""

import sys

# Turkish-specific case pairs that the generic case conversion gets wrong.
CHARMAP = {
    "to_upper": {
        u"ı": u"I",
        u"i": u"İ",
    },
    "to_lower": {
        u"I": u"ı",
        u"İ": u"i",
    }
}


def lower(s):
    """Return *s* lowercased according to Turkish casing rules.

    The Turkish capitals "I" and "İ" are mapped to "ı" and "i" first, so the
    generic ``lower()`` call that follows never sees them.

    :param s: text to convert (a unicode string).
    :return: the lowercased text.
    """
    for upper_char, lower_char in CHARMAP["to_lower"].items():
        s = s.replace(upper_char, lower_char)

    return s.lower()


def main():
    """Stream stdin to stdout, lowercasing every line."""
    # Work on raw bytes and decode/encode explicitly.  Printing a unicode
    # object relies on the stdout encoding, which Python 2 leaves as ASCII
    # when the output is piped or redirected, so Turkish characters would
    # raise UnicodeEncodeError.  On Python 3 the byte streams live on
    # ``.buffer``; on Python 2 the standard streams already carry bytes.
    byte_in = getattr(sys.stdin, "buffer", sys.stdin)
    byte_out = getattr(sys.stdout, "buffer", sys.stdout)

    # readline() returns an empty byte string only at end of input.
    for raw_line in iter(byte_in.readline, b""):
        text = raw_line.decode("utf8").strip()
        byte_out.write(lower(text).encode("utf8") + b"\n")
    byte_out.flush()


if __name__ == "__main__":
    main()
s.replace(key, value) 21 | 22 | return s.lower() 23 | 24 | line = sys.stdin.readline() 25 | 26 | while line: 27 | tokens = line.decode("utf8").strip().split(" ") 28 | for idx in range(1, len(tokens)-1): 29 | if tokens[idx] == "*UNKNOWN*": 30 | pass 31 | else: 32 | _tags = tokens[idx].split("+") 33 | _tags[0] = lower(_tags[0]) 34 | tokens[idx] = "+".join(_tags) 35 | print " ".join(tokens).encode("utf8") 36 | line = sys.stdin.readline() -------------------------------------------------------------------------------- /scripts/strip-sentences-with-inconsistent-morph-analysis.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #awk 'BEGIN { found_in_sentence = 0; } /^$/ { if (found_in_sentence == 1) { print sentence; }; found_in_sentence = 0; sentence = ""; } !/^$/ { if (length(sentence) == 0) { sentence = $0 "\n"; } else { sentence = sentence $0 "\n";}; if (found_in_sentence == 0) { inconsistent = 1; for (i = 3; i <= NF-1; i++) { if ($2 == $i) { inconsistent = 0 } }; if (inconsistent == 1) { found_in_sentence = 1 } } }' 4 | awk 'BEGIN { inconsistent_sentence = 0; } /^$/ { if (inconsistent_sentence == 0) { print sentence; }; inconsistent_sentence = 0; sentence = ""; } !/^$/ { if (length(sentence) == 0) { sentence = $0 "\n"; } else { sentence = sentence $0 "\n";}; if (inconsistent_sentence == 0) { found = 0; if (NF == 4 && $3 == "*UNKNOWN*") { inconsistent_sentence = 0; } else { for (i = 3; i <= NF-1; i++) { if ($2 == $i) { found = 1; break }; }; if (found == 1) { inconsistent_sentence = 0 } else { inconsistent_sentence = 1 };} } }' -------------------------------------------------------------------------------- /scripts/cleaning-script.awk: -------------------------------------------------------------------------------- 1 | { 2 | total++; 3 | counts[$2]++; 4 | if ($2 == n_analyses + 1) { 5 | split($3, arr, /+/); 6 | split(arr[1], arr2, /'"'"'/); 7 | if (length(arr2[2]) == 0) { 8 | 9 | golden_root = arr[1]; 
10 | 11 | l = 1; 12 | for (; l < length(golden_root); l++) { 13 | 14 | all_ok = 1; 15 | for (i=4; i <= NF; i++) { 16 | split($i, tmp_arr, /+/); 17 | if (substr(tmp_arr[1], 0, l) != substr(golden_root, 0, l)) { 18 | all_ok = 0 19 | break; 20 | } 21 | } 22 | if (all_ok != 1) { 23 | break; 24 | } 25 | } 26 | 27 | if (l-1 == 0) { 28 | NA_count++; 29 | } else { 30 | # print tolower($0); 31 | # print (l-1); 32 | output = $1 " " $2; 33 | for (i=3; i <= NF; i++) { 34 | output = output " X" substr($i, l); 35 | } 36 | print output; 37 | } 38 | }; 39 | suffix_counts[arr2[2]]++; 40 | } 41 | } /* END { for (key in counts) { print key, counts[key]; }; for (suffix in suffix_counts) { print suffix, suffix_counts[suffix]; }; print total; } */ -------------------------------------------------------------------------------- /scripts/run-configurations-for-three-integration_modes.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CODE_PATH=${CODE_PATH:-/home/onur/projects/research/focus/ner-tagger-tensorflow} 4 | 5 | EXP_NAME=${1:-default_exp_name} 6 | GPU=${2:-0} 7 | 8 | echo 'cd '${CODE_PATH}' && source /usr/local/bin/virtualenvwrapper.sh && workon dynet && source environment-variables && python control_experiments.py -m joint_ner_and_md with integration_mode=0 dynet_gpu='$GPU' embeddings_filepath="" word_lstm_dim=256 experiment_name='$EXP_NAME 9 | echo 'cd '${CODE_PATH}' && source /usr/local/bin/virtualenvwrapper.sh && workon dynet && source environment-variables && python control_experiments.py -m joint_ner_and_md with integration_mode=1 dynet_gpu='$GPU' embeddings_filepath="" word_lstm_dim=256 experiment_name='$EXP_NAME 10 | echo 'cd '${CODE_PATH}' && source /usr/local/bin/virtualenvwrapper.sh && workon dynet && source environment-variables && python control_experiments.py -m joint_ner_and_md with integration_mode=2 dynet_gpu='$GPU' embeddings_filepath="" word_lstm_dim=256 experiment_name='$EXP_NAME 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Guillaume Lample, Onur Güngör 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
import os


class DynetSaver():
    """Save and restore model parameters under a checkpoint directory.

    Every checkpoint lives in its own sub-directory of ``checkpoint_dir``
    ("model-epoch-XXXXXXXX" or "best-models-XXXXXXXX") and is stored in a
    file called "model.ckpt" inside it.

    ``parameter_collection`` only needs ``save(path)`` and ``populate(path)``
    methods (presumably a DyNet ParameterCollection -- confirm with callers).
    """

    def __init__(self, parameter_collection, checkpoint_dir):
        self.parameter_collection = parameter_collection
        self.checkpoint_dir = checkpoint_dir

    def save(self, epoch=None, n_bests=None):
        """Write the parameters to a new or existing checkpoint directory.

        :param epoch: epoch number; selects "model-epoch-%08d". Takes
            precedence over ``n_bests`` when both are given.
        :param n_bests: non-negative counter; selects "best-models-%08d".
        :raises AssertionError: if neither argument is usable.
        """
        # Compare against None explicitly: epoch 0 is a valid epoch, and
        # ordering None against an int is a TypeError on Python 3.
        epoch_given = epoch is not None
        n_bests_given = n_bests is not None and n_bests >= 0
        assert epoch_given or n_bests_given, "One of epoch or n_bests should be specified"

        if epoch_given:
            model_dir_path = "model-epoch-%08d" % epoch
        else:
            model_dir_path = "best-models-%08d" % n_bests
        model_checkpoint_dir_path = os.path.join(self.checkpoint_dir, model_dir_path)

        if not os.path.exists(model_checkpoint_dir_path):
            # makedirs also creates checkpoint_dir itself on the first save.
            os.makedirs(model_checkpoint_dir_path)
        self.parameter_collection.save(os.path.join(model_checkpoint_dir_path,
                                                    "model.ckpt"))

    def get_newest_ckpt_directory(self):
        """Return the checkpoint sub-directory that sorts last by name.

        :return: its path joined onto ``checkpoint_dir``, or None when
            ``checkpoint_dir`` is missing or has no sub-directories.
        """
        # The first os.walk() entry describes checkpoint_dir itself; the
        # default makes a missing directory yield None instead of raising.
        top_level = next(os.walk(self.checkpoint_dir), None)
        if top_level is None:
            return None

        model_directories = sorted(top_level[1])
        if model_directories:
            return os.path.join(self.checkpoint_dir, model_directories[-1])
        else:
            return None

    def restore(self, ckpt_directory):
        """Load the parameters stored in ``ckpt_directory``/model.ckpt."""
        self.parameter_collection.populate(os.path.join(ckpt_directory, "model.ckpt"))
X+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adverb+Since X+Verb+Pos^DB+Adj+Agt^DB+Verb+Zero+Pres+Cop+A3sg Xi+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xi+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adverb+Since Xiç+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xici+Adj^DB+Verb+Zero+Pres+Cop+A3sg 3 | 3 1 0 8 Xa+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Verb+Pos^DB+Noun+FeelLike+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Verb+Pos^DB+Noun+FeelLike+A3sg+Pnon+Nom^DB+Adverb+Since Xa+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xa+Adj^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg Xa+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since Xa+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xa+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 4 | -------------------------------------------------------------------------------- /docker/my_silent.cfg: -------------------------------------------------------------------------------- 1 | ACCEPT_EULA=accept 2 | CONTINUE_WITH_OPTIONAL_ERROR=yes 3 | PSET_INSTALL_DIR=/opt/intel 4 | CONTINUE_WITH_INSTALLDIR_OVERWRITE=yes 5 | PSET_MODE=install 6 | ARCH_SELECTED=ALL 7 | 
COMPONENTS=;intel-comp-l-all-vars__noarch;intel-comp-nomcu-vars__noarch;intel-openmp__x86_64;intel-openmp-32bit__x86_64;intel-tbb-libs-32bit__x86_64;intel-tbb-libs__x86_64;intel-mkl-common__noarch;intel-mkl-installer-license__noarch;intel-mkl-core-32bit__x86_64;intel-mkl-core__x86_64;intel-mkl-core-rt-32bit__x86_64;intel-mkl-core-rt__x86_64;intel-mkl-doc__noarch;intel-mkl-doc-ps__noarch;intel-mkl-gnu-32bit__x86_64;intel-mkl-gnu__x86_64;intel-mkl-gnu-rt-32bit__x86_64;intel-mkl-gnu-rt__x86_64;intel-mkl-common-ps__noarch;intel-mkl-core-ps-32bit__x86_64;intel-mkl-core-ps__x86_64;intel-mkl-common-c__noarch;intel-mkl-core-c-32bit__x86_64;intel-mkl-core-c__x86_64;intel-mkl-common-c-ps__noarch;intel-mkl-tbb-32bit__x86_64;intel-mkl-tbb__x86_64;intel-mkl-tbb-rt-32bit__x86_64;intel-mkl-tbb-rt__x86_64;intel-mkl-gnu-c-32bit__x86_64;intel-mkl-gnu-c__x86_64;intel-mkl-common-f__noarch;intel-mkl-core-f-32bit__x86_64;intel-mkl-core-f__x86_64;intel-mkl-gnu-f-rt-32bit__x86_64;intel-mkl-gnu-f-rt__x86_64;intel-mkl-gnu-f__x86_64;intel-mkl-gnu-f-32bit__x86_64;intel-mkl-f95-common__noarch;intel-mkl-f95-32bit__x86_64;intel-mkl-f__x86_64;intel-mkl-psxe__noarch;intel-psxe-common__noarch;intel-psxe-common-doc__noarch;intel-compxe-pset 8 | -------------------------------------------------------------------------------- /scripts/TRUBA/job-creator.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo $0 4 | rundir_path=`dirname $0` 5 | experiment_name=${1:-TRUBA-all-experiments-20180311-01} 6 | dim=${2:-10} 7 | 8 | partition_name=${3:-short} 9 | core_per_job=${4:-4} 10 | max_time=${5:-4-00:00:00} 11 | 12 | sub_job_id=0 13 | max_jobs_to_submit=100 14 | 15 | # jobs_line_by_line=`${rundir_path}/helper-script-to-run-the-experiment-set-TRUBA.sh ${experiment_name} ${dim}` 16 | 17 | #echo $jobs_line_by_line | while read line; do 18 | 19 | ${rundir_path}/helper-script-to-run-the-experiment-set-TRUBA.sh ${experiment_name} ${dim} | while 
read line; do 20 | 21 | sub_job_id=$((sub_job_id + 1)) 22 | echo $sub_job_id 23 | echo $max_jobs_to_submit 24 | echo $line 25 | 26 | # experiment_name=XXX-dim-10-morpho_tag_type-char 27 | job_id=`echo ${line} | awk '{ match($0, /.* experiment_name=([^ ]+) /, arr); printf "%s", arr[1]; }'` 28 | 29 | echo '#!/bin/bash' > ${rundir_path}/batch-script-${job_id}.sh 30 | echo $line >> ${rundir_path}/batch-script-${job_id}.sh 31 | 32 | sbatch -A ogungor -J ${job_id} -p ${partition_name} -c ${core_per_job} --time=${max_time} --mail-type=END --mail-user=onurgu@boun.edu.tr ${rundir_path}/batch-script-${job_id}.sh 33 | 34 | echo sleeping for 120 seconds to allow time to FileStorageObserver 35 | sleep 120 36 | 37 | if [[ sub_job_id -eq max_jobs_to_submit ]]; then 38 | # echo exit 39 | exit 40 | fi 41 | done -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-07.txt.rules: -------------------------------------------------------------------------------- 1 | 1 Xlık+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3pl+Cop Xlık+Noun+A3pl+Pnon+Nom^DB+Adverb+Since Xlık+Noun+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+A3pl+Cop X+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3pl+Pnon+Nom^DB+Adverb+Since X+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Noun+Ness+A3pl+Pnon+Nom^DB+Adverb+Since X+Adj^DB+Noun+Ness+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xlık+Noun+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 2 | 2 Xici+Adj^DB+Verb+Zero+Pres+A3sg+Cop X+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adverb+Since X+Verb+Pos^DB+Adj+Agt^DB+Verb+Zero+Pres+Cop+A3sg Xi+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xi+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adverb+Since Xiç+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xici+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xici+Adj^DB+Verb+Zero+Pres+Cop+A3sg 3 | 3 
Xa+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Verb+Pos^DB+Noun+FeelLike+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Verb+Pos^DB+Noun+FeelLike+A3sg+Pnon+Nom^DB+Adverb+Since Xa+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xa+Adj^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg Xa+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since Xa+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xa+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xa+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-07.txt.rules.sed: -------------------------------------------------------------------------------- 1 | s/^\(.\+\) \(\(.\+\)lık+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3pl+Cop\)\( \3lık+Noun+A3pl+Pnon+Nom^DB+Adverb+Since \3lık+Noun+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+A3pl+Cop \3+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3pl+Pnon+Nom^DB+Adverb+Since \3+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3+Adj^DB+Noun+Ness+A3pl+Pnon+Nom^DB+Adverb+Since \3+Adj^DB+Noun+Ness+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg .\+\)$/\1 \3lık+Noun+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg\4/g 2 | s/^\(.\+\) \(\(.\+\)ici+Adj^DB+Verb+Zero+Pres+A3sg+Cop\)\( \3+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adverb+Since \3+Verb+Pos^DB+Adj+Agt^DB+Verb+Zero+Pres+Cop+A3sg \3i+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3i+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adverb+Since \3iç+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3ici+Adj^DB+Verb+Zero+Pres+Cop+A3sg .\+\)$/\1 \3ici+Adj^DB+Verb+Zero+Pres+Cop+A3sg\4/g 3 | s/^\(.\+\) \(\(.\+\)a+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop\)\( \3+Verb+Pos^DB+Noun+FeelLike+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 
\3+Verb+Pos^DB+Noun+FeelLike+A3sg+Pnon+Nom^DB+Adverb+Since \3a+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3a+Adj^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg \3a+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since \3a+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3a+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg .\+\)$/\1 \3a+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg\4/g 4 | -------------------------------------------------------------------------------- /dataset/eng.testa.86: -------------------------------------------------------------------------------- 1 | CRICKET NNP I-NP O 2 | - : O O 3 | LEICESTERSHIRE NNP I-NP I-ORG 4 | TAKE NNP I-NP O 5 | OVER IN I-PP O 6 | AT NNP I-NP O 7 | TOP NNP I-NP O 8 | AFTER NNP I-NP O 9 | INNINGS NNP I-NP O 10 | VICTORY NN I-NP O 11 | . . O O 12 | 13 | LONDON NNP I-NP I-LOC 14 | 1996-08-30 CD I-NP O 15 | 16 | West NNP I-NP I-MISC 17 | Indian NNP I-NP I-MISC 18 | all-rounder NN I-NP O 19 | Phil NNP I-NP I-PER 20 | Simmons NNP I-NP I-PER 21 | took VBD I-VP O 22 | four CD I-NP O 23 | for IN I-PP O 24 | 38 CD I-NP O 25 | on IN I-PP O 26 | Friday NNP I-NP O 27 | as IN I-PP O 28 | Leicestershire NNP I-NP I-ORG 29 | beat VBD I-VP O 30 | Somerset NNP I-NP I-ORG 31 | by IN I-PP O 32 | an DT I-NP O 33 | innings NN I-NP O 34 | and CC O O 35 | 39 CD I-NP O 36 | runs NNS I-NP O 37 | in IN I-PP O 38 | two CD I-NP O 39 | days NNS I-NP O 40 | to TO I-VP O 41 | take VB I-VP O 42 | over IN I-PP O 43 | at IN B-PP O 44 | the DT I-NP O 45 | head NN I-NP O 46 | of IN I-PP O 47 | the DT I-NP O 48 | county NN I-NP O 49 | championship NN I-NP O 50 | . . 
O O 51 | 52 | Their PRP$ I-NP O 53 | stay NN I-NP O 54 | on IN I-PP O 55 | top NN I-NP O 56 | , , O O 57 | though RB I-ADVP O 58 | , , O O 59 | may MD I-VP O 60 | be VB I-VP O 61 | short-lived JJ I-ADJP O 62 | as IN I-PP O 63 | title NN I-NP O 64 | rivals NNS I-NP O 65 | Essex NNP I-NP I-ORG 66 | , , O O 67 | Derbyshire NNP I-NP I-ORG 68 | and CC I-NP O 69 | Surrey NNP I-NP I-ORG 70 | all DT O O 71 | closed VBD I-VP O 72 | in RP I-PRT O 73 | on IN I-PP O 74 | victory NN I-NP O 75 | while IN I-SBAR O 76 | Kent NNP I-NP I-ORG 77 | made VBD I-VP O 78 | up RP I-PRT O 79 | for IN I-PP O 80 | lost VBN I-NP O 81 | time NN I-NP O 82 | in IN I-PP O 83 | their PRP$ I-NP O 84 | rain-affected JJ I-NP O 85 | match NN I-NP O 86 | against IN I-PP O 87 | Nottinghamshire NNP I-NP I-ORG 88 | . . O O 89 | 90 | -------------------------------------------------------------------------------- /scripts/helper-script-to-run-the-experiment-set-small-sizes-sgd-parameter-sweep.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | experiment_name=${1:-section1-all-20171114-08} 4 | original_experiment_name=${experiment_name} 5 | 6 | ner_tagger_root=/home/onur/projects/research/focus/ner-tagger-dynet-multilayer 7 | 8 | n_trials=10 9 | 10 | dim=10 11 | morpho_tag_type=char 12 | 13 | for trial in `seq 1 ${n_trials}`; do 14 | 15 | for morpho_tag_type in char ; do 16 | 17 | small_sizes="char_dim=$dim \ 18 | char_lstm_dim=$dim \ 19 | morpho_tag_dim=$dim \ 20 | morpho_tag_lstm_dim=$dim \ 21 | morpho_tag_type=${morpho_tag_type} \ 22 | word_dim=$dim \ 23 | word_lstm_dim=$dim " 24 | 25 | # experiment_name=${original_experiment_name}-dim-${dim}-morpho_tag_type-${morpho_tag_type}-trial-`printf "%02d" ${trial}` 26 | experiment_name=${original_experiment_name}-sgd-sweep 27 | 28 | # for learning_rate in 0.1 0.05 0.01 0.005 0.001 ; do 29 | for learning_rate in 0.1 0.05 0.01 0.005 0.001 0.15 0.2 0.5 ; do 30 | pre_command="echo 
${original_experiment_name}-learning_rate-${learning_rate}-trial-`printf "%02d" ${trial}` >> ${experiment_name}.log" 31 | command=${pre_command}" && ""cd ${ner_tagger_root} && \ 32 | source /usr/local/bin/virtualenvwrapper.sh && workon dynet && \ 33 | source environment-variables && python control_experiments.py -m joint_ner_and_md with \ 34 | active_models=0 \ 35 | integration_mode=0 \ 36 | dynet_gpu=0 \ 37 | embeddings_filepath=\"\" \ 38 | train_filepath=turkish/gungor.ner.train.14.only_consistent \ 39 | dev_filepath=turkish/gungor.ner.dev.14.only_consistent \ 40 | test_filepath=turkish/gungor.ner.test.14.only_consistent \ 41 | $small_sizes \ 42 | lr_method=`printf "sgd-learning_rate_float@%.03lf" ${learning_rate}` \ 43 | max_epochs=10 \ 44 | experiment_name=${experiment_name} ;" 45 | echo $command; 46 | done 47 | done 48 | done -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-05.txt.rules: -------------------------------------------------------------------------------- 1 | 1 Xın+Adverb X+Verb+Pos+Imp+A2pl X+Noun+A3sg+P2sg+Nom X+Noun+A3sg+Pnon+Gen Xı+Noun+A3sg+P2sg+Nom Xın+Noun+A3sg+Pnon+Nom Xın+Noun+A3sg+Pnon+Nom 2 | 2 Xler'i+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3pl+P3sg+Nom X+Noun+Prop+A3pl+Pnon+Acc X+Noun+Prop+A3pl+Nom Xler+Noun+Prop+A3sg+P3sg+Nom Xler+Noun+Prop+A3sg+Pnon+Acc X+Noun+Prop+A3pl+Pnon+Acc 3 | 3 Xl+Verb+Pos+Imp+A3sg Xl+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg Xls+Noun+A3sg+P2sg+Nom Xls+Noun+A3sg+Pnon+Gen Xlsu+Noun+A3sg+P2sg+Nom Xlsun+Noun+A3sg+Pnon+Nom Xl+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg 4 | 4 Xğı'nın+Noun+Prop+A3sg+Pnon+Nom Xğ+Noun+Prop+A3sg+P3sg+Gen Xğı+Noun+Prop+A3sg+P2sg+Gen Xğı+Noun+Prop+A3sg+Pnon+Gen Xg+Noun+Prop+A3sg+P3sg+Gen Xk+Noun+Prop+A3sg+P3sg+Gen Xk+Noun+Prop+A3sg+P3sg+Gen 5 | 5 Xum+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A1sg+Cop 
Xu+Adj^DB+Noun+Zero+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xum+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xum+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xum+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 6 | 6 Xici+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom X+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adj+FitFor X+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom X+Verb+Pos^DB+Adj+Agt^DB+Noun+Ness+A3sg+Pnon+Nom Xi+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adj+FitFor Xi+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom X+Verb+Pos^DB+Adj+Agt^DB+Noun+Ness+A3sg+Pnon+Nom 7 | 7 Xz+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xz+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xz+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xz+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg Xz+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xz+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xz+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg -------------------------------------------------------------------------------- /scripts/helper-script-to-run-the-experiment-set-small-sizes-adam-parameter-sweep.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | experiment_name=${1:-section1-all-20171114-08} 4 | original_experiment_name=${experiment_name} 5 | 6 | ner_tagger_root=/home/onur/projects/research/focus/ner-tagger-dynet-multilayer 7 | 8 | n_trials=10 9 | 10 | dim=10 11 | morpho_tag_type=char 12 | 13 | for trial in `seq 1 ${n_trials}`; do 14 | 15 | for morpho_tag_type in char ; do 16 | 17 | small_sizes="char_dim=$dim \ 18 | char_lstm_dim=$dim \ 19 | morpho_tag_dim=$dim \ 20 | morpho_tag_lstm_dim=$dim \ 21 | morpho_tag_type=${morpho_tag_type} \ 22 | word_dim=$dim \ 23 | word_lstm_dim=$dim " 24 | 25 | # 
experiment_name=${original_experiment_name}-dim-${dim}-morpho_tag_type-${morpho_tag_type}-trial-`printf "%02d" ${trial}` 26 | experiment_name=${original_experiment_name}-adam-sweep-sparse_updates_enabled 27 | 28 | for learning_rate in 0.1 0.05 0.01 0.005 0.001 ; do 29 | for sparse_updates_enabled in 0 ; do 30 | pre_command="echo ${original_experiment_name}-learning_rate-${learning_rate}-trial-`printf "%02d" ${trial}` >> ${experiment_name}.log" 31 | command=${pre_command}" && ""cd ${ner_tagger_root} && \ 32 | source /usr/local/bin/virtualenvwrapper.sh && workon dynet && \ 33 | source environment-variables && python control_experiments.py -m joint_ner_and_md with \ 34 | active_models=0 \ 35 | integration_mode=0 \ 36 | dynet_gpu=0 \ 37 | embeddings_filepath=\"\" \ 38 | train_filepath=turkish/gungor.ner.train.14.only_consistent \ 39 | dev_filepath=turkish/gungor.ner.dev.14.only_consistent \ 40 | test_filepath=turkish/gungor.ner.test.14.only_consistent \ 41 | $small_sizes \ 42 | lr_method=`printf "adam-alpha_float@%.03lf" ${learning_rate}` \ 43 | sparse_updates_enabled=${sparse_updates_enabled} 44 | max_epochs=10 \ 45 | experiment_name=${experiment_name} ;" 46 | echo $command; 47 | done; 48 | done 49 | done 50 | done -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-05.txt.rules.sed: -------------------------------------------------------------------------------- 1 | s/^\(.\+\) \(\(.\+\)ın+Adverb\)\( \3+Verb+Pos+Imp+A2pl \3+Noun+A3sg+P2sg+Nom \3+Noun+A3sg+Pnon+Gen \3ı+Noun+A3sg+P2sg+Nom \3ın+Noun+A3sg+Pnon+Nom .\+\)$/\1 \3ın+Noun+A3sg+Pnon+Nom\4/g 2 | s/^\(.\+\) \(\(.\+\)ler'i+Noun+Prop+A3sg+Pnon+Nom\)\( \3+Noun+Prop+A3pl+P3sg+Nom \3+Noun+Prop+A3pl+Pnon+Acc \3+Noun+Prop+A3pl+Nom \3ler+Noun+Prop+A3sg+P3sg+Nom \3ler+Noun+Prop+A3sg+Pnon+Acc .\+\)$/\1 \3+Noun+Prop+A3pl+Pnon+Acc\4/g 3 | s/^\(.\+\) \(\(.\+\)l+Verb+Pos+Imp+A3sg\)\( \3l+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg \3ls+Noun+A3sg+P2sg+Nom 
\3ls+Noun+A3sg+Pnon+Gen \3lsu+Noun+A3sg+P2sg+Nom \3lsun+Noun+A3sg+Pnon+Nom .\+\)$/\1 \3l+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg\4/g 4 | s/^\(.\+\) \(\(.\+\)ğı'nın+Noun+Prop+A3sg+Pnon+Nom\)\( \3ğ+Noun+Prop+A3sg+P3sg+Gen \3ğı+Noun+Prop+A3sg+P2sg+Gen \3ğı+Noun+Prop+A3sg+Pnon+Gen \3g+Noun+Prop+A3sg+P3sg+Gen \3k+Noun+Prop+A3sg+P3sg+Gen .\+\)$/\1 \3k+Noun+Prop+A3sg+P3sg+Gen\4/g 5 | s/^\(.\+\) \(\(.\+\)um+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop\)\( \3+Noun+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A1sg+Cop \3u+Adj^DB+Noun+Zero+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3um+Noun+A3sg+Pnon+Nom^DB+Adverb+Since \3um+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg .\+\)$/\1 \3um+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg\4/g 6 | s/^\(.\+\) \(\(.\+\)ici+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom\)\( \3+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adj+FitFor \3+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom \3+Verb+Pos^DB+Adj+Agt^DB+Noun+Ness+A3sg+Pnon+Nom \3i+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adj+FitFor \3i+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom .\+\)$/\1 \3+Verb+Pos^DB+Adj+Agt^DB+Noun+Ness+A3sg+Pnon+Nom\4/g 7 | s/^\(.\+\) \(\(.\+\)z+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop\)\( \3z+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since \3z+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3z+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg \3z+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since \3z+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg .\+\)$/\1 \3z+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg\4/g 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | # Build dependencies 4 | RUN echo "deb-src 
http://archive.ubuntu.com/ubuntu/ xenial main restricted" >> /etc/apt/sources.list && apt-get update && apt-get install -y python-pip git mercurial build-essential cmake libopenblas-dev 5 | 6 | RUN pip install --upgrade pip 7 | RUN pip install wheel 8 | 9 | RUN pip install cython 10 | 11 | RUN pip install numpy==1.12.0 12 | 13 | ENV EIGEN_VERSION 699b659 14 | # ENV EIGEN_VERSION 346ecdb 15 | RUN cd /opt && \ 16 | hg clone https://bitbucket.org/eigen/eigen/ -r ${EIGEN_VERSION} 17 | 18 | # code for installing Intel MKL. You need to supply l_mkl_2018.1.163.tgz file 19 | #COPY l_mkl_2018.1.163.tgz /opt/ 20 | #COPY docker/my_silent.cfg /opt/l_mkl_2018.1.163/ 21 | #RUN cd /opt/ && \ 22 | # tar zxvf l_mkl_2018.1.163.tgz && cd /opt/l_mkl_2018.1.163 && \ 23 | # ./install.sh --silent "my_silent.cfg" 24 | #ENV LD_PRELOAD /opt/intel/mkl/lib/intel64/libmkl_def.so:/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_intel_thread.so:/opt/intel/lib/intel64_lin/libiomp5.so 25 | 26 | # DyNet, version 4234759 27 | ENV DYNET_VERSION 2.0.1 28 | RUN cd /opt && \ 29 | git clone https://github.com/clab/dynet.git && \ 30 | cd dynet && \ 31 | git checkout ${DYNET_VERSION} && \ 32 | mkdir build && \ 33 | cd build && \ 34 | cmake .. -DEIGEN3_INCLUDE_DIR=/opt/eigen -DPYTHON=`which python` && \ 35 | make -j2 && \ 36 | cd python && \ 37 | python ../../setup.py build --build-dir=.. --skip-build install 38 | 39 | RUN cd /opt/dynet/build && make -j2 install 40 | 41 | # if you want to use Intel MKL, change the above cmake line to 42 | # cmake .. 
-DEIGEN3_INCLUDE_DIR=/opt/eigen -DPYTHON=`which python` -DMKL_ROOT=/opt/intel/mkl && \ 43 | 44 | ENV DYLD_LIBRARY_PATH /opt/dynet/build/dynet/ 45 | ENV LD_LIBRARY_PATH /opt/dynet/build/dynet/ 46 | 47 | RUN mkdir /opt/ner-tagger-dynet 48 | 49 | WORKDIR /opt/ner-tagger-dynet 50 | 51 | COPY *.py /opt/ner-tagger-dynet/ 52 | COPY requirements.txt /opt/ner-tagger-dynet/ 53 | 54 | RUN pip install -r requirements.txt 55 | 56 | RUN mkdir dataset 57 | 58 | COPY evaluation/conlleval evaluation/ 59 | RUN mkdir -p evaluation/temp/eval_logs/ 60 | 61 | RUN mkdir models/ 62 | 63 | -------------------------------------------------------------------------------- /dataset/eng.testb.118: -------------------------------------------------------------------------------- 1 | SOCCER NN I-NP O 2 | - : O O 3 | JAPAN NNP I-NP I-LOC 4 | GET VB I-VP O 5 | LUCKY NNP I-NP O 6 | WIN NNP I-NP O 7 | , , O O 8 | CHINA NNP I-NP I-PER 9 | IN IN I-PP O 10 | SURPRISE DT I-NP O 11 | DEFEAT NN I-NP O 12 | . . O O 13 | 14 | Nadim NNP I-NP I-PER 15 | Ladki NNP I-NP I-PER 16 | 17 | AL-AIN NNP I-NP I-LOC 18 | , , O O 19 | United NNP I-NP I-LOC 20 | Arab NNP I-NP I-LOC 21 | Emirates NNPS I-NP I-LOC 22 | 1996-12-06 CD I-NP O 23 | 24 | Japan NNP I-NP I-LOC 25 | began VBD I-VP O 26 | the DT I-NP O 27 | defence NN I-NP O 28 | of IN I-PP O 29 | their PRP$ I-NP O 30 | Asian JJ I-NP I-MISC 31 | Cup NNP I-NP I-MISC 32 | title NN I-NP O 33 | with IN I-PP O 34 | a DT I-NP O 35 | lucky JJ I-NP O 36 | 2-1 CD I-NP O 37 | win VBP I-VP O 38 | against IN I-PP O 39 | Syria NNP I-NP I-LOC 40 | in IN I-PP O 41 | a DT I-NP O 42 | Group NNP I-NP O 43 | C NNP I-NP O 44 | championship NN I-NP O 45 | match NN I-NP O 46 | on IN I-PP O 47 | Friday NNP I-NP O 48 | . .
O O 49 | 50 | But CC O O 51 | China NNP I-NP I-LOC 52 | saw VBD I-VP O 53 | their PRP$ I-NP O 54 | luck NN I-NP O 55 | desert VB I-VP O 56 | them PRP I-NP O 57 | in IN I-PP O 58 | the DT I-NP O 59 | second NN I-NP O 60 | match NN I-NP O 61 | of IN I-PP O 62 | the DT I-NP O 63 | group NN I-NP O 64 | , , O O 65 | crashing VBG I-VP O 66 | to TO I-PP O 67 | a DT I-NP O 68 | surprise NN I-NP O 69 | 2-0 CD I-NP O 70 | defeat NN I-NP O 71 | to TO I-PP O 72 | newcomers NNS I-NP O 73 | Uzbekistan NNP I-NP I-LOC 74 | . . O O 75 | 76 | China NNP I-NP I-LOC 77 | controlled VBD I-VP O 78 | most JJS I-NP O 79 | of IN I-PP O 80 | the DT I-NP O 81 | match NN I-NP O 82 | and CC O O 83 | saw VBD I-VP O 84 | several JJ I-NP O 85 | chances NNS I-NP O 86 | missed VBD I-VP O 87 | until IN I-SBAR O 88 | the DT I-NP O 89 | 78th JJ I-NP O 90 | minute NN I-NP O 91 | when WRB I-ADVP O 92 | Uzbek NNP I-NP I-MISC 93 | striker NN I-NP O 94 | Igor JJ B-NP I-PER 95 | Shkvyrin NNP I-NP I-PER 96 | took VBD I-VP O 97 | advantage NN I-NP O 98 | of IN I-PP O 99 | a DT I-NP O 100 | misdirected JJ I-NP O 101 | defensive JJ I-NP O 102 | header NN I-NP O 103 | to TO I-VP O 104 | lob VB I-VP O 105 | the DT I-NP O 106 | ball NN I-NP O 107 | over IN I-PP O 108 | the DT I-NP O 109 | advancing VBG I-NP O 110 | Chinese JJ I-NP I-MISC 111 | keeper NN I-NP O 112 | and CC O O 113 | into IN I-PP O 114 | an DT I-NP O 115 | empty JJ I-NP O 116 | net NN I-NP O 117 | . . 
O O 118 | 119 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | See updated version at http://github.com/onurgu/joint-ner-and-md-tagger 3 | 4 | # Neural Tagger for MD and NER 5 | 6 | This repo contains the software that was used to conduct the experiments reported 7 | in our article titled "Improving Named Entity Recognition by Jointly Learning to 8 | Disambiguate Morphological Tags" [1] to be presented at [COLING 2018](http://coling2018.org). 9 | 10 | 11 | # Training and testing 12 | 13 | We recommend using the helper scripts for conducting experiments. The scripts named `helper-script-*` 14 | run the experiments in the paper with the given hyperparameters. 15 | 16 | 17 | bash ./scripts/helper-script-to-run-the-experiment-set-small-sizes.sh campaign_name | parallel -j6 18 | 19 | For the reporting part to work, you should set up a working [`sacred`](https://github.com/IDSIA/sacred) 20 | environment, which is very easy if you choose filesystem-based storage. You can find an 21 | example of this in the helper script found in the `./scripts/TRUBA` folder. 22 | 23 | ## Tag sentences 24 | 25 | This project does not have a designated tagger script for now, but you can obtain the output in `eval_dir`. 26 | You should provide the text in tokenized form in CoNLL format. 27 | The script will tag both the development and testing files and produce files in `./evaluation/temp/eval_logs/`. 28 | If you need this and want to contribute by coding and sharing it with the project, 29 | you are welcome. 30 | 31 | ## Replication of the experiments 32 | 33 | To reproduce the experiments reported with our model, you can use `Docker` 34 | and build a replica of our experimentation environment. 35 | 36 | To build: 37 | 38 | ```bash 39 | docker build -t yourimagename:yourversion .
40 | ``` 41 | 42 | To run: 43 | ```bash 44 | docker run -ti -v `pwd`/dataset:/opt/ner-tagger-dynet/dataset -v `pwd`/models:/opt/ner-tagger-dynet/models yourimagename:yourversion python train.py --train dataset/gungor.ner.train.small --dev dataset/gungor.ner.dev.small --test dataset/gungor.ner.test.small --word_dim 300 --word_lstm_dim 200 --word_bidirect 1 --cap_dim 100 --crf 1 --lr_method=adam --maximum-epochs 50 --char_dim 200 --char_lstm_dim 200 --char_bidirect 1 --overwrite-mappings 1 --batch-size 1 45 | ``` 46 | 47 | You should create or set permissions accordingly for ``` `pwd`/dataset ``` and ``` `pwd`/models ```. 48 | 49 | ## References 50 | 51 | [1] Gungor, O., Uskudarli, S., Gungor, T., Improving Named Entity Recognition by Jointly Learning to 52 | Disambiguate Morphological Tags, 2018, COLING 2018, 19-25 August, (to appear). 53 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-06.txt.rules: -------------------------------------------------------------------------------- 1 | 1 Xğu+Det Xk+Adj^DB+Noun+Zero+A3sg+P3sg+Nom Xk+Adj^DB+Noun+Zero+A3sg+Pnon+Acc Xk+Postp+PCAbl^DB+Noun+Zero+A3sg+P3sg+Nom Xk+Postp+PCAbl^DB+Noun+Zero+A3sg+Pnon+Acc Xğu+Adj Xğu+Pron+Quant+A3pl+P3pl+Nom Xğu+Pron+Quant+A3pl+P3pl+Nom 2 | 2 Xişi+Noun+A3pl+Pnon+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+Pnon+Acc X+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf3+A3sg+P3pl+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+P3pl+Nom Xiş+Noun+A3pl+P3pl+Nom Xiş+Noun+A3sg+P3pl+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom 3 | 3 Xişi+Noun+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+Pnon+Acc X+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf3+A3sg+P3pl+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+P3pl+Nom Xiş+Noun+A3pl+P3pl+Nom Xiş+Noun+A3sg+P3pl+Nom Xiş+Noun+A3pl+P3pl+Nom 4 | 4 Xişi+Noun+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf+A3pl+Pnon+Acc X+Verb+Pos^DB+Noun+Inf+A3pl+P3pl+Nom X+Verb+Pos^DB+Noun+Inf+A3sg+P3pl+Nom 
Xiş+Noun+A3pl+P3pl+Nom Xiş+Noun+A3sg+P3pl+Nom Xiş+Noun+A3pl+P3pl+Nom 5 | 5 Xuşma+Noun+A3pl+P3sg+Ins X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Ins X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Ins X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Ins Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Ins Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Ins Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Ins Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Ins 6 | 6 Xı+Ques+Pres+A2sg+Cop Xı+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg+Cop Xıs+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xıs+Noun+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+Cop+A3sg Xısı+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xısın+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xısındır+Noun+A3sg+Pnon+Nom Xı+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg+Cop 7 | 7 Xüş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Verb+Recip^DB+Verb+Caus+Pos+Imp+A2sg X+Verb+Pos^DB+Noun+Inf3+A3sg+Pnon+Nom^DB+Adverb+Since X+Verb+Pos^DB+Noun+Inf3+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xüş+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xüş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xüş+Verb^DB+Verb+Caus+Pos+Imp+A2sg Xüş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 8 | 8 Xl+Verb+Pos+Opt+A2sg Xl+Noun+A3sg+Pnon+Dat^DB+Verb+Zero+Pres+A2sg Xla+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg Xlas+Noun+A3sg+P2sg+Nom Xlas+Noun+A3sg+Pnon+Gen Xlası+Noun+A3sg+P2sg+Nom Xlasın+Noun+A3sg+Pnon+Nom Xlasın+Noun+A3sg+Pnon+Nom 9 | 9 Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Adj+NarrPart Xkle+Verb+Recip^DB+Verb+Caus+Pos+Narr+A3sg Xkle+Verb+Recip^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos+Narr+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero Xk+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Narr+A3sg Xk+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero Xk+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Narr+A3sg 10 | 10 Xu+Noun+A3sg+P3sg+Loc X+Noun+A3sg+P2sg+Loc X+Noun+A3sg+P3sg+Loc Xu+Noun+A3sg+P2sg+Loc Xun+Noun+A3sg+Pnon+Loc Xunda+Noun+A3sg+Pnon+Nom 
Xunt+Noun+A3sg+Pnon+Dat X+Noun+A3sg+P3sg+Loc -------------------------------------------------------------------------------- /dataset/gungor.ner.test.small: -------------------------------------------------------------------------------- 1 | Bunlar bu+Pron+Demons+A3pl+Pnon+Nom bu+Pron+Demons+A3pl+Pnon+Nom O 2 | , ,+Punc ,+Punc O 3 | mücadelenin mücadele+Noun+A3sg+Pnon+Gen mücadele+Noun+A3sg+P2sg+Gen mücadele+Noun+A3sg+Pnon+Gen O 4 | kesin kesin+Adj kes+Noun+A3sg+P2sg+Nom kes+Noun+A3sg+Pnon+Gen kes+Verb+Pos+Imp+A2pl kesin+Adj O 5 | galibini galip+Noun+A3sg+P3sg+Acc galip+Noun+A3sg+P3sg+Acc galip+Noun+A3sg+P2sg+Acc O 6 | belirleyecek belirle+Verb+Pos^DB+Adj+FutPart+Pnon belirle+Verb+Pos+Fut+A3sg belirle+Verb+Pos^DB+Adj+FutPart+Pnon O 7 | bir bir+Det bir+Det bir+Adverb bir+Adj bir+Num+Card O 8 | sayısal sayısal+Adj sayısal+Adj sayı+Noun+A3sg+Pnon+Nom^DB+Adj+Related O 9 | güce güç+Noun+A3sg+Pnon+Dat güç+Noun+A3sg+Pnon+Dat güç+Adj^DB+Noun+Zero+A3sg+Pnon+Dat O 10 | sahip sahip+Noun+A3sg+Pnon+Nom sahip+Noun+A3sg+Pnon+Nom O 11 | bulunuyorlar bulun+Verb+Pos+Prog1+A3pl bul+Verb^DB+Verb+Pass+Pos+Prog1+A3pl bulun+Verb+Pos+Prog1+A3pl O 12 | 13 | Bugünkü bugün+Noun+A3sg+Pnon+Nom^DB+Adj+Rel bugün+Noun+A3sg+Pnon+Nom^DB+Adj+Rel O 14 | mücadele mücadele+Noun+A3sg+Pnon+Nom mücadele+Noun+A3sg+Pnon+Nom O 15 | , ,+Punc ,+Punc O 16 | Savaş savaş+Noun+A3sg+Pnon+Nom Savaş+Noun+Prop+A3sg+Pnon+Nom savaş+Noun+A3sg+Pnon+Nom savaş+Verb+Pos+Imp+A2sg O 17 | sonrası sonra+Postp+PCAbl^DB+Noun+Zero+A3sg+P3sg+Nom sonra+Noun^DB+Adj+Almost sonra+Noun+A3sg+P3sg+Nom sonra+Postp+PCAbl^DB+Noun+Zero^DB+Adj+Almost sonra+Postp+PCAbl^DB+Noun+Zero+A3sg+P3sg+Nom O 18 | Fedaral Fedaral+Noun+Prop+A3sg+Pnon+Nom fedaral+Noun+A3sg+Pnon+Nom I-LOC 19 | Almanya Almanya+Noun+Prop+A3sg+Pnon+Nom Almanya+Noun+Prop+A3sg+Pnon+Nom I-LOC 20 | Cumhuriyeti'nin Cumhuriyet+Noun+Prop+A3sg+P3sg+Gen Cumhuriyet+Noun+Prop+A3sg+P3sg+Gen I-LOC 21 | kuruluşundan kuruluş+Noun+A3sg+P3sg+Abl 
kur+Verb^DB+Verb+Pass+Pos^DB+Noun+Inf3+A3sg+P3sg+Abl kur+Verb^DB+Verb+Pass+Pos^DB+Noun+Inf3+A3sg+P2sg+Abl kuruluş+Noun+A3sg+P3sg+Abl kuruluş+Noun+A3sg+P2sg+Abl O 22 | bu bu+Det bu+Det bu+Pron+Demons+A3sg+Pnon+Nom O 23 | yana yan+Verb+Pos+Opt+A3sg yan+Noun+A3sg+Pnon+Dat yan+Verb+Pos+Opt+A3sg yana+Postp+PCAbl O 24 | 14ncü 14ncü+Noun+A3sg+Pnon+Nom *UNKNOWN* O 25 | , ,+Punc ,+Punc O 26 | iki iki+Num+Card iki+Num+Card O 27 | Almanya'nın Almanya+Noun+Prop+A3sg+Pnon+Gen Almanya+Noun+Prop+A3sg+Pnon+Gen Almanya+Noun+Prop+A3sg+P2sg+Gen I-LOC 28 | birlişmesinden birlişmesinden+Noun+A3sg+Pnon+Nom birlişme+Noun+A3sg+P3sg+Abl birlişmes+Noun+A3sg+P2sg+Abl birlişmes+Noun+A3sg+P3sg+Abl birlişmesi+Noun+A3sg+P2sg+Abl birlişmesin+Noun+A3sg+Pnon+Abl birlişmesinde+Noun+A3sg+P2sg+Nom birlişmesinden+Noun+A3sg+Pnon+Nom O 29 | sonra sonra+Adverb sonra+Adverb sonra+Noun+A3sg+Pnon+Nom sonra+Postp+PCAbl O 30 | da da+Conj da+Conj O 31 | 3. 3.+Noun+A3sg+Pnon+Nom 3+Num+Ord O 32 | ve ve+Conj ve+Conj O 33 | 20. 20.+Noun+A3sg+Pnon+Nom 20+Num+Ord O 34 | yüzyılın yüzyıl+Noun+A3sg+Pnon+Gen yüzyıl+Noun+A3sg+Pnon+Gen yüzyıl+Noun+A3sg+P2sg+Nom O 35 | son son+Noun+A3sg+Pnon+Nom son+Noun+A3sg+Pnon+Nom son+Adj O 36 | seçimi seçim+Noun+A3sg+P3sg+Nom seçi+Noun+A3sg+P1sg+Acc seçim+Noun+A3sg+Pnon+Acc seçim+Noun+A3sg+P3sg+Nom O 37 | 38 | -------------------------------------------------------------------------------- /scripts/helper-script-to-run-the-experiment-set.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Runs control_experiments.py (joint_ner_and_md) directly, once per configuration selected by integration_mode 0, 1 and 2; the first argument overrides the default experiment name. 3 | experiment_name=${1:-section1-all-20171114-08} 4 | 5 | ner_tagger_root=/home/onur/projects/research/focus/ner-tagger-dynet-multilayer 6 | 7 | for imode in 0 1 2 ; do 8 | if [[ $imode == 0 ]]; then 9 | for amodels in 1 0 ; do 10 | cd ${ner_tagger_root} && \ 11 | source /usr/local/bin/virtualenvwrapper.sh && workon dynet && \ 12 | source environment-variables && python control_experiments.py -m joint_ner_and_md with \ 13 |
active_models=${amodels} \ 14 | integration_mode=$imode \ 15 | dynet_gpu=0 \ 16 | embeddings_filepath="" \ 17 | train_filepath=turkish/gungor.ner.train.14.only_consistent \ 18 | dev_filepath=turkish/gungor.ner.dev.14.only_consistent \ 19 | test_filepath=turkish/gungor.ner.test.14.only_consistent \ 20 | experiment_name=${experiment_name} ; 21 | done; 22 | cd ${ner_tagger_root} && \ 23 | source /usr/local/bin/virtualenvwrapper.sh && workon dynet && \ 24 | source environment-variables && python control_experiments.py -m joint_ner_and_md with \ 25 | active_models=0 \ 26 | integration_mode=0 \ 27 | use_golden_morpho_analysis_in_word_representation=1 \ 28 | dynet_gpu=0 \ 29 | embeddings_filepath="" \ 30 | train_filepath=turkish/gungor.ner.train.14.only_consistent \ 31 | dev_filepath=turkish/gungor.ner.dev.14.only_consistent \ 32 | test_filepath=turkish/gungor.ner.test.14.only_consistent \ 33 | experiment_name=${experiment_name} ; 34 | elif [[ $imode == 1 ]]; then 35 | cd ${ner_tagger_root} && \ 36 | source /usr/local/bin/virtualenvwrapper.sh && workon dynet && \ 37 | source environment-variables && python control_experiments.py -m joint_ner_and_md with \ 38 | active_models=2 \ 39 | integration_mode=1 \ 40 | dynet_gpu=0 \ 41 | embeddings_filepath="" \ 42 | train_filepath=turkish/gungor.ner.train.14.only_consistent \ 43 | dev_filepath=turkish/gungor.ner.dev.14.only_consistent \ 44 | test_filepath=turkish/gungor.ner.test.14.only_consistent \ 45 | experiment_name=${experiment_name} ; 46 | else 47 | cd ${ner_tagger_root} && \ 48 | source /usr/local/bin/virtualenvwrapper.sh && workon dynet && \ 49 | source environment-variables && python control_experiments.py -m joint_ner_and_md with \ 50 | active_models=2 \ 51 | integration_mode=2 \ 52 | multilayer=1 \ 53 | shortcut_connections=1 \ 54 | dynet_gpu=0 \ 55 | embeddings_filepath="" \ 56 | train_filepath=turkish/gungor.ner.train.14.only_consistent \ 57 | dev_filepath=turkish/gungor.ner.dev.14.only_consistent \ 58 |
test_filepath=turkish/gungor.ner.test.14.only_consistent \ 59 | experiment_name=${experiment_name} ; 60 | # Second integration_mode=2 run: same arguments, but without multilayer=1 and shortcut_connections=1. 61 | cd ${ner_tagger_root} && \ 62 | source /usr/local/bin/virtualenvwrapper.sh && workon dynet && \ 63 | source environment-variables && python control_experiments.py -m joint_ner_and_md with \ 64 | active_models=2 \ 65 | integration_mode=2 \ 66 | dynet_gpu=0 \ 67 | embeddings_filepath="" \ 68 | train_filepath=turkish/gungor.ner.train.14.only_consistent \ 69 | dev_filepath=turkish/gungor.ner.dev.14.only_consistent \ 70 | test_filepath=turkish/gungor.ner.test.14.only_consistent \ 71 | experiment_name=${experiment_name} ; 72 | 73 | fi ; 74 | done -------------------------------------------------------------------------------- /dataset/gungor.ner.dev.small: -------------------------------------------------------------------------------- 1 | BİRİ biri+Pron+Quant+A3sg+P3sg+Nom bir+Adj^DB+Noun+Zero+A3sg+P3sg+Nom bir+Adj^DB+Noun+Zero+A3sg+Pnon+Acc bir+Num+Card^DB+Noun+Zero+A3sg+P3sg+Nom bir+Num+Card^DB+Noun+Zero+A3sg+Pnon+Acc biri+Pron+A3sg+P3sg+Nom O 2 | bitmeden bit+Verb+Pos^DB+Noun+Inf2+A3sg+Pnon+Abl bit+Verb+Pos^DB+Adverb+WithoutHavingDoneSo bit+Verb+Pos^DB+Noun+Inf2+A3sg+Pnon+Abl O 3 | öteki öte+Noun+A3sg+Pnon+Nom^DB+Adj+Rel öte+Noun+A3sg+Pnon+Nom^DB+Adj+Rel öte+Postp+PCAbl^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adj+Rel O 4 | çıkıyor çık+Verb+Pos+Prog1+A3sg çık+Verb+Pos+Prog1+A3sg O 5 | 6 | Üretken üretken+Adj üretken+Adj O 7 | ülke ülke+Noun+A3sg+Pnon+Nom ülke+Noun+A3sg+Pnon+Nom O 8 | Türkiye Türkiye+Noun+Prop+A3sg+Pnon+Nom Türkiye+Noun+Prop+A3sg+Pnon+Nom türki+Adj^DB+Noun+Zero+A3sg+Pnon+Dat I-LOC 9 | 10 | Yapılacağı yap+Verb^DB+Verb+Pass+Pos^DB+Adj+FutPart+P3sg yap+Verb^DB+Verb+Pass+Pos^DB+Adj+FutPart+P3sg yap+Verb^DB+Verb+Pass+Pos^DB+Noun+FutPart+A3sg+P3sg+Nom O 11 | şüpheli şüphe+Noun+A3sg+Pnon+Nom^DB+Adj+With şüphe+Noun+A3sg+Pnon+Nom^DB+Adj+With O 12 | de de+Conj de+Conj de+Verb+Pos+Imp+A2sg O 13 | olsa ol+Verb+Pos+Desr+A3sg ol+Verb+Pos+Desr+A3sg O 14 | , ,+Punc
,+Punc O 15 | seçim seçim+Noun+A3sg+Pnon+Nom seçi+Noun+A3sg+P1sg+Nom seçim+Noun+A3sg+Pnon+Nom O 16 | üzerine üzer+Noun+A3sg+P3sg+Dat üzer+Noun+A3sg+P3sg+Dat üzer+Noun+A3sg+P2sg+Dat üz+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3sg+Dat üz+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P2sg+Dat O 17 | çeşitlemeler çeşitle+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Nom çeşitle+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Nom O 18 | günlerimizi gün+Noun+A3pl+P1pl+Acc gün+Noun+A3pl+P1pl+Acc günle+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P1pl+Acc O 19 | dolduruyordu dol+Verb^DB+Verb+Caus+Pos+Prog1+Past+A3sg dol+Verb^DB+Verb+Caus+Pos+Prog1+Past+A3sg O 20 | 21 | Sonra sonra+Adverb sonra+Adverb sonra+Noun+A3sg+Pnon+Nom sonra+Postp+PCAbl O 22 | Vergi vergi+Noun+A3sg+Pnon+Nom vergi+Noun+A3sg+Pnon+Nom Vergi+Noun+Prop+A3sg+Pnon+Nom O 23 | Yasası yasa+Noun+A3sg+P3sg+Nom yas+Verb+Pos^DB+Adj+FeelLike yas+Verb+Pos^DB+Noun+FeelLike+A3sg+Pnon+Nom yasa+Noun+A3sg+P3sg+Nom yasa+Noun^DB+Adj+Almost O 24 | tartışmaları tartış+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom tart+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom tart+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom tart+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc tart+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom tartış+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom tartış+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom tartış+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc tartış+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom O 25 | çıktı çık+Verb+Pos+Past+A3sg çıktı+Noun+A3sg+Pnon+Nom çık+Verb+Pos+Past+A3sg O 26 | 27 | Daha daha+Noun+A3sg+Pnon+Nom daha+Adverb O 28 | onlardan o+Pron+Demons+A3pl+Pnon+Abl o+Pron+Demons+A3pl+Pnon+Abl o+Pron+Pers+A3pl+Pnon+Abl on+Num+Card^DB+Noun+Zero+A3pl+Pnon+Abl O 29 | kurtulmadan kurtul+Verb+Pos^DB+Noun+Inf2+A3sg+Pnon+Abl kurtul+Verb+Pos^DB+Adverb+WithoutHavingDoneSo kurtul+Verb+Pos^DB+Noun+Inf2+A3sg+Pnon+Abl O 30 | işte iş+Noun+A3sg+Pnon+Loc işte+Adverb iş+Noun+A3sg+Pnon+Loc O 31 | yeniden yeniden+Adverb yeniden+Adverb yeni+Adj^DB+Noun+Zero+A3sg+Pnon+Abl O 32 | döndük dön+Verb+Pos+Past+A1pl 
dön+Verb+Pos+Past+A1pl dön+Verb+Pos^DB+Adj+PastPart+Pnon dön+Verb+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom O 33 | çetelere çete+Noun+A3pl+Pnon+Dat çete+Noun+A3pl+Pnon+Dat O 34 | 35 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-06.txt.rules.sed: -------------------------------------------------------------------------------- 1 | s/^\(.\+\) \(\(.\+\)ğu+Det\)\( \3k+Adj^DB+Noun+Zero+A3sg+P3sg+Nom \3k+Adj^DB+Noun+Zero+A3sg+Pnon+Acc \3k+Postp+PCAbl^DB+Noun+Zero+A3sg+P3sg+Nom \3k+Postp+PCAbl^DB+Noun+Zero+A3sg+Pnon+Acc \3ğu+Adj \3ğu+Pron+Quant+A3pl+P3pl+Nom .\+\)$/\1 \3ğu+Pron+Quant+A3pl+P3pl+Nom\4/g 2 | s/^\(.\+\) \(\(.\+\)işi+Noun+A3pl+Pnon+Nom\)\( \3+Verb+Pos^DB+Noun+Inf3+A3pl+Pnon+Acc \3+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom \3+Verb+Pos^DB+Noun+Inf3+A3sg+P3pl+Nom \3+Verb+Pos^DB+Noun+Inf3+A3pl+P3pl+Nom \3iş+Noun+A3pl+P3pl+Nom \3iş+Noun+A3sg+P3pl+Nom .\+\)$/\1 \3+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom\4/g 3 | s/^\(.\+\) \(\(.\+\)işi+Noun+A3pl+P3sg+Nom\)\( \3+Verb+Pos^DB+Noun+Inf3+A3pl+Pnon+Acc \3+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom \3+Verb+Pos^DB+Noun+Inf3+A3sg+P3pl+Nom \3+Verb+Pos^DB+Noun+Inf3+A3pl+P3pl+Nom \3iş+Noun+A3pl+P3pl+Nom \3iş+Noun+A3sg+P3pl+Nom .\+\)$/\1 \3iş+Noun+A3pl+P3pl+Nom\4/g 4 | s/^\(.\+\) \(\(.\+\)işi+Noun+A3pl+P3sg+Nom\)\( \3+Verb+Pos^DB+Noun+Inf+A3pl+P3sg+Nom \3+Verb+Pos^DB+Noun+Inf+A3pl+Pnon+Acc \3+Verb+Pos^DB+Noun+Inf+A3pl+P3pl+Nom \3+Verb+Pos^DB+Noun+Inf+A3sg+P3pl+Nom \3iş+Noun+A3pl+P3pl+Nom \3iş+Noun+A3sg+P3pl+Nom .\+\)$/\1 \3iş+Noun+A3pl+P3pl+Nom\4/g 5 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3sg+Ins\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Ins \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Ins \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Ins \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Ins \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Ins \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Ins .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Ins\4/g 6 | s/^\(.\+\) \(\(.\+\)ı+Ques+Pres+A2sg+Cop\)\( 
\3ı+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg+Cop \3ıs+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3ıs+Noun+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+Cop+A3sg \3ısı+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3ısın+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3ısındır+Noun+A3sg+Pnon+Nom .\+\)$/\1 \3ı+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg+Cop\4/g 7 | s/^\(.\+\) \(\(.\+\)üş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop\)\( \3+Verb+Recip^DB+Verb+Caus+Pos+Imp+A2sg \3+Verb+Pos^DB+Noun+Inf3+A3sg+Pnon+Nom^DB+Adverb+Since \3+Verb+Pos^DB+Noun+Inf3+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3üş+Noun+A3sg+Pnon+Nom^DB+Adverb+Since \3üş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3üş+Verb^DB+Verb+Caus+Pos+Imp+A2sg .\+\)$/\1 \3üş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg\4/g 8 | s/^\(.\+\) \(\(.\+\)l+Verb+Pos+Opt+A2sg\)\( \3l+Noun+A3sg+Pnon+Dat^DB+Verb+Zero+Pres+A2sg \3la+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg \3las+Noun+A3sg+P2sg+Nom \3las+Noun+A3sg+Pnon+Gen \3lası+Noun+A3sg+P2sg+Nom \3lasın+Noun+A3sg+Pnon+Nom .\+\)$/\1 \3lasın+Noun+A3sg+Pnon+Nom\4/g 9 | s/^\(.\+\) \(\(.\+\)k+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Adj+NarrPart\)\( \3kle+Verb+Recip^DB+Verb+Caus+Pos+Narr+A3sg \3kle+Verb+Recip^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero \3k+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos+Narr+A3sg \3k+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero \3k+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Narr+A3sg \3k+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero .\+\)$/\1 \3k+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Narr+A3sg\4/g 10 | s/^\(.\+\) \(\(.\+\)u+Noun+A3sg+P3sg+Loc\)\( \3+Noun+A3sg+P2sg+Loc \3+Noun+A3sg+P3sg+Loc \3u+Noun+A3sg+P2sg+Loc \3un+Noun+A3sg+Pnon+Loc \3unda+Noun+A3sg+Pnon+Nom \3unt+Noun+A3sg+Pnon+Dat .\+\)$/\1 \3+Noun+A3sg+P3sg+Loc\4/g 11 | -------------------------------------------------------------------------------- /scripts/rules-for-cleaning.txt: -------------------------------------------------------------------------------- 1 | 2 
| 2 3 | X'ın X'ın+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P2sg+Nom X+Noun+Prop+A3sg+Pnon+Gen ==== X+Noun+Prop+A3sg+Pnon+Gen 4 | X'in X'in+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P2sg+Nom X+Noun+Prop+A3sg+Pnon+Gen ==== X+Noun+Prop+A3sg+Pnon+Gen 5 | X'un X'un+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P2sg+Nom X+Noun+Prop+A3sg+Pnon+Gen ==== X+Noun+Prop+A3sg+Pnon+Gen 6 | X'nın X'nın+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P2sg+Gen X+Noun+Prop+A3sg+Pnon+Gen ==== X+Noun+Prop+A3sg+Pnon+Gen 7 | Xcu Xcu+Noun+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom ==== X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt 8 | Xçı Xçı+Noun+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom ==== X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt 9 | Xcı Xcı+Noun+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom ==== X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt 10 | Xci Xci+Noun+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom ==== X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt 11 | Xçi Xçi+Noun+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom ==== X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt 12 | Xlardır X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3pl+Cop X+Noun+A3pl+Pnon+Nom^DB+Adverb+Since X+Noun+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg ==== X+Noun+A3pl+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 13 | Xdır X+Adj^DB+Verb+Zero+Pres+A3sg+Cop X+Adj^DB+Verb+Zero+Pres+Cop+A3sg X+Verb^DB+Verb+Caus+Pos+Imp+A2sg ==== X+Adj^DB+Verb+Zero+Pres+Cop+A3sg 14 | Xr Xr+Postp+PCNom Xr+Noun+A3sg+Pnon+Nom Xr+Postp+PCDat ==== Xr+Postp+PCDat 15 | Xmektedir X+Verb+Pos+Prog2+A3sg+Cop X+Verb+Pos+Prog2+Cop+A3sg X+Verb+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg ==== X+Verb+Pos+Prog2+Cop+A3sg 16 | Xmaktadır X+Verb+Pos+Prog2+A3sg+Cop X+Verb+Pos+Prog2+Cop+A3sg X+Verb+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg ==== X+Verb+Pos+Prog2+Cop+A3sg 17 | Xlı 
Xlı+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With X+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3sg+Pnon+Nom ==== X+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With 18 | Xli Xli+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With X+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3sg+Pnon+Nom ==== X+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With 19 | Xdir X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adverb+Since ==== X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 20 | Xildi Xil+Verb+Neg+Past+A3sg X+Verb^DB+Verb+Pass+Pos+Past+A3sg Xil+Verb+Past+A3sg ==== değil+Verb+Past+A3sg 21 | X'i X'i+Noun+Prop+A3sg+Pnon+Nom X+Num+Card^DB+Noun+Zero+A3sg+P3sg+Nom X+Num+Card^DB+Noun+Zero+A3sg+Pnon+Acc ==== X+Num+Card^DB+Noun+Zero+A3sg+Pnon+Acc 22 | X'i X'i+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P3sg+Nom X+Noun+Prop+A3sg+Pnon+Acc ==== X+Noun+Prop+A3sg+Pnon+Acc 23 | Xcıların Xcı+Noun+A3pl+Pnon+Gen X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3pl+Pnon+Gen X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3pl+P2sg+Nom ==== X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3pl+Pnon+Gen 24 | 25 | >>>>> Xçi+Noun+A3pl+Pnon+Gen -> X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3pl+Pnon+Gen 26 | 27 | Xtir X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+Pnon+Nom^DB+Adverb+Since X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 28 | 29 | 30 | 31 | değil değil+Verb+Neg+Pres+A3sg değ+Verb^DB+Verb+Pass+Pos+Imp+A2sg değil+Conj değil+Verb+Pres+A3sg 32 | 33 | Xil Xil+Verb+Neg+Pres+A3sg X+Verb^DB+Verb+Pass+Pos+Imp+A2sg Xil+Conj Xil+Verb+Pres+A3sg 34 | 35 | CHOOSE Xil+Conj 36 | 37 | 97 0 4 Xdır X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+Pnon+Nom^DB+Adverb+Since X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Verb^DB+Verb+Caus+Pos+Imp+A2sg 38 | 39 | CHOOSE X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg -------------------------------------------------------------------------------- /toolkit/rnn.py: 
-------------------------------------------------------------------------------- 1 | 2 | import dynet 3 | from dynet import BiRNNBuilder, CoupledLSTMBuilder, LSTMBuilder 4 | 5 | 6 | # def ensure_freshness(a): 7 | # if a.cg_version != dynet.cg().version(): raise ValueError("Attempt to use a stale expression.") 8 | 9 | 10 | class BiLSTMMultiLayeredWithShortcutConnections(BiRNNBuilder): 11 | 12 | def __init__(self, num_layers, input_dim, hidden_dim, model, rnn_builder_factory, 13 | shortcut_connections): 14 | 15 | """ 16 | 17 | This class implements a multilayered BiRNN with optional shortcut connections 18 | 19 | @param num_layers: depth of the BiRNN (asserted to be > 0) 20 | @param input_dim: size of the inputs 21 | @param hidden_dim: size of each layer's concatenated forward+backward output; must be even, each direction gets hidden_dim/2 22 | @param model: passed unchanged to BiRNNBuilder.__init__ and to every rnn_builder_factory call 23 | @param rnn_builder_factory: RNNBuilder subclass, e.g. LSTMBuilder 24 | @param shortcut_connections: if true, layers after the first take input_dim+hidden_dim inputs (original input plus previous layer output) """ 25 | super(BiLSTMMultiLayeredWithShortcutConnections, self).__init__(num_layers, input_dim, 26 | hidden_dim, model, 27 | rnn_builder_factory) 28 | assert num_layers > 0 29 | assert hidden_dim % 2 == 0 30 | self.shortcut_connections = shortcut_connections 31 | self.builder_layers = [] # type: list[(LSTMBuilder, LSTMBuilder)] 32 | f = rnn_builder_factory(1, input_dim, hidden_dim/2, model) 33 | b = rnn_builder_factory(1, input_dim, hidden_dim/2, model) 34 | self.builder_layers.append((f,b)) 35 | for _ in xrange(num_layers-1): 36 | if self.shortcut_connections: 37 | current_level_input_dim = input_dim+hidden_dim 38 | else: 39 | current_level_input_dim = hidden_dim 40 | f = rnn_builder_factory(1, current_level_input_dim, hidden_dim/2, model) 41 | b = rnn_builder_factory(1, current_level_input_dim, hidden_dim/2, model) 42 | self.builder_layers.append((f,b)) 43 | 44 | def transduce(self, es): 45 | """ 46 | returns a tuple (es, layer_outputs), not a bare list: es is the list of output 47 | Expressions of the last layer (forward and backward outputs concatenated, preceded by 48 | the original input when shortcut_connections is set) and layer_outputs lists them per layer.
49 | 50 | @param es: a list of Expression (reversed() is applied to it, so it must be a sequence, not a generator) 51 | 52 | see also add_inputs(xs) 53 | 54 | .transduce(xs) is different from .add_inputs(xs) in the following way: 55 | 56 | .add_inputs(xs) returns a list of RNNState pairs. RNNState objects can be 57 | queried in various ways. In particular, they allow access to the previous 58 | state, as well as to the state-vectors (h() and s() ) 59 | 60 | .transduce(xs) returns only Expressions (here inside the (es, layer_outputs) tuple). These are just the output 61 | expressions. For many cases, this suffices. 62 | transduce is much more memory efficient than add_inputs. 63 | """ 64 | # for e in es: 65 | # ensure_freshness(e) 66 | original_input = list(es) 67 | layer_outputs = [] 68 | fs = self.builder_layers[0][0].initial_state().transduce(es) 69 | bs = self.builder_layers[0][1].initial_state().transduce(reversed(es)) 70 | if self.shortcut_connections: 71 | es = [dynet.concatenate([original_input_item, f, b]) 72 | for original_input_item, f, b in zip(original_input, fs, reversed(bs))] 73 | else: 74 | es = [dynet.concatenate([f, b]) 75 | for f, b in zip(fs, reversed(bs))] 76 | layer_outputs.append(es) 77 | for (fb, bb) in self.builder_layers[1:]: 78 | fs = fb.initial_state().transduce(es) 79 | bs = bb.initial_state().transduce(reversed(es)) 80 | if self.shortcut_connections: 81 | es = [dynet.concatenate([original_input_item, f, b]) 82 | for original_input_item, f, b in zip(original_input, fs, reversed(bs))] 83 | else: 84 | es = [dynet.concatenate([f, b]) 85 | for f, b in zip(fs, reversed(bs))] 86 | layer_outputs.append(es) 87 | return es, layer_outputs 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /dataset/eng.train.208: -------------------------------------------------------------------------------- 1 | EU NNP I-NP I-ORG 2 | rejects VBZ I-VP O 3 | German JJ I-NP I-MISC 4 | call NN I-NP O 5 | to TO I-VP O 6 | boycott VB I-VP O 7 | British JJ I-NP I-MISC 8 | lamb NN I-NP O 9 | . .
O O 10 | 11 | Peter NNP I-NP I-PER 12 | Blackburn NNP I-NP I-PER 13 | 14 | BRUSSELS NNP I-NP I-LOC 15 | 1996-08-22 CD I-NP O 16 | 17 | The DT I-NP O 18 | European NNP I-NP I-ORG 19 | Commission NNP I-NP I-ORG 20 | said VBD I-VP O 21 | on IN I-PP O 22 | Thursday NNP I-NP O 23 | it PRP B-NP O 24 | disagreed VBD I-VP O 25 | with IN I-PP O 26 | German JJ I-NP I-MISC 27 | advice NN I-NP O 28 | to TO I-PP O 29 | consumers NNS I-NP O 30 | to TO I-VP O 31 | shun VB I-VP O 32 | British JJ I-NP I-MISC 33 | lamb NN I-NP O 34 | until IN I-SBAR O 35 | scientists NNS I-NP O 36 | determine VBP I-VP O 37 | whether IN I-SBAR O 38 | mad JJ I-NP O 39 | cow NN I-NP O 40 | disease NN I-NP O 41 | can MD I-VP O 42 | be VB I-VP O 43 | transmitted VBN I-VP O 44 | to TO I-PP O 45 | sheep NN I-NP O 46 | . . O O 47 | 48 | Germany NNP I-NP I-LOC 49 | 's POS B-NP O 50 | representative NN I-NP O 51 | to TO I-PP O 52 | the DT I-NP O 53 | European NNP I-NP I-ORG 54 | Union NNP I-NP I-ORG 55 | 's POS B-NP O 56 | veterinary JJ I-NP O 57 | committee NN I-NP O 58 | Werner NNP I-NP I-PER 59 | Zwingmann NNP I-NP I-PER 60 | said VBD I-VP O 61 | on IN I-PP O 62 | Wednesday NNP I-NP O 63 | consumers NNS I-NP O 64 | should MD I-VP O 65 | buy VB I-VP O 66 | sheepmeat NN I-NP O 67 | from IN I-PP O 68 | countries NNS I-NP O 69 | other JJ I-ADJP O 70 | than IN I-PP O 71 | Britain NNP I-NP I-LOC 72 | until IN I-SBAR O 73 | the DT I-NP O 74 | scientific JJ I-NP O 75 | advice NN I-NP O 76 | was VBD I-VP O 77 | clearer JJR I-ADJP O 78 | . . 
O O 79 | 80 | " " O O 81 | We PRP I-NP O 82 | do VBP I-VP O 83 | n't RB I-VP O 84 | support VB I-VP O 85 | any DT I-NP O 86 | such JJ I-NP O 87 | recommendation NN I-NP O 88 | because IN I-SBAR O 89 | we PRP I-NP O 90 | do VBP I-VP O 91 | n't RB I-VP O 92 | see VB I-VP O 93 | any DT I-NP O 94 | grounds NNS I-NP O 95 | for IN I-PP O 96 | it PRP I-NP O 97 | , , O O 98 | " " O O 99 | the DT I-NP O 100 | Commission NNP I-NP I-ORG 101 | 's POS B-NP O 102 | chief JJ I-NP O 103 | spokesman NN I-NP O 104 | Nikolaus NNP I-NP I-PER 105 | van NNP I-NP I-PER 106 | der FW I-NP I-PER 107 | Pas NNP I-NP I-PER 108 | told VBD I-VP O 109 | a DT I-NP O 110 | news NN I-NP O 111 | briefing NN I-NP O 112 | . . O O 113 | 114 | He PRP I-NP O 115 | said VBD I-VP O 116 | further JJ I-NP O 117 | scientific JJ I-NP O 118 | study NN I-NP O 119 | was VBD I-VP O 120 | required VBN I-VP O 121 | and CC O O 122 | if IN I-SBAR O 123 | it PRP I-NP O 124 | was VBD I-VP O 125 | found VBN I-VP O 126 | that IN I-SBAR O 127 | action NN I-NP O 128 | was VBD I-VP O 129 | needed VBN I-VP O 130 | it PRP I-NP O 131 | should MD I-VP O 132 | be VB I-VP O 133 | taken VBN I-VP O 134 | by IN I-PP O 135 | the DT I-NP O 136 | European NNP I-NP I-ORG 137 | Union NNP I-NP I-ORG 138 | . . 
O O 139 | 140 | He PRP I-NP O 141 | said VBD I-VP O 142 | a DT I-NP O 143 | proposal NN I-NP O 144 | last JJ B-NP O 145 | month NN I-NP O 146 | by IN I-PP O 147 | EU NNP I-NP I-ORG 148 | Farm NNP I-NP O 149 | Commissioner NNP I-NP O 150 | Franz NNP I-NP I-PER 151 | Fischler NNP I-NP I-PER 152 | to TO I-VP O 153 | ban VB I-VP O 154 | sheep NN I-NP O 155 | brains NNS I-NP O 156 | , , O O 157 | spleens NNS I-NP O 158 | and CC O O 159 | spinal JJ I-NP O 160 | cords NNS I-NP O 161 | from IN I-PP O 162 | the DT I-NP O 163 | human NN I-NP O 164 | and CC I-NP O 165 | animal NN I-NP O 166 | food NN I-NP O 167 | chains NNS I-NP O 168 | was VBD I-VP O 169 | a DT I-NP O 170 | highly RB I-NP O 171 | specific JJ I-ADJP O 172 | and CC I-ADJP O 173 | precautionary JJ I-ADJP O 174 | move NN I-NP O 175 | to TO I-VP O 176 | protect VB I-VP O 177 | human JJ I-NP O 178 | health NN I-NP O 179 | . . O O 180 | 181 | Fischler JJR I-NP I-PER 182 | proposed VBN I-NP O 183 | EU-wide NNP I-NP I-MISC 184 | measures VBZ I-VP O 185 | after IN I-PP O 186 | reports NNS I-NP O 187 | from IN I-PP O 188 | Britain NNP I-NP I-LOC 189 | and CC I-NP O 190 | France NNP I-NP I-LOC 191 | that WDT B-NP O 192 | under IN I-PP O 193 | laboratory NN I-NP O 194 | conditions NNS B-NP O 195 | sheep NN I-NP O 196 | could MD I-VP O 197 | contract VB I-VP O 198 | Bovine NNP I-NP I-MISC 199 | Spongiform NNP I-NP I-MISC 200 | Encephalopathy NNP I-NP I-MISC 201 | ( ( O O 202 | BSE NNP I-NP I-MISC 203 | ) ) O O 204 | -- : O O 205 | mad JJ I-NP O 206 | cow NN I-NP O 207 | disease NN I-NP O 208 | . . 
#!/usr/bin/env bash
#
# Prints (does NOT run) the command lines of the small-size experiment set:
# for every trial, one line per model configuration.  Each printed line first
# appends a trial tag to "<experiment_name>.log" and then launches
# control_experiments.py with the configuration's overrides.
#
# Usage: helper-script-to-run-the-experiment-set-small-sizes.sh [experiment_name]

experiment_name=${1:-section1-all-20171114-08}
original_experiment_name=${experiment_name}

ner_tagger_root=/home/onur/projects/research/focus/ner-tagger-dynet-multilayer

n_trials=10

dim=10

# Print one complete command line.
#   "$@": the key=value overrides specific to this configuration; they are
#         placed right after "with", ahead of the shared settings.
# Reads pre_command, small_sizes and experiment_name from the enclosing loop.
emit_command() {
	local words=(
		"${pre_command}" "&&"
		cd "${ner_tagger_root}" "&&"
		source /usr/local/bin/virtualenvwrapper.sh "&&" workon dynet "&&"
		source environment-variables "&&"
		python control_experiments.py -m joint_ner_and_md with
		"$@"
		dynet_gpu=0
		# Single-quoted so the empty-string quotes survive into every printed
		# command (one of the former copies lost them).
		'embeddings_filepath=""'
		train_filepath=turkish/gungor.ner.train.14.only_consistent
		dev_filepath=turkish/gungor.ner.dev.14.only_consistent
		test_filepath=turkish/gungor.ner.test.14.only_consistent
		"${small_sizes}"
		"experiment_name=${experiment_name}" ";"
	)
	# Words are joined with single spaces, matching the former unquoted echo.
	echo "${words[*]}"
}

for trial in $(seq 1 "${n_trials}"); do

	for morpho_tag_type in char ; do

		small_sizes="char_dim=${dim} char_lstm_dim=${dim} morpho_tag_dim=${dim} morpho_tag_lstm_dim=${dim} morpho_tag_type=${morpho_tag_type} word_dim=${dim} word_lstm_dim=${dim} lr_method=sgd-learning_rate_float@0.100"

		# All trials share one experiment name; the trial number only shows up
		# in the tag written to the log file.
		experiment_name=${original_experiment_name}-dim-${dim}-morpho_tag_type-${morpho_tag_type}

		pre_command="echo ${experiment_name}-trial-$(printf '%02d' "${trial}") >> ${experiment_name}.log"

		for imode in 0 1 2 ; do
			if [[ $imode == 0 ]]; then
				for amodels in 1 0 ; do
					emit_command "active_models=${amodels}" "integration_mode=${imode}"
				done
				emit_command active_models=0 integration_mode=0 \
					use_golden_morpho_analysis_in_word_representation=1
			elif [[ $imode == 1 ]]; then
				emit_command active_models=2 integration_mode=1
			else
				emit_command active_models=2 integration_mode=2 multilayer=1 shortcut_connections=1
				emit_command active_models=2 integration_mode=2
			fi
		done

	done
done
LD_PRELOAD=/truba/sw/centos7.3/comp/intel/PS2017-update1/compilers_and_libraries_2017.1.132/linux/mkl/lib/intel64_lin/libmkl_avx2.so:/truba/sw/centos7.3/comp/intel/PS2017-update1/compilers_and_libraries_2017.1.132/linux/mkl/lib/intel64_lin/libmkl_def.so:/truba/sw/centos7.3/comp/intel/PS2017-update1/compilers_and_libraries_2017.1.132/linux/mkl/lib/intel64_lin/libmkl_core.so:/truba/sw/centos7.3/comp/intel/PS2017-update1/compilers_and_libraries_2017.1.132/linux/mkl/lib/intel64_lin/libmkl_intel_lp64.so:/truba/sw/centos7.3/comp/intel/PS2017-update1/compilers_and_libraries_2017.1.132/linux/mkl/lib/intel64_lin/libmkl_intel_thread.so:/truba/sw/centos7.3/comp/intel/PS2017-update1/compilers_and_libraries_2017.1.132/linux/compiler/lib/intel64_lin/libiomp5.so && \ 26 | python control_experiments.py ${sacred_args} with " 27 | 28 | dataset_filepaths="datasets_root=${datasets_root} \ 29 | train_filepath=turkish/gungor.ner.train.14.only_consistent \ 30 | dev_filepath=turkish/gungor.ner.dev.14.only_consistent \ 31 | test_filepath=turkish/gungor.ner.test.14.only_consistent " 32 | 33 | n_trials=10 34 | 35 | dim=${2:-10} 36 | morpho_tag_type=char 37 | 38 | for trial in `seq 1 ${n_trials}`; do 39 | 40 | for morpho_tag_type in char ; do 41 | 42 | small_sizes="char_dim=$dim \ 43 | char_lstm_dim=$dim \ 44 | morpho_tag_dim=$dim \ 45 | morpho_tag_lstm_dim=$dim \ 46 | morpho_tag_type=${morpho_tag_type} \ 47 | word_dim=$dim \ 48 | word_lstm_dim=$dim \ 49 | lr_method=sgd-learning_rate_float@0.01 " 50 | # changed the learning rate to 0.01 from 0.100 51 | 52 | # experiment_name=${original_experiment_name}-dim-${dim}-morpho_tag_type-${morpho_tag_type}-trial-`printf "%02d" ${trial}` 53 | experiment_name=${original_experiment_name}-dim-${dim}-morpho_tag_type-${morpho_tag_type} 54 | 55 | pre_command="echo ${original_experiment_name}-dim-${dim}-morpho_tag_type-${morpho_tag_type}-trial-`printf "%02d" ${trial}` >> ${experiment_name}.log" 56 | 57 | for imode in 0 1 2 ; do 58 | if [[ $imode == 0 ]]; then 
class CRF():
    """Linear-chain CRF over per-token tag scores, built on DyNet.

    Two artificial tags are appended after the real ones: a begin tag
    (``b_id``) and an end tag (``e_id``).  ``transitions[to][from]`` holds the
    score of moving from tag ``from`` to tag ``to``.

    Adapted from https://github.com/rguthrie3/BiLSTM-CRF/blob/master/model.py
    """

    def __init__(self, model, id_to_tag):
        """Register the transition table on ``model``.

        @param model: object providing ``add_lookup_parameters``
        @param id_to_tag: dict mapping tag id -> tag
        """
        self.id_to_tag = id_to_tag
        self.tag_to_id = {tag: tag_id for tag_id, tag in id_to_tag.items()}
        self.n_tags = len(self.id_to_tag)
        self.b_id = len(self.tag_to_id)
        self.e_id = len(self.tag_to_id) + 1

        self.transitions = model.add_lookup_parameters((self.n_tags+2,
                                                        self.n_tags+2),
                                                       name="transitions")

    @staticmethod
    def _pad_observations(observations):
        """Append two -1e10 scores (for the begin/end tags) to every observation."""
        return [dynet.concatenate([obs, dynet.inputVector([-1e10, -1e10])], d=0)
                for obs in observations]

    def score_sentence(self, observations, tags):
        """Return the score (an expression) of the given tag sequence.

        @param observations: per-token score expressions, indexable by tag id
        @param tags: tag ids, one per observation
        """
        assert len(observations) == len(tags)
        score = dynet.scalarInput(0)
        # list(...) so that tuples / arrays are prefixed rather than rejected
        # or added element-wise.
        tags = [self.b_id] + list(tags)
        for i, obs in enumerate(observations):
            score = score \
                    + dynet.pick(self.transitions[tags[i + 1]], tags[i])\
                    + dynet.pick(obs, tags[i + 1])
        score = score + dynet.pick(self.transitions[self.e_id], tags[-1])
        return score

    def viterbi_loss(self, observations, tags):
        """Return ``(loss, viterbi_tags)``.

        The loss is the best-path score minus the gold score, or a constant 0
        expression when the decoded path already equals ``tags``.
        """
        observations = self._pad_observations(observations)
        viterbi_tags, viterbi_score = self.viterbi_decoding(observations)
        if viterbi_tags != tags:
            gold_score = self.score_sentence(observations, tags)
            return (viterbi_score - gold_score), viterbi_tags
        else:
            return dynet.scalarInput(0), viterbi_tags

    def neg_log_loss(self, observations, tags):
        """Return the forward (log-partition) score minus the gold score."""
        observations = self._pad_observations(observations)
        gold_score = self.score_sentence(observations, tags)
        forward_score = self.forward(observations)
        return forward_score - gold_score

    def forward(self, observations):
        """Return the log-sum-exp over the scores of all tag paths.

        @param observations: per-token score expressions with n_tags + 2 entries
        """
        n_states = self.n_tags + 2

        def log_sum_exp(scores):
            # Subtract the maximum before exponentiating for numerical stability.
            npval = scores.npvalue()
            argmax_score = np.argmax(npval)
            max_score_expr = dynet.pick(scores, argmax_score)
            max_score_expr_broadcast = dynet.concatenate([max_score_expr] * n_states)
            return max_score_expr + dynet.log(
                dynet.sum_dims(dynet.transpose(dynet.exp(scores - max_score_expr_broadcast)), [1]))

        init_alphas = [-1e10] * n_states
        init_alphas[self.b_id] = 0
        for_expr = dynet.inputVector(init_alphas)
        # Look every transition row up once instead of once per (token, tag),
        # as viterbi_decoding already does.
        trans_exprs = [self.transitions[idx] for idx in range(n_states)]
        for obs in observations:
            alphas_t = []
            for next_tag in range(n_states):
                obs_broadcast = dynet.concatenate([dynet.pick(obs, next_tag)] * n_states)
                next_tag_expr = for_expr + trans_exprs[next_tag] + obs_broadcast
                alphas_t.append(log_sum_exp(next_tag_expr))
            for_expr = dynet.concatenate(alphas_t)
        terminal_expr = for_expr + trans_exprs[self.e_id]
        alpha = log_sum_exp(terminal_expr)
        return alpha

    def viterbi_decoding(self, observations):
        """Return ``(best_path, path_score)`` for the highest scoring tag path.

        @param observations: per-token score expressions with n_tags + 2 entries
        """
        backpointers = []
        init_vvars = [-1e10] * (self.n_tags + 2)
        init_vvars[self.b_id] = 0  # has all the probability
        for_expr = dynet.inputVector(init_vvars)
        trans_exprs = [self.transitions[idx] for idx in range(self.n_tags + 2)]
        for obs in observations:
            bptrs_t = []
            vvars_t = []
            for next_tag in range(self.n_tags + 2):
                next_tag_expr = for_expr + trans_exprs[next_tag]
                next_tag_arr = next_tag_expr.npvalue()
                best_tag_id = np.argmax(next_tag_arr)
                bptrs_t.append(best_tag_id)
                vvars_t.append(dynet.pick(next_tag_expr, best_tag_id))
            for_expr = dynet.concatenate(vvars_t) + obs
            backpointers.append(bptrs_t)
        # Perform final transition to terminal
        terminal_expr = for_expr + trans_exprs[self.e_id]
        terminal_arr = terminal_expr.npvalue()
        best_tag_id = np.argmax(terminal_arr)
        path_score = dynet.pick(terminal_expr, best_tag_id)
        # Reverse over the backpointers to get the best path
        best_path = [best_tag_id]  # Start with the tag that was best for terminal
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        start = best_path.pop()  # Remove the start symbol
        best_path.reverse()
        assert start == self.b_id
        # Return best path and best path's score
        return best_path, path_score
${n_consistent_sentences['test']} 25 | 26 | #echo 'continue...' 27 | #read 28 | 29 | for n_analyzes in `seq 1 13`; do 30 | 31 | declare -A n_lines_changed 32 | declare -A n_consistent_lines 33 | declare -A n_consistent_sentences 34 | 35 | if [[ -f Xoutput-n_analyses-`printf "%02d" ${n_analyzes}`.txt.rules ]]; then 36 | cat Xoutput-n_analyses-`printf "%02d" ${n_analyzes}`.txt.rules | \ 37 | awk '{ output = ""; for (i=2; i < NF; i++) { if (i == 2) { replacement_pattern= "\\(.\\+\\)"; } else { replacement_pattern = "\\3"; }; gsub(/^X/, replacement_pattern, $i); if (i == 2) { output = "s/^\\(.\\+\\) \\(" $i "\\)\\("; } else { output = output " " $i; } }; gsub(/^X/, "\\3", $NF); output = output " .\\+\\)$/\\1 " $NF "\\4/g"; print output; }' > Xoutput-n_analyses-`printf "%02d" ${n_analyzes}`.txt.rules.sed ; 38 | 39 | for label in train dev test ; do 40 | # echo $label 41 | if [[ ${n_analyzes} == 1 ]]; then 42 | cat dataset/gungor.ner.${label}.01a | awk 'NF == 4 { print $1, $3, $3, $4 } NF != 4 { print }' > dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))` 43 | n_lines_changed[$label]=-1 44 | else 45 | sed -f Xoutput-n_analyses-`printf "%02d" ${n_analyzes}`.txt.rules.sed dataset/gungor.ner.${label}.`printf "%02d" ${n_analyzes}` > dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))` 46 | n_lines_changed[$label]=$(($(diff dataset/gungor.ner.${label}.`printf "%02d" ${n_analyzes}` dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))` | wc -l)/4)) 47 | # echo ${n_lines_changed[$label]} 48 | fi 49 | 50 | cat dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))` | bash ./scripts/strip-sentences-with-inconsistent-morph-analysis.sh > dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))`.only_consistent ; 51 | n_consistent_lines[$label]=$(wc -l dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))`.only_consistent | cut -d" " -f1) 52 | n_consistent_sentences[$label]=$(cat dataset/gungor.ner.${label}.`printf "%02d" 
$((n_analyzes+1))`.only_consistent | awk '/^$/ { count += 1 } END { print count }') 53 | # echo ${n_consistent_lines[$label]} 54 | done 55 | 56 | if [[ ${n_analyzes} == 1 ]]; then 57 | printf "ran awk rule on 01 level to get the %02d level data files\n" $((n_analyzes)) $((n_analyzes+1)) 58 | else 59 | printf "ran sed rules on %02d level to get the %02d level data files\n" $((n_analyzes)) $((n_analyzes+1)) 60 | fi 61 | 62 | printf "level | n_lines_changed n_consistent_lines n_consistent_sentences | ...\n" 63 | printf "%02d | train %6d %6d %6d | dev %6d %6d %6d | test %6d %6d %6d\n" ${n_analyzes} \ 64 | ${n_lines_changed['train']} ${n_consistent_lines['train']} ${n_consistent_sentences['train']} \ 65 | ${n_lines_changed['dev']} ${n_consistent_lines['dev']} ${n_consistent_sentences['dev']} \ 66 | ${n_lines_changed['test']} ${n_consistent_lines['test']} ${n_consistent_sentences['test']} 67 | else 68 | for label in train dev test ; do 69 | if [[ ${n_analyzes} == 1 ]]; then 70 | cp dataset/gungor.ner.${label}.01a dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))` 71 | else 72 | cp dataset/gungor.ner.${label}.`printf "%02d" ${n_analyzes}` dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))` 73 | fi 74 | 75 | cat dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))` | bash ./scripts/strip-sentences-with-inconsistent-morph-analysis.sh > dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))`.only_consistent ; 76 | n_consistent_lines[$label]=$(wc -l dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))`.only_consistent | cut -d" " -f1) 77 | n_consistent_sentences[$label]=$(cat dataset/gungor.ner.${label}.`printf "%02d" $((n_analyzes+1))`.only_consistent | awk '/^$/ { count += 1 } END { print count }') 78 | # echo ${n_consistent_lines[$label]} 79 | done 80 | # printf "%02d | %6d %6d %6d | %6d %6d %6d\n" ${n_analyzes} 0 0 0 0 0 0 81 | printf "NO SED RULES FOUND. 
only copied %02d level data files to get the %02d level data files\n" $((n_analyzes)) $((n_analyzes+1)) 82 | printf "level | n_lines_changed n_consistent_lines n_consistent_sentences | ...\n" 83 | printf "%02d | train %6d %6d %6d | dev %6d %6d %6d | test %6d %6d %6d\n" ${n_analyzes} \ 84 | 0 ${n_consistent_lines['train']} ${n_consistent_sentences['train']} \ 85 | 0 ${n_consistent_lines['dev']} ${n_consistent_sentences['dev']} \ 86 | 0 ${n_consistent_lines['test']} ${n_consistent_sentences['test']} 87 | fi 88 | 89 | # echo 'continue...' 90 | # read 91 | 92 | done 93 | 94 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-05.txt: -------------------------------------------------------------------------------- 1 | 1 12 0 6 Xın+Adverb X+Verb+Pos+Imp+A2pl X+Noun+A3sg+P2sg+Nom X+Noun+A3sg+Pnon+Gen Xı+Noun+A3sg+P2sg+Nom Xın+Noun+A3sg+Pnon+Nom 2 | 2 4 0 6 Xler'i+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3pl+P3sg+Nom X+Noun+Prop+A3pl+Pnon+Acc X+Noun+Prop+A3pl+Nom Xler+Noun+Prop+A3sg+P3sg+Nom Xler+Noun+Prop+A3sg+Pnon+Acc 3 | 3 3 0 6 Xl+Verb+Pos+Imp+A3sg Xl+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg Xls+Noun+A3sg+P2sg+Nom Xls+Noun+A3sg+Pnon+Gen Xlsu+Noun+A3sg+P2sg+Nom Xlsun+Noun+A3sg+Pnon+Nom 4 | 4 3 0 6 Xğı'nın+Noun+Prop+A3sg+Pnon+Nom Xğ+Noun+Prop+A3sg+P3sg+Gen Xğı+Noun+Prop+A3sg+P2sg+Gen Xğı+Noun+Prop+A3sg+Pnon+Gen Xg+Noun+Prop+A3sg+P3sg+Gen Xk+Noun+Prop+A3sg+P3sg+Gen 5 | 5 2 0 6 Xum+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A1sg+Cop Xu+Adj^DB+Noun+Zero+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xum+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xum+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 6 | 6 2 0 6 Xici+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom X+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adj+FitFor X+Verb+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom X+Verb+Pos^DB+Adj+Agt^DB+Noun+Ness+A3sg+Pnon+Nom 
Xi+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adj+FitFor Xi+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom 7 | 7 1 0 6 Xz+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xz+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xz+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xz+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg Xz+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xz+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 8 | 8 1 0 6 Xy+Verb^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xy+Verb^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg Xy+Verb^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xy+Verb^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xy+Verb^DB+Verb+Caus+Pos^DB+Adj+Agt^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xy+Verb^DB+Verb+Caus+Pos^DB+Adj+Agt^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 9 | 9 1 0 6 Xt+Noun+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+A3sg+Cop Xt+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xt+Noun+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+Cop+A3sg Xtı+Num+Card^DB+Noun+Zero+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xtın+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xtın+Noun+A3sg+Pnon+Nom^DB+Adverb+Since 10 | 10 1 0 6 Xn+Noun+Prop+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+P3sg+Gen Xnluğ+Noun+Prop+A3sg+P3sg+Gen Xnluğu+Noun+Prop+A3sg+P2sg+Gen Xnluğu+Noun+Prop+A3sg+Pnon+Gen Xnlug+Noun+Prop+A3sg+P3sg+Gen Xnluk+Noun+Prop+A3sg+P3sg+Gen 11 | 11 1 0 6 Xn+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xn+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg Xn+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xn+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xn+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since 
Xn+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 12 | 12 1 0 6 Xn+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xn+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xn+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xn+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg Xn+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since Xn+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 13 | 13 1 0 6 Xl+Verb+Pos+Imp+A3pl Xls+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+A3pl Xls+Noun+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+A3pl Xlsu+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+A3pl Xlsun+Noun+A3pl+Pnon+Nom Xlsunlar+Noun+A3sg+Pnon+Nom 14 | 14 1 0 6 Xl+Verb+Pos+Imp+A2pl X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3sg+P2sg+Nom Xl+Noun+A3sg+Pnon+Gen Xl+Noun+A3sg+P2sg+Nom Xlu+Noun+A3sg+P2sg+Nom Xlun+Noun+A3sg+Pnon+Nom 15 | 15 1 0 6 Xlık+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xlık+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xlık+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+FitFor^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adverb+Since X+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 16 | 16 1 0 6 Xlar'ı+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3pl+P3sg+Nom X+Noun+Prop+A3pl+Pnon+Acc X+Noun+Prop+A3pl+Nom Xlar+Noun+Prop+A3sg+P3sg+Nom Xlar+Noun+Prop+A3sg+Pnon+Acc 17 | 17 1 0 6 Xi+Pron+Quant+A3sg+P3sg+Nom X+Adj^DB+Noun+Zero+A3sg+P3sg+Nom X+Adj^DB+Noun+Zero+A3sg+Pnon+Acc X+Num+Card^DB+Noun+Zero+A3sg+P3sg+Nom X+Num+Card^DB+Noun+Zero+A3sg+Pnon+Acc Xi+Pron+A3sg+P3sg+Nom 18 | 18 1 0 6 Xi+Noun+A3sg+P3sg+Dat X+Noun+A3sg+P2sg+Dat X+Noun+A3sg+P3sg+Dat Xi+Noun+A3sg+P2sg+Dat Xin+Noun+A3sg+Pnon+Dat Xine+Noun+A3sg+Pnon+Nom 19 | 19 1 0 6 Xı+Noun+A3sg+P3sg+Dat X+Noun+A3sg+P2sg+Dat X+Noun+A3sg+P3sg+Dat Xı+Noun+A3sg+P2sg+Dat Xın+Noun+A3sg+Pnon+Dat Xına+Noun+A3sg+Pnon+Nom 20 | 20 1 0 6 Xici+Noun+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop 
# coding: utf-8
"""Report per-run and aggregated F-scores for one experiment campaign.

Runs are read either from the ``joint_ner_and_md`` MongoDB database on
localhost or from numbered run directories on disk that each contain an
``info.json`` and a ``config.json`` file.  For every run the best epoch is
selected per task and the matching test scores are collected into a
pandas DataFrame.
"""

import argparse
import glob
import json
import os

# Config entries copied into every report row when the run defines them.
REPORTED_CONFIG_KEYS = (
    "host",
    "integration_mode",
    "active_models",
    "train_with_yuret",
    "use_golden_morpho_analysis_in_word_representation",
    "multilayer",
    "shortcut_connections",
    "epochs",
)

# Hyper-parameters whose combination identifies one experimental condition.
GROUPING_KEYS = (
    "integration_mode",
    "active_models",
    "train_with_yuret",
    "use_golden_morpho_analysis_in_word_representation",
    "multilayer",
    "shortcut_connections",
)

# Prefixes of the "<label>_dev_f_score" / "<label>_test_f_score" series.
RESULT_LABELS = ("MORPH", "NER", "YURET")


def _display(obj):
    """Show obj through IPython's display when available, else print it."""
    try:
        from IPython.display import display
    except ImportError:
        print(obj)
    else:
        display(obj)


def _score_at(scores, epoch):
    """Return the highest score logged for epoch, or -1 when none exists."""
    values = scores.get(str(epoch))
    return max(values) if values else -1


def _best_epoch(selection_scores, test_scores):
    """Pick the epoch with the highest selection score.

    Returns a tuple ``(epoch, best_selection_score, test_score_at_epoch)``.
    When no epoch beats the initial score of 0 the result is ``(-1, 0, 0)``.
    Ties keep the earliest epoch because the comparison is strict.
    """
    best_epoch, best_dev, best_test = -1, 0, 0
    # Keys are epoch numbers stored as strings; sort them numerically.
    for epoch in sorted(int(key) for key in selection_scores):
        epoch_max = _score_at(selection_scores, epoch)
        if epoch_max > best_dev:
            best_epoch, best_dev = epoch, epoch_max
            best_test = _score_at(test_scores, epoch)
    return best_epoch, best_dev, best_test


def _summarise_run(run):
    """Build one report row (a dict) from a run's config and info."""
    info = run["info"]
    report = dict(run["config"])
    # Snapshot the keys now: on Python 3 ``dict.keys()`` is a live view, so
    # without the copy the "keys added below" test would never match.
    initial_keys = set(report)
    print(list(report))

    report["epochs"] = max(len(info.get(key, {}))
                           for key in ("NER_dev_f_score", "MORPH_dev_f_score"))

    for label in RESULT_LABELS:
        print("result_designation_label:  %s" % label)
        test_scores = info.get(label + "_test_f_score", {})
        # YURET is selected on its test series; the other labels use dev.
        if label == "YURET":
            selection_scores = test_scores
        else:
            selection_scores = info.get(label + "_dev_f_score", {})
        print(selection_scores)

        best_epoch, best_dev, best_test = _best_epoch(selection_scores, test_scores)
        report[label + "_best_dev"] = best_dev
        report[label + "_best_test"] = best_test

        # Test score of every task at the epoch that was best for `label`.
        for other in RESULT_LABELS:
            print("x:  %s" % other)
            print("epoch_id_of_the_best_dev_result:  %s" % best_epoch)
            key = label + "_to_" + other + "_test"
            report[key] = _score_at(info.get(other + "_test_f_score", {}), best_epoch)
            print(report[key])

    columns = [key for key in REPORTED_CONFIG_KEYS if key in report]
    columns += [key for key in report
                if key not in initial_keys and key not in columns]
    return {key: report[key] for key in columns}


def _load_runs_from_mongo(campaign_name):
    """Fetch all runs of campaign_name from the local MongoDB instance."""
    # Imported lazily so that filesystem mode works without pymongo.
    import pymongo

    client = pymongo.MongoClient("localhost", 27017)
    try:
        # Materialise the cursor before the connection is closed.
        return list(client.joint_ner_and_md.runs.find(
            {"config.experiment_name": campaign_name}))
    finally:
        client.close()


def find_runs_on_filesystem(campaign_name, logs_filepath="../experiment-logs/"):
    """Collect the runs of a campaign from run directories on disk.

    Every entry of ``logs_filepath`` whose name starts with a digit is
    treated as a run directory.  A run is returned as a dict with the keys
    ``"info"`` and ``"config"`` holding the parsed JSON files, and only when
    its config's ``experiment_name`` equals ``campaign_name``.  Directories
    with missing or malformed files are reported on stdout and skipped.
    """
    runs = []
    # Sorted for a deterministic order (lexicographic on the path).
    for run_dir in sorted(glob.glob(os.path.join(logs_filepath, "[0-9]*"))):
        run = {}
        try:
            with open(os.path.join(run_dir, "info.json"), "r") as f:
                run["info"] = json.load(f)
            with open(os.path.join(run_dir, "config.json"), "r") as f:
                run["config"] = json.load(f)
        except (IOError, ValueError) as e:
            # ValueError covers malformed JSON on both Python 2 and 3.
            print(e)
            continue
        if run["config"].get("experiment_name") == campaign_name:
            runs.append(run)
    return runs


def report_results_of_a_specific_campaign(campaign_name, db_type):
    """Summarise every run of a campaign.

    ``db_type`` is either the string ``"mongo"`` or a path to the directory
    holding the run directories.

    Returns a tuple ``(df, mean_ner_best_test)``: ``df`` has one row per
    run, and the second element is the mean ``NER_best_test`` per
    hyper-parameter combination.  When the campaign has no runs both are
    empty.
    """
    import pandas

    print(campaign_name)
    if db_type == "mongo":
        runs = _load_runs_from_mongo(campaign_name)
    else:
        runs = find_runs_on_filesystem(campaign_name, logs_filepath=db_type)

    configs = [_summarise_run(run) for run in runs]
    df = pandas.DataFrame(configs)
    print(configs)
    _display(df)

    # Group only by the hyper-parameters the runs actually recorded.
    group_columns = [col for col in GROUPING_KEYS if col in df.columns]
    if not group_columns or "NER_best_test" not in df.columns:
        return df, pandas.Series(dtype=float, name="NER_best_test")
    return df, df.groupby(group_columns)["NER_best_test"].mean()


def main():
    """Parse the command line and write the two CSV reports."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--campaign_name", default="section1-all-20171013-01")
    parser.add_argument("--db_type", default="mongo")
    args = parser.parse_args()

    df, ner_best_test_mean = report_results_of_a_specific_campaign(
        args.campaign_name, args.db_type)
    df.to_csv("./scripts/results-%s.csv" % args.campaign_name)
    ner_best_test_mean.to_csv(
        "./scripts/results-NER_best_test_mean-%s.csv" % args.campaign_name)


if __name__ == "__main__":
    main()
ayında ay+Noun+A3sg+P3sg+Loc ay+Noun+A3sg+P3sg+Loc ay+Noun+A3sg+P2sg+Loc ayı+Noun+A3sg+P2sg+Loc O 20 | merhaba merhaba+Noun+A3sg+Pnon+Nom merhaba+Noun+A3sg+Pnon+Nom O 21 | demeye de+Verb+Pos^DB+Noun+Inf2+A3sg+Pnon+Dat de+Verb+Neg+Opt+A3sg de+Verb+Pos^DB+Noun+Inf2+A3sg+Pnon+Dat O 22 | hazırlanıyor hazırla+Verb^DB+Verb+Pass+Pos+Prog1+A3sg hazırla+Verb^DB+Verb+Pass+Pos+Prog1+A3sg hazırla+Verb+Reflex+Pos+Prog1+A3sg hazır+Adj^DB+Verb+Acquire+Pos+Prog1+A3sg O 23 | 24 | Tek tek+Adj tek+Adj Tek+Noun+Prop+A3sg+Pnon+Nom O 25 | çatı çatı+Noun+A3sg+Pnon+Nom çat+Noun+A3sg+P3sg+Nom çat+Noun+A3sg+Pnon+Acc çatı+Noun+A3sg+Pnon+Nom O 26 | altında alt+Noun+A3sg+P3sg+Loc alt+Noun+A3sg+P3sg+Loc alt+Noun+A3sg+P2sg+Loc altı+Num+Card^DB+Noun+Zero+A3sg+P2sg+Loc altın+Noun+A3sg+Pnon+Loc O 27 | dokuz dokuz+Num+Card dok+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A1pl dokuz+Num+Card O 28 | ayrı ayrı+Adj ayrı+Adj O 29 | salonda salon+Noun+A3sg+Pnon+Loc salon+Noun+A3sg+Pnon+Loc O 30 | gerçekleştirilecek gerçek+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos^DB+Adj+FutPart+Pnon gerçekle+Verb+Recip^DB+Verb+Caus^DB+Verb+Pass+Pos+Fut+A3sg gerçekle+Verb+Recip^DB+Verb+Caus^DB+Verb+Pass+Pos^DB+Adj+FutPart+Pnon gerçek+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos+Fut+A3sg gerçek+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos^DB+Adj+FutPart+Pnon gerçek+Adj^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos+Fut+A3sg gerçek+Adj^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos^DB+Adj+FutPart+Pnon O 31 | Şenlik şenlik+Noun+A3sg+Pnon+Nom şenlik+Noun+A3sg+Pnon+Nom Şenlik+Noun+Prop+A3sg+Pnon+Nom şen+Adj^DB+Noun+Ness+A3sg+Pnon+Nom O 32 | kapsamında kapsam+Noun+A3sg+P3sg+Loc kapsam+Noun+A3sg+P3sg+Loc kapsam+Noun+A3sg+P2sg+Loc O 33 | doksanın doksan+Num+Card^DB+Noun+Zero+A3sg+Pnon+Gen doksan+Num+Card^DB+Noun+Zero+A3sg+P2sg+Nom doksan+Num+Card^DB+Noun+Zero+A3sg+Pnon+Gen O 34 | üzerinde üzer+Noun+A3sg+P3sg+Loc üzer+Noun+A3sg+P3sg+Loc üzer+Noun+A3sg+P2sg+Loc 
üz+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3sg+Loc üz+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P2sg+Loc O 35 | etkinlik etkin+Adj^DB+Noun+Ness+A3sg+Pnon+Nom etkin+Adj^DB+Noun+Ness+A3sg+Pnon+Nom O 36 | yer yer+Noun+A3sg+Pnon+Nom ye+Verb+Pos+Aor+A3sg ye+Verb+Pos+Aor^DB+Adj+Zero yer+Noun+A3sg+Pnon+Nom yer+Verb+Pos+Imp+A2sg O 37 | alacak al+Verb+Pos+Fut+A3sg al+Verb+Pos+Fut+A3sg al+Verb+Pos^DB+Adj+FutPart+Pnon alacak+Noun+A3sg+Pnon+Nom O 38 | 39 | Halk halk+Noun+A3sg+Pnon+Nom halk+Noun+A3sg+Pnon+Nom Halk+Noun+Prop+A3sg+Pnon+Nom O 40 | müziğinden müzik+Noun+A3sg+P3sg+Abl müzik+Noun+A3sg+P3sg+Abl müzik+Noun+A3sg+P2sg+Abl O 41 | caza caz+Noun+A3sg+Pnon+Dat caz+Noun+A3sg+Pnon+Dat O 42 | , ,+Punc ,+Punc O 43 | klasik klasik+Adj klasik+Adj O 44 | batı batı+Noun+A3sg+Pnon+Nom batı+Noun+A3sg+Pnon+Nom batı+Adj O 45 | müziğinden müzik+Noun+A3sg+P3sg+Abl müzik+Noun+A3sg+P3sg+Abl müzik+Noun+A3sg+P2sg+Abl O 46 | klasik klasik+Adj klasik+Adj O 47 | Türk türk+Noun+A3sg+Pnon+Nom Türk+Noun+Prop+A3sg+Pnon+Nom türk+Noun+A3sg+Pnon+Nom türk+Adj O 48 | müziğine müzik+Noun+A3sg+P3sg+Dat müzik+Noun+A3sg+P3sg+Dat müzik+Noun+A3sg+P2sg+Dat O 49 | , ,+Punc ,+Punc O 50 | rock'tan rock'tan+Noun+Prop+A3sg+Pnon+Nom *UNKNOWN* O 51 | etnik etnik+Adj etnik+Adj O 52 | müziğe müzik+Noun+A3sg+Pnon+Dat müzik+Noun+A3sg+Pnon+Dat O 53 | uzanan uzan+Verb+Pos^DB+Adj+PresPart uza+Verb^DB+Verb+Pass+Pos^DB+Adj+PresPart O 54 | konserlerin konser+Noun+A3pl+Pnon+Gen konser+Noun+A3pl+P2sg+Nom konser+Noun+A3pl+Pnon+Gen O 55 | yanı yan+Noun+A3sg+P3sg+Nom yan+Noun+A3sg+P3sg+Nom yan+Noun+A3sg+Pnon+Acc O 56 | sıra sıra+Noun+A3sg+Pnon+Nom sıra+Noun+A3sg+Pnon+Nom O 57 | , ,+Punc ,+Punc O 58 | atölye atölye+Noun+A3sg+Pnon+Nom atölye+Noun+A3sg+Pnon+Nom O 59 | çalışmaları çalış+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom çalış+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom çalış+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom çalış+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc çalış+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom O 60 | , ,+Punc ,+Punc O 61 | panel 
panel+Noun+A3sg+Pnon+Nom panel+Noun+A3sg+Pnon+Nom O 62 | ve ve+Conj ve+Conj O 63 | söyleşiler söyleşi+Noun+A3pl+Pnon+Nom söyleşi+Noun+A3pl+Pnon+Nom O 64 | , ,+Punc ,+Punc O 65 | çocuk çocuk+Noun+A3sg+Pnon+Nom çocuk+Noun+A3sg+Pnon+Nom O 66 | etkinlikleri etkin+Adj^DB+Noun+Ness+A3pl+P3sg+Nom etkin+Adj^DB+Noun+Ness+A3pl+P3sg+Nom etkin+Adj^DB+Noun+Ness+A3pl+Pnon+Acc etkin+Adj^DB+Noun+Ness+A3pl+P3pl+Nom etkin+Adj^DB+Noun+Ness+A3sg+P3pl+Nom O 67 | , ,+Punc ,+Punc O 68 | CD cd+Noun+A3sg+Pnon+Nom cd+Noun+A3sg+Pnon+Nom Cd+Noun+Prop+A3sg+Pnon+Nom O 69 | ve ve+Conj ve+Conj O 70 | kitap kitap+Noun+A3sg+Pnon+Nom kitap+Noun+A3sg+Pnon+Nom O 71 | satışı sat+Verb+Pos^DB+Noun+Inf3+A3sg+P3sg+Nom sat+Verb+Pos^DB+Noun+Inf3+A3sg+P3sg+Nom sat+Verb+Pos^DB+Noun+Inf3+A3sg+Pnon+Acc O 72 | gibi gibi+Postp+PCNom gibi+Postp+PCNom O 73 | etkinlikler etkin+Adj^DB+Noun+Ness+A3pl+Pnon+Nom etkin+Adj^DB+Noun+Ness+A3pl+Pnon+Nom O 74 | Şenlik'i Şenlik+Noun+Prop+A3sg+Pnon+Acc Şenlik+Noun+Prop+A3sg+P3sg+Nom Şenlik+Noun+Prop+A3sg+Pnon+Acc Şenlik+Noun+Prop+A3sg+P3sg+Nom Şenlik+Noun+Prop+A3sg+Pnon+Acc O 75 | destekleyecek destekle+Verb+Pos+Fut+A3sg destekle+Verb+Pos+Fut+A3sg destekle+Verb+Pos^DB+Adj+FutPart+Pnon O 76 | 77 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-08.txt: -------------------------------------------------------------------------------- 1 | 1 7 0 9 Xuşma+Noun+A3pl+Pnon+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom 2 | 2 3 0 9 Xuşma+Noun+A3pl+P3sg+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom 
Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom 3 | 3 3 0 9 Xuşma+Noun+A3pl+P3sg+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc 4 | 4 2 0 9 Xuşma+Noun+A3pl+P3sg+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc 5 | 5 2 0 9 Xuşma+Noun+A3pl+P3sg+Loc^DB+Adj+Rel X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc^DB+Adj+Rel X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc^DB+Adj+Rel X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc^DB+Adj+Rel X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc^DB+Adj+Rel 6 | 6 2 0 9 Xuşma+Noun+A3pl+P3sg+Gen X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Gen X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Gen X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Gen X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Gen Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Gen Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Gen Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Gen Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Gen 7 | 7 2 0 9 Xe+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3sg+P3pl+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P2sg+Acc 
Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Acc Xer+Adj^DB+Noun+Zero+A3pl+P3pl+Acc Xer+Adj^DB+Noun+Zero+A3pl+P3sg+Acc Xer+Adj^DB+Noun+Zero+A3pl+P2sg+Acc Xer+Adj^DB+Noun+Zero+A3sg+P3pl+Acc 8 | 8 1 0 9 Xuş+Verb+Pos^DB+Adj+NarrPart^DB+Noun+Ness+A3sg+P3pl+Gen X+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3pl+Gen X+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3sg+P3pl+Gen X+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3sg+Gen X+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P2sg+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3pl+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3sg+P3pl+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3sg+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P2sg+Gen 9 | 9 1 0 9 Xuşma+Noun+A3sg+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc 10 | 10 1 0 9 Xuşma+Noun+A3pl+P3sg+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl 11 | 11 1 0 9 Xuşma+Noun+A3pl+P3pl+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc 12 | 12 1 0 9 Xuşma+Noun+A3pl+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl 
X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl 13 | 13 1 0 9 Xli+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xli+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xli+Adj^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg Xli+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since Xli+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 14 | 14 1 0 9 Xla+Verb^DB+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom 15 | 15 1 0 9 Xlaş+Verb+Pos+Prog2+A3sg+Cop Xla+Verb+Recip+Pos+Prog2+Cop+A3sg Xla+Verb+Recip+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xlaş+Verb+Pos+Prog2+Cop+A3sg Xlaş+Verb+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Verb+Become+Pos+Prog2+Cop+A3sg X+Adj^DB+Verb+Become+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Prog2+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg 16 | 16 1 0 9 
Xe+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3sg+P3pl+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+Pnon+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+P3sg+Nom Xer+Adj^DB+Noun+Zero+A3pl+Pnon+Acc Xer+Adj^DB+Noun+Zero+A3sg+P3pl+Nom 17 | 17 1 0 9 Xe+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3pl+Pnon+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+Pnon+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+P3sg+Nom Xer+Adj^DB+Noun+Zero+A3pl+Pnon+Acc Xer+Adj^DB+Noun+Zero+A3sg+P3pl+Nom 18 | -------------------------------------------------------------------------------- /main_form.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | MainWindow 4 | 5 | 6 | 7 | 0 8 | 0 9 | 1303 10 | 987 11 | 12 | 13 | 14 | gungor.ner cleaner tool 15 | 16 | 17 | 18 | ../../../../../../../usr/share/icons/gnome/32x32/apps/accessories-text-editor.png../../../../../../../usr/share/icons/gnome/32x32/apps/accessories-text-editor.png 19 | 20 | 21 | 22 | 23 | 24 | 10 25 | 20 26 | 131 27 | 17 28 | 29 | 30 | 31 | Xoutput files 32 | 33 | 34 | 35 | 36 | 37 | 10 38 | 170 39 | 151 40 | 17 41 | 42 | 43 | 44 | selected file contents 45 | 46 | 47 | 48 | 49 | 50 | 10 51 | 200 52 | 781 53 | 192 54 | 55 | 56 | 57 | 58 | 59 | 60 | 810 61 | 270 62 | 471 63 | 121 64 | 65 | 66 | 67 | 68 | 69 | 70 | 810 71 | 170 72 | 151 73 | 17 74 | 75 | 76 | 77 | selected row 78 | 79 | 80 | 81 | 82 | 83 | 10 84 | 450 85 | 261 86 | 17 87 | 88 | 89 | 90 | samples from train and dev files 91 | 92 | 93 | 94 | 95 | 96 | 810 97 | 190 98 | 171 99 | 17 100 | 101 | 102 | 103 | golden morph. 
analysis 104 | 105 | 106 | 107 | 108 | 109 | 810 110 | 220 111 | 471 112 | 41 113 | 114 | 115 | 116 | 117 | 118 | 119 | 10 120 | 680 121 | 151 122 | 17 123 | 124 | 125 | 126 | rule list 127 | 128 | 129 | 130 | 131 | 132 | 610 133 | 410 134 | 181 135 | 20 136 | 137 | 138 | 139 | corrected morph. analysis 140 | 141 | 142 | 143 | 144 | 145 | 810 146 | 400 147 | 471 148 | 41 149 | 150 | 151 | 152 | true 153 | 154 | 155 | 156 | 157 | 158 | 810 159 | 450 160 | 471 161 | 27 162 | 163 | 164 | 165 | Add this as a rule 166 | 167 | 168 | 169 | 170 | 171 | 10 172 | 40 173 | 1271 174 | 121 175 | 176 | 177 | 178 | 100 179 | 180 | 181 | 182 | 183 | 184 | 10 185 | 480 186 | 1271 187 | 192 188 | 189 | 190 | 191 | 192 | 193 | 194 | 10 195 | 710 196 | 1271 197 | 192 198 | 199 | 200 | 201 | 202 | 203 | 204 | 20 205 | 910 206 | 261 207 | 17 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 850 218 | 910 219 | 431 220 | 27 221 | 222 | 223 | 224 | Sort and Save the rules 225 | 226 | 227 | 228 | 229 | 230 | 970 231 | 170 232 | 68 233 | 17 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 380 244 | 410 245 | 221 246 | 27 247 | 248 | 249 | 250 | Add all rules for n_analysis=1 251 | 252 | 253 | 254 | 255 | 256 | 257 | 0 258 | 0 259 | 1303 260 | 25 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-08.txt.rules: -------------------------------------------------------------------------------- 1 | 1 Xuşma+Noun+A3pl+Pnon+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc 2 | 2 Xuşma+Noun+A3pl+P3sg+Nom 
X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom 3 | 3 Xuşma+Noun+A3pl+P3sg+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc 4 | 4 Xuşma+Noun+A3pl+P3sg+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc 5 | 5 Xuşma+Noun+A3pl+P3sg+Loc^DB+Adj+Rel X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc^DB+Adj+Rel X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc^DB+Adj+Rel X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc^DB+Adj+Rel X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc^DB+Adj+Rel Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc^DB+Adj+Rel 6 | 6 Xuşma+Noun+A3pl+P3sg+Gen X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Gen X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Gen X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Gen X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Gen Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Gen Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Gen 
Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Gen Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Gen Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Gen 7 | 7 Xe+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3sg+P3pl+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P2sg+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Acc Xer+Adj^DB+Noun+Zero+A3pl+P3pl+Acc Xer+Adj^DB+Noun+Zero+A3pl+P3sg+Acc Xer+Adj^DB+Noun+Zero+A3pl+P2sg+Acc Xer+Adj^DB+Noun+Zero+A3sg+P3pl+Acc Xer+Adj^DB+Noun+Zero+A3sg+P3pl+Acc 8 | 8 Xuş+Verb+Pos^DB+Adj+NarrPart^DB+Noun+Ness+A3sg+P3pl+Gen X+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3pl+Gen X+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3sg+P3pl+Gen X+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3sg+Gen X+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P2sg+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3pl+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3sg+P3pl+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3sg+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P2sg+Gen Xuş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3sg+P3pl+Gen 9 | 9 Xuşma+Noun+A3sg+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc 10 | 10 Xuşma+Noun+A3pl+P3sg+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl 11 | 11 
Xuşma+Noun+A3pl+P3pl+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc 12 | 12 Xuşma+Noun+A3pl+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl 13 | 13 Xli+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xli+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xli+Adj^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg Xli+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since Xli+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xli+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 14 | 14 Xla+Verb^DB+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom 
X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom 15 | 15 Xlaş+Verb+Pos+Prog2+A3sg+Cop Xla+Verb+Recip+Pos+Prog2+Cop+A3sg Xla+Verb+Recip+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xlaş+Verb+Pos+Prog2+Cop+A3sg Xlaş+Verb+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Verb+Become+Pos+Prog2+Cop+A3sg X+Adj^DB+Verb+Become+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Prog2+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xlaş+Verb+Pos+Prog2+Cop+A3sg 16 | 16 Xe+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3sg+P3pl+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+Pnon+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+P3sg+Nom Xer+Adj^DB+Noun+Zero+A3pl+Pnon+Acc Xer+Adj^DB+Noun+Zero+A3sg+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3sg+P3pl+Nom 17 | 17 Xe+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3pl+Pnon+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Nom Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+Pnon+Acc Xe+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+P3sg+Nom Xer+Adj^DB+Noun+Zero+A3pl+Pnon+Acc Xer+Adj^DB+Noun+Zero+A3sg+P3pl+Nom Xer+Adj^DB+Noun+Zero+A3pl+Pnon+Acc -------------------------------------------------------------------------------- /main_form.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'main_form.ui' 4 | # 5 | # Created by: PyQt4 UI code generator 4.11.4 6 | # 7 | # WARNING! 
All changes made in this file will be lost! 8 | 9 | from PyQt4 import QtCore, QtGui 10 | 11 | try: 12 | _fromUtf8 = QtCore.QString.fromUtf8 13 | except AttributeError: 14 | def _fromUtf8(s): 15 | return s 16 | 17 | try: 18 | _encoding = QtGui.QApplication.UnicodeUTF8 19 | def _translate(context, text, disambig): 20 | return QtGui.QApplication.translate(context, text, disambig, _encoding) 21 | except AttributeError: 22 | def _translate(context, text, disambig): 23 | return QtGui.QApplication.translate(context, text, disambig) 24 | 25 | class Ui_MainWindow(object): 26 | def setupUi(self, MainWindow): 27 | MainWindow.setObjectName(_fromUtf8("MainWindow")) 28 | MainWindow.resize(1303, 987) 29 | icon = QtGui.QIcon() 30 | icon.addPixmap(QtGui.QPixmap(_fromUtf8("../../../../../../../usr/share/icons/gnome/32x32/apps/accessories-text-editor.png")), QtGui.QIcon.Normal, QtGui.QIcon.Off) 31 | MainWindow.setWindowIcon(icon) 32 | self.centralwidget = QtGui.QWidget(MainWindow) 33 | self.centralwidget.setObjectName(_fromUtf8("centralwidget")) 34 | self.label = QtGui.QLabel(self.centralwidget) 35 | self.label.setGeometry(QtCore.QRect(10, 20, 131, 17)) 36 | self.label.setObjectName(_fromUtf8("label")) 37 | self.label_2 = QtGui.QLabel(self.centralwidget) 38 | self.label_2.setGeometry(QtCore.QRect(10, 170, 151, 17)) 39 | self.label_2.setObjectName(_fromUtf8("label_2")) 40 | self.listWidget_selected_file_contents = QtGui.QListWidget(self.centralwidget) 41 | self.listWidget_selected_file_contents.setGeometry(QtCore.QRect(10, 200, 781, 192)) 42 | self.listWidget_selected_file_contents.setObjectName(_fromUtf8("listWidget_selected_file_contents")) 43 | self.listWidget_selected_row = QtGui.QListWidget(self.centralwidget) 44 | self.listWidget_selected_row.setGeometry(QtCore.QRect(810, 270, 471, 121)) 45 | self.listWidget_selected_row.setObjectName(_fromUtf8("listWidget_selected_row")) 46 | self.label_3 = QtGui.QLabel(self.centralwidget) 47 | self.label_3.setGeometry(QtCore.QRect(810, 170, 
151, 17)) 48 | self.label_3.setObjectName(_fromUtf8("label_3")) 49 | self.label_4 = QtGui.QLabel(self.centralwidget) 50 | self.label_4.setGeometry(QtCore.QRect(10, 450, 261, 17)) 51 | self.label_4.setObjectName(_fromUtf8("label_4")) 52 | self.label_5 = QtGui.QLabel(self.centralwidget) 53 | self.label_5.setGeometry(QtCore.QRect(810, 190, 171, 17)) 54 | self.label_5.setObjectName(_fromUtf8("label_5")) 55 | self.textEdit_golden_morph_analysis = QtGui.QTextEdit(self.centralwidget) 56 | self.textEdit_golden_morph_analysis.setGeometry(QtCore.QRect(810, 220, 471, 41)) 57 | self.textEdit_golden_morph_analysis.setObjectName(_fromUtf8("textEdit_golden_morph_analysis")) 58 | self.label_6 = QtGui.QLabel(self.centralwidget) 59 | self.label_6.setGeometry(QtCore.QRect(10, 680, 151, 17)) 60 | self.label_6.setObjectName(_fromUtf8("label_6")) 61 | self.label_7 = QtGui.QLabel(self.centralwidget) 62 | self.label_7.setGeometry(QtCore.QRect(610, 410, 181, 20)) 63 | self.label_7.setObjectName(_fromUtf8("label_7")) 64 | self.textEdit_2 = QtGui.QTextEdit(self.centralwidget) 65 | self.textEdit_2.setGeometry(QtCore.QRect(810, 400, 471, 41)) 66 | self.textEdit_2.setReadOnly(True) 67 | self.textEdit_2.setObjectName(_fromUtf8("textEdit_2")) 68 | self.addRuleToTheListButton = QtGui.QPushButton(self.centralwidget) 69 | self.addRuleToTheListButton.setGeometry(QtCore.QRect(810, 450, 471, 27)) 70 | self.addRuleToTheListButton.setObjectName(_fromUtf8("addRuleToTheListButton")) 71 | self.treeView_Xoutput_files = QtGui.QTreeView(self.centralwidget) 72 | self.treeView_Xoutput_files.setGeometry(QtCore.QRect(10, 40, 1271, 121)) 73 | self.treeView_Xoutput_files.setObjectName(_fromUtf8("treeView_Xoutput_files")) 74 | self.treeView_Xoutput_files.header().setMinimumSectionSize(100) 75 | self.tableWidget_samples_from_train_and_dev = QtGui.QTableWidget(self.centralwidget) 76 | self.tableWidget_samples_from_train_and_dev.setGeometry(QtCore.QRect(10, 480, 1271, 192)) 77 | 
self.tableWidget_samples_from_train_and_dev.setObjectName(_fromUtf8("tableWidget_samples_from_train_and_dev")) 78 | self.tableWidget_samples_from_train_and_dev.setColumnCount(0) 79 | self.tableWidget_samples_from_train_and_dev.setRowCount(0) 80 | self.tableWidget_output_file_contents = QtGui.QTableWidget(self.centralwidget) 81 | self.tableWidget_output_file_contents.setGeometry(QtCore.QRect(10, 710, 1271, 192)) 82 | self.tableWidget_output_file_contents.setObjectName(_fromUtf8("tableWidget_output_file_contents")) 83 | self.tableWidget_output_file_contents.setColumnCount(0) 84 | self.tableWidget_output_file_contents.setRowCount(0) 85 | self.output_file_load_status = QtGui.QLabel(self.centralwidget) 86 | self.output_file_load_status.setGeometry(QtCore.QRect(20, 910, 261, 17)) 87 | self.output_file_load_status.setText(_fromUtf8("")) 88 | self.output_file_load_status.setObjectName(_fromUtf8("output_file_load_status")) 89 | self.sort_and_save_button = QtGui.QPushButton(self.centralwidget) 90 | self.sort_and_save_button.setGeometry(QtCore.QRect(850, 910, 431, 27)) 91 | self.sort_and_save_button.setObjectName(_fromUtf8("sort_and_save_button")) 92 | self.label_8 = QtGui.QLabel(self.centralwidget) 93 | self.label_8.setGeometry(QtCore.QRect(970, 170, 68, 17)) 94 | self.label_8.setText(_fromUtf8("")) 95 | self.label_8.setObjectName(_fromUtf8("label_8")) 96 | self.special_button_for_level_01 = QtGui.QPushButton(self.centralwidget) 97 | self.special_button_for_level_01.setGeometry(QtCore.QRect(380, 410, 221, 27)) 98 | self.special_button_for_level_01.setObjectName(_fromUtf8("special_button_for_level_01")) 99 | MainWindow.setCentralWidget(self.centralwidget) 100 | self.menubar = QtGui.QMenuBar(MainWindow) 101 | self.menubar.setGeometry(QtCore.QRect(0, 0, 1303, 25)) 102 | self.menubar.setObjectName(_fromUtf8("menubar")) 103 | MainWindow.setMenuBar(self.menubar) 104 | self.statusbar = QtGui.QStatusBar(MainWindow) 105 | self.statusbar.setObjectName(_fromUtf8("statusbar")) 106 | 
MainWindow.setStatusBar(self.statusbar) 107 | 108 | self.retranslateUi(MainWindow) 109 | QtCore.QMetaObject.connectSlotsByName(MainWindow) 110 | 111 | def retranslateUi(self, MainWindow): 112 | MainWindow.setWindowTitle(_translate("MainWindow", "gungor.ner cleaner tool", None)) 113 | self.label.setText(_translate("MainWindow", "Xoutput files", None)) 114 | self.label_2.setText(_translate("MainWindow", "selected file contents", None)) 115 | self.label_3.setText(_translate("MainWindow", "selected row", None)) 116 | self.label_4.setText(_translate("MainWindow", "samples from train and dev files", None)) 117 | self.label_5.setText(_translate("MainWindow", "golden morph. analysis", None)) 118 | self.label_6.setText(_translate("MainWindow", "rule list", None)) 119 | self.label_7.setText(_translate("MainWindow", "corrected morph. analysis", None)) 120 | self.addRuleToTheListButton.setText(_translate("MainWindow", "Add this as a rule", None)) 121 | self.sort_and_save_button.setText(_translate("MainWindow", "Sort and Save the rules", None)) 122 | self.special_button_for_level_01.setText(_translate("MainWindow", "Add all rules for n_analysis=1", None)) 123 | 124 | -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-08.txt.rules.sed: -------------------------------------------------------------------------------- 1 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+Pnon+Acc\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc\4/g 2 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3sg+Nom\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom 
\3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom\4/g 3 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3sg+Acc\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc\4/g 4 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3sg+Loc\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc\4/g 5 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3sg+Loc^DB+Adj+Rel\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc^DB+Adj+Rel \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc^DB+Adj+Rel \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc^DB+Adj+Rel \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc^DB+Adj+Rel \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc^DB+Adj+Rel \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc^DB+Adj+Rel \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc^DB+Adj+Rel \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc^DB+Adj+Rel .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc^DB+Adj+Rel\4/g 6 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3sg+Gen\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Gen \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Gen \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Gen 
\3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Gen \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Gen \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Gen \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Gen \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Gen .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Gen\4/g 7 | s/^\(.\+\) \(\(.\+\)e+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3sg+P3pl+Acc\)\( \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Acc \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Acc \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P2sg+Acc \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Acc \3er+Adj^DB+Noun+Zero+A3pl+P3pl+Acc \3er+Adj^DB+Noun+Zero+A3pl+P3sg+Acc \3er+Adj^DB+Noun+Zero+A3pl+P2sg+Acc \3er+Adj^DB+Noun+Zero+A3sg+P3pl+Acc .\+\)$/\1 \3er+Adj^DB+Noun+Zero+A3sg+P3pl+Acc\4/g 8 | s/^\(.\+\) \(\(.\+\)uş+Verb+Pos^DB+Adj+NarrPart^DB+Noun+Ness+A3sg+P3pl+Gen\)\( \3+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3pl+Gen \3+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3sg+P3pl+Gen \3+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3sg+Gen \3+Verb+Recip+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P2sg+Gen \3uş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3pl+Gen \3uş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3sg+P3pl+Gen \3uş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P3sg+Gen \3uş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3pl+P2sg+Gen .\+\)$/\1 \3uş+Verb+Pos+Narr^DB+Adj+Zero^DB+Noun+Ness+A3sg+P3pl+Gen\4/g 9 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3sg+P3pl+Loc\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Loc \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Loc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Loc .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Loc\4/g 10 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3sg+Abl\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl 
\3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl\4/g 11 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3pl+Acc\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Acc \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Acc\4/g 12 | s/^\(.\+\) \(\(.\+\)uşma+Noun+A3pl+P3pl+Abl\)\( \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl \3+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl \3+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl \3uş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Abl \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Abl \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P2sg+Abl .\+\)$/\1 \3uş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Abl\4/g 13 | s/^\(.\+\) \(\(.\+\)li+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop\)\( \3li+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3li+Adj^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg \3li+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since \3li+Adj^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Verb+Zero+Pres+Cop+A3sg \3+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since \3+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Adj+JustLike^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg \3+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg .\+\)$/\1 
\3li+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg\4/g 14 | s/^\(.\+\) \(\(.\+\)la+Verb^DB+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom\)\( \3la+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom \3la+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom \3la+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc \3la+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom \3+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom \3+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom \3+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+Pnon+Acc \3+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom .\+\)$/\1 \3la+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom\4/g 15 | s/^\(.\+\) \(\(.\+\)laş+Verb+Pos+Prog2+A3sg+Cop\)\( \3la+Verb+Recip+Pos+Prog2+Cop+A3sg \3la+Verb+Recip+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg \3laş+Verb+Pos+Prog2+Cop+A3sg \3laş+Verb+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg \3+Adj^DB+Verb+Become+Pos+Prog2+Cop+A3sg \3+Adj^DB+Verb+Become+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg \3+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Prog2+Cop+A3sg \3+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg .\+\)$/\1 \3laş+Verb+Pos+Prog2+Cop+A3sg\4/g 16 | s/^\(.\+\) \(\(.\+\)e+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3sg+P3pl+Nom\)\( \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Nom \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Nom \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+Pnon+Acc \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Nom \3er+Adj^DB+Noun+Zero+A3pl+P3pl+Nom \3er+Adj^DB+Noun+Zero+A3pl+P3sg+Nom \3er+Adj^DB+Noun+Zero+A3pl+Pnon+Acc \3er+Adj^DB+Noun+Zero+A3sg+P3pl+Nom .\+\)$/\1 \3er+Adj^DB+Noun+Zero+A3sg+P3pl+Nom\4/g 17 | s/^\(.\+\) \(\(.\+\)e+Verb+Pos^DB+Adj+AorPart^DB+Noun+Zero+A3pl+Pnon+Acc\)\( \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3pl+Nom \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+P3sg+Nom 
\3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3pl+Pnon+Acc \3e+Verb+Pos+Aor^DB+Adj+Zero^DB+Noun+Zero+A3sg+P3pl+Nom \3er+Adj^DB+Noun+Zero+A3pl+P3pl+Nom \3er+Adj^DB+Noun+Zero+A3pl+P3sg+Nom \3er+Adj^DB+Noun+Zero+A3pl+Pnon+Acc \3er+Adj^DB+Noun+Zero+A3sg+P3pl+Nom .\+\)$/\1 \3er+Adj^DB+Noun+Zero+A3pl+Pnon+Acc\4/g 18 | -------------------------------------------------------------------------------- /scripts/inspect_results.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [ 10 | { 11 | "ename": "ImportError", 12 | "evalue": "No module named pymongo", 13 | "traceback": [ 14 | "\u001b[0;31m\u001b[0m", 15 | "\u001b[0;31mImportError\u001b[0mTraceback (most recent call last)", 16 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpymongo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisplay\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdisplay\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 17 | "\u001b[0;31mImportError\u001b[0m: No module named pymongo" 18 | ], 19 | "output_type": "error" 20 | } 21 | ], 22 | "source": [ 23 | "import pymongo\n", 24 | "from IPython.display import display" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "ename": "NameError", 34 | "evalue": "name 'pymongo' is not defined", 35 | "traceback": [ 36 | "\u001b[0;31m\u001b[0m", 37 | "\u001b[0;31mNameError\u001b[0mTraceback (most recent call last)", 38 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mclient\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mpymongo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMongoClient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"localhost\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m27017\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 39 | "\u001b[0;31mNameError\u001b[0m: name 'pymongo' is not defined" 40 | ], 41 | "output_type": "error" 42 | } 43 | ], 44 | "source": [ 45 | "client = pymongo.MongoClient(\"localhost\", 27017)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "db = client.joint_ner_and_md" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/html": [ 65 | "
\n", 66 | "\n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | "
char_dimmorpho_tag_dimword_dimmorpho_tag_typehostbest
001010with_rootlocalhost59.61
\n", 90 | "
" 91 | ], 92 | "text/plain": [ 93 | "
\n", 94 | "\n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | "
char_dimmorpho_tag_dimword_dimmorpho_tag_typehostbest
001010with_rootlocalhost59.61
\n", 118 | "
" 119 | ] 120 | }, 121 | "execution_count": 14, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "print(\"section1-all-20171013-01\")\n", 128 | "runs = db.runs.find({\"config.experiment_name\": \"section1-all-20171013-01\"})\n", 129 | "configs = []\n", 130 | "for run_idx, run in enumerate(runs):\n", 131 | " \n", 132 | " dict_to_report = dict(run[\"config\"])\n", 133 | " initial_keys = dict_to_report.keys()\n", 134 | " \n", 135 | " print initial_keys\n", 136 | " \n", 137 | " result_designation_labels = [\"MORPH\", \"NER\", \"YURET\"]\n", 138 | " \n", 139 | " for result_designation_label in result_designation_labels:\n", 140 | " \n", 141 | " print \"result_designation_label: \", result_designation_label\n", 142 | " \n", 143 | " if result_designation_label == \"YURET\":\n", 144 | " best_performances = run[\"info\"][result_designation_label + \"_test_f_score\"]\n", 145 | " else:\n", 146 | " best_performances = run[\"info\"][result_designation_label + \"_dev_f_score\"]\n", 147 | " print best_performances\n", 148 | " best_dev_result_for_this_run = 0\n", 149 | " best_test_result_for_this_run = 0\n", 150 | " epoch_id_of_the_best_dev_result = -1\n", 151 | " # display(run[\"config\"])\n", 152 | " for epoch in sorted([int(k) for k in best_performances.keys()]):\n", 153 | " # if result_designation_label != \"NER\":\n", 154 | " # corrected_epoch = epoch + 1\n", 155 | " epoch_max = max(best_performances[str(epoch)])\n", 156 | " if epoch_max > best_dev_result_for_this_run:\n", 157 | " epoch_id_of_the_best_dev_result = epoch\n", 158 | " best_dev_result_for_this_run = epoch_max\n", 159 | " best_test_result_for_this_run = \\\n", 160 | " max(run[\"info\"][result_designation_label + \"_test_f_score\"][str(epoch)])\n", 161 | " \n", 162 | " # print \"run_idx: %d, epoch: %d, epoch_best_performance: %.2lf, best_for_this_run: %.2lf\" % (run_idx, epoch, epoch_max, best_for_this_run)\n", 163 | " \n", 164 | " 
dict_to_report[result_designation_label + \"_best_dev\"] = best_dev_result_for_this_run\n", 165 | " dict_to_report[result_designation_label + \"_best_test\"] = best_test_result_for_this_run\n", 166 | " \n", 167 | " for x in result_designation_labels:\n", 168 | " # if x != result_designation_label:\n", 169 | " print \"x: \", x\n", 170 | " print \"epoch_id_of_the_best_dev_result: \", epoch_id_of_the_best_dev_result\n", 171 | " dict_to_report[result_designation_label + \"_to_\" + x + \"_test\"] = \\\n", 172 | " max(run[\"info\"][x + \"_test_f_score\"][str(epoch_id_of_the_best_dev_result)]) \\\n", 173 | " if str(epoch_id_of_the_best_dev_result) in run[\"info\"][x + \"_test_f_score\"].keys() else -1\n", 174 | " print dict_to_report[result_designation_label + \"_to_\" + x + \"_test\"]\n", 175 | " \n", 176 | " configs.append({key: dict_to_report[key] for key in [\"host\", \n", 177 | " \"integration_mode\", \n", 178 | " \"train_with_yuret\", \n", 179 | " \"use_golden_morpho_analysis_in_word_representation\"] + \n", 180 | " [x for x in dict_to_report.keys() if x not in initial_keys]})\n", 181 | "\n", 182 | "import pandas\n", 183 | "df = pandas.DataFrame.from_dict(configs)\n", 184 | "print configs\n", 185 | "cols = df.columns.tolist()\n", 186 | "\n", 187 | "# display(df[[\"host\"] + \n", 188 | "# [x for x in dict_to_report.keys() if x not in initial_keys]])\n", 189 | "\n", 190 | "display(df)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "Python 2", 206 | "language": "python", 207 | "name": "python2" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 2.0 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython2", 219 | 
"version": "2.7.6" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 0 224 | } -------------------------------------------------------------------------------- /control_experiments.py: -------------------------------------------------------------------------------- 1 | from sacred import Experiment 2 | 3 | from sacred.observers import MongoObserver 4 | 5 | import subprocess 6 | import sys 7 | import re 8 | 9 | ex = Experiment('my_experiment') 10 | 11 | @ex.config 12 | def my_config(): 13 | skip_testing = 0 14 | reload = 0 15 | max_epochs = 50 16 | 17 | dynet_gpu = 0 18 | 19 | host="localhost" 20 | experiment_name = "default_experiment_name" 21 | 22 | datasets_root = "/home/onur/projects/research/turkish-ner/datasets" 23 | 24 | learning_rate = 0.01 25 | 26 | crf = 1 27 | # lr_method = "sgd-learning_rate_float@%lf" % learning_rate 28 | lr_method = "adam" 29 | 30 | batch_size = 1 31 | 32 | sparse_updates_enabled = 1 33 | dropout = 0.5 34 | char_dim = 64 35 | char_lstm_dim = 64 36 | 37 | morpho_tag_dim = 64 38 | morpho_tag_lstm_dim = 64 39 | morpho_tag_type = "char" 40 | 41 | morpho_tag_column_index = 1 42 | 43 | integration_mode = 0 44 | active_models = 0 45 | multilayer = 0 46 | shortcut_connections = 0 47 | 48 | word_dim = 64 49 | word_lstm_dim = 64 50 | cap_dim = 0 51 | 52 | # char_dim = 200 53 | # char_lstm_dim = 200 54 | # 55 | # morpho_tag_dim = 100 56 | # morpho_tag_lstm_dim = 200 57 | # morpho_tag_type = "wo_root" 58 | # 59 | # morpho_tag_column_index = 1 60 | # 61 | # integration_mode = 0 62 | # 63 | # word_dim = 300 64 | # word_lstm_dim = 200 65 | # cap_dim = 100 66 | 67 | train_filepath = "turkish/gungor.ner.train.only_consistent" 68 | dev_filepath = "turkish/gungor.ner.dev.only_consistent" 69 | test_filepath = "turkish/gungor.ner.test.only_consistent" 70 | 71 | yuret_train_filepath = "turkish/train.merge.utf8.gungor_format" 72 | yuret_test_filepath = "turkish/test.merge.utf8.gungor_format" 73 | 74 | train_with_yuret = 0 75 | test_with_yuret = 1 
76 | 77 | use_golden_morpho_analysis_in_word_representation = 0 78 | 79 | embeddings_filepath = "turkish/we-300.txt" 80 | 81 | 82 | @ex.main 83 | def my_main(): 84 | 85 | run_a_single_configuration_without_fabric() 86 | 87 | from utils import read_args, form_parameters_dict, get_name, get_model_subpath 88 | 89 | 90 | @ex.capture 91 | def run_a_single_configuration_without_fabric( 92 | datasets_root, 93 | crf, 94 | lr_method, 95 | batch_size, 96 | sparse_updates_enabled, 97 | dropout, 98 | char_dim, 99 | char_lstm_dim, 100 | morpho_tag_dim, 101 | morpho_tag_lstm_dim, 102 | morpho_tag_type, 103 | morpho_tag_column_index, 104 | word_dim, 105 | word_lstm_dim, 106 | cap_dim, skip_testing, max_epochs, 107 | train_filepath, 108 | dev_filepath, 109 | test_filepath, 110 | yuret_train_filepath, 111 | yuret_test_filepath, 112 | train_with_yuret, 113 | test_with_yuret, 114 | use_golden_morpho_analysis_in_word_representation, 115 | embeddings_filepath, 116 | integration_mode, 117 | active_models, 118 | multilayer, 119 | shortcut_connections, 120 | reload, 121 | dynet_gpu, 122 | _run): 123 | 124 | """ 125 | python train.py --pre_emb ../../data/we-300.txt --train dataset/gungor.ner.train.only_consistent --dev dataset/gungor.ner.dev.only_consistent --test dataset/gungor.ner.test.only_consistent --word_di 126 | m 300 --word_lstm_dim 200 --word_bidirect 1 --cap_dim 100 --crf 1 --lr_method=sgd-learning_rate_float@0.05 --maximum-epochs 50 --char_dim 200 --char_lstm_dim 200 --char_bid 127 | irect 1 --overwrite-mappings 1 --batch-size 1 --morpho_tag_dim 100 --integration_mode 2 128 | """ 129 | 130 | execution_part = "python train.py --overwrite-mappings 1 " 131 | 132 | if sparse_updates_enabled == 0: 133 | execution_part += "--disable_sparse_updates " 134 | 135 | if dynet_gpu == 1: 136 | execution_part += "--dynet-gpu 1 " 137 | 138 | if train_with_yuret == 1: 139 | execution_part += "--train_with_yuret " 140 | 141 | if use_golden_morpho_analysis_in_word_representation == 1: 142 | 
execution_part += "--use_golden_morpho_analysis_in_word_representation " 143 | 144 | if word_dim == 0: 145 | embeddings_part = "" 146 | else: 147 | if embeddings_filepath: 148 | embeddings_part = "--pre_emb %s/%s " % (datasets_root, embeddings_filepath) 149 | else: 150 | embeddings_part = "" 151 | 152 | print (train_filepath, dev_filepath, test_filepath, skip_testing, max_epochs) 153 | 154 | always_constant_part = "-T %s/%s " \ 155 | "-d %s/%s " \ 156 | "-t %s/%s " \ 157 | "%s" \ 158 | "%s" \ 159 | "--yuret_train %s/%s " \ 160 | "--yuret_test %s/%s " \ 161 | "%s" \ 162 | "--skip-testing %d " \ 163 | "--tag_scheme iobes " \ 164 | "--maximum-epochs %d " % (datasets_root, train_filepath, 165 | datasets_root, dev_filepath, 166 | datasets_root, test_filepath, 167 | "--train_with_yuret " if train_with_yuret else "", 168 | "--test_with_yuret " if test_with_yuret else "", 169 | datasets_root, yuret_train_filepath, 170 | datasets_root, yuret_test_filepath, 171 | embeddings_part, 172 | skip_testing, max_epochs) 173 | 174 | commandline_args = always_constant_part + \ 175 | "--crf %d " \ 176 | "--lr_method %s " \ 177 | "--batch-size %d " \ 178 | "--dropout %1.1lf " \ 179 | "--char_dim %d " \ 180 | "--char_lstm_dim %d " \ 181 | "--morpho_tag_dim %d " \ 182 | "--morpho_tag_lstm_dim %d " \ 183 | "--morpho_tag_type %s " \ 184 | "--morpho-tag-column-index %d " \ 185 | "--word_dim %d " \ 186 | "--word_lstm_dim %d "\ 187 | "--cap_dim %d "\ 188 | "--integration_mode %d " \ 189 | "--active_models %d " \ 190 | "--multilayer %d " \ 191 | "--shortcut_connections %d " \ 192 | "--reload %d" % (crf, 193 | lr_method, 194 | batch_size, 195 | dropout, 196 | char_dim, 197 | char_lstm_dim, 198 | morpho_tag_dim, 199 | morpho_tag_lstm_dim, 200 | morpho_tag_type, 201 | morpho_tag_column_index, 202 | word_dim, 203 | word_lstm_dim, 204 | cap_dim, 205 | integration_mode, 206 | active_models, 207 | multilayer, 208 | shortcut_connections, 209 | reload) 210 | 211 | # tagger_root = 
"/media/storage/genie/turkish-ner/code/tagger" 212 | 213 | print _run 214 | print _run.info 215 | 216 | print subprocess.check_output(["id"]) 217 | print subprocess.check_output(["pwd"]) 218 | 219 | opts = read_args(args_as_a_list=commandline_args.split(" ")) 220 | print opts 221 | parameters = form_parameters_dict(opts) 222 | print parameters 223 | # model_path = get_name(parameters) 224 | model_path = get_model_subpath(parameters) 225 | print model_path 226 | 227 | task_names = ["NER", "MORPH", "YURET"] 228 | 229 | for task_name in task_names: 230 | _run.info["%s_dev_f_score" % task_name] = dict() 231 | _run.info["%s_test_f_score" % task_name] = dict() 232 | 233 | _run.info['starting'] = 1 234 | 235 | dummy_prefix = "" 236 | 237 | print dummy_prefix + execution_part + commandline_args 238 | process = subprocess.Popen((dummy_prefix + execution_part + commandline_args).split(" "), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 239 | 240 | def record_metric(epoch, label, value): 241 | if str(epoch) in _run.info[label]: 242 | _run.info[label][str(epoch)].append(value) 243 | else: 244 | _run.info[label][str(epoch)] = list() 245 | _run.info[label][str(epoch)].append(value) 246 | 247 | def capture_information(line): 248 | 249 | # 1 250 | """ 251 | NER Epoch: %d Best dev and accompanying test score, best_dev, best_test: %lf %lf 252 | """ 253 | for task_name in task_names: 254 | m = re.match("^%s Epoch: (\d+) .* best_dev, best_test: (.+) (.+)$" % task_name, line) 255 | if m: 256 | epoch = int(m.group(1)) 257 | best_dev = float(m.group(2)) 258 | best_test = float(m.group(3)) 259 | 260 | record_metric(epoch, "%s_dev_f_score" % task_name, best_dev) 261 | record_metric(epoch, "%s_test_f_score" % task_name, best_test) 262 | 263 | for line in iter(process.stdout.readline, ''): 264 | sys.stdout.write(line) 265 | capture_information(line) 266 | sys.stdout.flush() 267 | 268 | return model_path 269 | 270 | if __name__ == '__main__': 271 | ex.run_commandline() 
-------------------------------------------------------------------------------- /eval.py: --------------------------------------------------------------------------------
"""Evaluation

Helpers that restore saved model checkpoints and evaluate them on the
supplied dataset buckets.
"""
from __future__ import absolute_import
from __future__ import division

from collections import defaultdict as dd
import itertools
import logging
import math
import sys
import time

import subprocess

import codecs
import numpy as np

import os

import dynet

import loader
from loader import calculate_global_maxes, update_tag_scheme, \
    word_mapping, augment_with_pretrained, char_mapping, tag_mapping, prepare_dataset
from model import MainTaggerModel
from utils import read_args, form_parameters_dict, models_path, eval_script, eval_temp, iobes_iob
from dynetsaver import DynetSaver

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("eval")



def eval_once(model, dev_buckets, test_buckets, model_dir_path, integration_mode,
              run_for_all_checkpoints=False,
              *args):
    """Evaluate the latest (or every) checkpoint found in a model directory.

    A DynetSaver is created for ``model.model`` / ``model_dir_path`` and
    stored on ``model.saver``. If it reports a checkpoint state, either the
    most recent checkpoint or, when ``run_for_all_checkpoints`` is true,
    each listed checkpoint is passed to ``eval_for_a_checkpoint``.

    Args:
      model: object exposing a ``model`` attribute; gets a ``saver``
        attribute assigned here.
      dev_buckets: forwarded unchanged to ``eval_for_a_checkpoint``.
      test_buckets: forwarded unchanged to ``eval_for_a_checkpoint``.
      model_dir_path: directory handed to DynetSaver.
      integration_mode: forwarded unchanged to ``eval_for_a_checkpoint``.
      run_for_all_checkpoints: evaluate all checkpoints instead of only the
        latest one.
      *args: forwarded unchanged to ``eval_for_a_checkpoint``.

    Returns:
      None. The per-checkpoint results are discarded, and nothing happens
      when no checkpoint state is found.
    """

    model.saver = DynetSaver(model.model, model_dir_path)
    ckpt = model.saver.get_checkpoint_state()
    if ckpt:
        if run_for_all_checkpoints:
            for model_checkpoint_path in ckpt.all_model_checkpoint_paths:
                eval_for_a_checkpoint(model.saver, model, model_checkpoint_path, dev_buckets, test_buckets,
                                      integration_mode,
                                      *args)
        else:
            eval_for_a_checkpoint(model.saver, model, ckpt.model_checkpoint_path, dev_buckets, test_buckets,
                                  integration_mode, *args)


def eval_for_a_checkpoint(saver, model, model_checkpoint_path, dev_buckets, test_buckets, integration_mode, *args):
    """Restore one checkpoint and evaluate it.

    Prints a message and returns None when ``model_checkpoint_path`` is
    empty; otherwise returns whatever ``eval_with_specific_model`` returns.
    """
    if model_checkpoint_path:
        # Restores from checkpoint
        saver.restore(model_checkpoint_path)
        print "Evaluating %s" % model_checkpoint_path
        # The epoch number is taken from the text after the last '-' in the
        # checkpoint file name (e.g. ".../model.ckpt-12" -> 12).
        epoch = int(os.path.basename(model_checkpoint_path).split('-')[-1])
    else:
        print('No checkpoint file found')
        return

    # NOTE(review): eval_with_specific_model is declared as
    # (model, epoch, buckets_list, integration_mode, active_models, *args).
    # With this positional call dev_buckets binds to buckets_list,
    # test_buckets to integration_mode and integration_mode to
    # active_models. It also receives model.model, while the callee calls
    # model.predict(...). This looks like a stale call site; confirm the
    # intended arguments before relying on this path.
    return eval_with_specific_model(model.model, epoch, dev_buckets, test_buckets, integration_mode, *args)


76 | def eval_with_specific_model(model, epoch, buckets_list, integration_mode, active_models, 77 | *args): # FLAGS.eval_dir 78 | # type: (MainTaggerModel, int, list, object, object) -> object 79 | id_to_tag, batch_size, eval_dir, tag_scheme = args 80 | 81 | f_scores = {} 82 | dataset_labels = ["dev", "test", "yuret"] 83 | 84 | total_correct_disambs = {dataset_label: 0 for dataset_label in dataset_labels} 85 | total_disamb_targets = {dataset_label: 0 for dataset_label in dataset_labels} 86 | if active_models in [1, 2, 3]: 87 | detailed_correct_disambs = {dataset_label: dd(int) for dataset_label in dataset_labels} 88 | detailed_total_target_disambs = {dataset_label: dd(int) for dataset_label in
dataset_labels} 89 | 90 | for dataset_label, dataset_buckets in buckets_list: 91 | 92 | if len(dataset_buckets) == 0: 93 | print "Skipping to evaluate %s dataset as it is empty" % dataset_label 94 | total_correct_disambs[dataset_label] = -1 95 | total_disamb_targets[dataset_label] = 1 96 | continue 97 | 98 | print "Starting to evaluate %s dataset" % dataset_label 99 | predictions = [] 100 | n_tags = len(id_to_tag) 101 | count = np.zeros((n_tags, n_tags), dtype=np.int32) 102 | 103 | # permuted_bucket_ids = np.random.permutation(range(len(dataset_buckets))) 104 | 105 | for bucket_id in range(len(dataset_buckets)): 106 | 107 | # bucket_id = np.random.random_integers(0, len(train_bins)-1) 108 | bucket_data_dict = dataset_buckets[bucket_id] 109 | 110 | n_batches = int(math.ceil(float(len(bucket_data_dict)) / batch_size)) 111 | 112 | print "dataset_label: %s" % dataset_label 113 | print ("n_batches: %d" % n_batches) 114 | print ("bucket_id: %d" % bucket_id) 115 | 116 | for batch_idx in range(n_batches): 117 | # print("batch_idx: %d" % batch_idx) 118 | sys.stdout.write(". ") 119 | sys.stdout.flush() 120 | 121 | sentences_in_the_batch = bucket_data_dict[ 122 | (batch_idx * batch_size):((batch_idx + 1) * batch_size)] 123 | 124 | for sentence in sentences_in_the_batch: 125 | dynet.renew_cg() 126 | 127 | sentence_length = len(sentence['word_ids']) 128 | 129 | if active_models in [2, 3]: 130 | selected_morph_analyzes, decoded_tags = model.predict(sentence) 131 | elif active_models in [1]: 132 | selected_morph_analyzes, _ = model.predict(sentence) 133 | elif active_models in [0]: 134 | decoded_tags = model.predict(sentence) 135 | 136 | if active_models in [0, 2, 3]: # i.e. 
not only MD 137 | p_tags = [id_to_tag[p_tag] for p_tag in decoded_tags] 138 | r_tags = [id_to_tag[p_tag] for p_tag in sentence['tag_ids']] 139 | if tag_scheme == 'iobes': 140 | p_tags = iobes_iob(p_tags) 141 | r_tags = iobes_iob(r_tags) 142 | 143 | for i, (word_id, y_pred, y_real) in enumerate( 144 | zip(sentence['word_ids'], decoded_tags, 145 | sentence['tag_ids'])): 146 | new_line = " ".join([sentence['str_words'][i]] + [r_tags[i], p_tags[i]]) 147 | predictions.append(new_line) 148 | count[y_real, y_pred] += 1 149 | predictions.append("") 150 | 151 | if active_models in [1, 2, 3]: 152 | n_correct_morph_disambs = \ 153 | sum([x == y for x, y, z in zip(selected_morph_analyzes, 154 | sentence['golden_morph_analysis_indices'], 155 | sentence['morpho_analyzes_tags']) if len(z) > 1]) 156 | total_correct_disambs[dataset_label] += n_correct_morph_disambs 157 | total_disamb_targets[dataset_label] += sum([1 for el in sentence['morpho_analyzes_tags'] if len(el) > 1]) 158 | for key, value in [(len(el), x == y) for el, x, y in zip(sentence['morpho_analyzes_tags'], 159 | selected_morph_analyzes, 160 | sentence['golden_morph_analysis_indices'])]: 161 | if value: 162 | detailed_correct_disambs[dataset_label][key] += 1 163 | detailed_total_target_disambs[dataset_label][key] += 1 164 | # total_possible_analyzes += sum([len(el) for el in sentence['morpho_analyzes_tags'] if len(el) > 1]) 165 | 166 | print "" 167 | 168 | if active_models in [0, 2, 3]: 169 | # Write predictions to disk and run CoNLL script externally 170 | eval_id = np.random.randint(1000000, 2000000) 171 | output_path = os.path.join(eval_dir, 172 | "%s.eval.%i.epoch-%04d.output" % ( 173 | dataset_label, eval_id, epoch)) 174 | scores_path = os.path.join(eval_dir, 175 | "%s.eval.%i.epoch-%04d.scores" % ( 176 | dataset_label, eval_id, epoch)) 177 | with codecs.open(output_path, 'w', 'utf8') as f: 178 | f.write("\n".join(predictions)) 179 | 180 | print "Evaluating the %s dataset with conlleval script" % dataset_label 181 
| command_string = "%s < %s > %s" % (eval_script, output_path, scores_path) 182 | print command_string 183 | # os.system(command_string) 184 | # sys.exit(0) 185 | with codecs.open(output_path, "r", encoding="utf-8") as output_path_f: 186 | eval_lines = [x.rstrip() for x in subprocess.check_output([eval_script], 187 | stdin=output_path_f).split( 188 | "\n")] 189 | 190 | # CoNLL evaluation results 191 | # eval_lines = [l.rstrip() for l in codecs.open(scores_path, 'r', 'utf8')] 192 | for line in eval_lines: 193 | print line 194 | f_scores[dataset_label] = float(eval_lines[1].split(" ")[-1]) 195 | 196 | if active_models in [1, 2, 3]: 197 | for n_possible_analyzes in map(int, detailed_correct_disambs[dataset_label].keys()): 198 | print "%s %d %d/%d" % (dataset_label, 199 | n_possible_analyzes, 200 | detailed_correct_disambs[dataset_label][n_possible_analyzes], 201 | detailed_total_target_disambs[dataset_label][n_possible_analyzes]) 202 | if active_models in [0]: 203 | return f_scores, {} 204 | else: 205 | result = {} 206 | for dataset_label in dataset_labels: 207 | if total_disamb_targets[dataset_label] == 0: 208 | total_correct_disambs[dataset_label] = -1 209 | total_disamb_targets[dataset_label] = 1 210 | result[dataset_label] = \ 211 | total_correct_disambs[dataset_label] / float(total_disamb_targets[dataset_label]) 212 | 213 | return f_scores, result 214 | 215 | def evaluate(model, dev_buckets, test_buckets, opts, *args): 216 | """Eval CIFAR-10 for a number of steps.""" # with tf.Graph().as_default() as g: 217 | 218 | while True: 219 | eval_once(model, dev_buckets, test_buckets, model.model_path, 220 | opts.integration_mode, 221 | run_for_all_checkpoints=bool(opts.run_for_all_checkpoints), 222 | *args) 223 | print "Sleeping for %d" % 600 224 | time.sleep(600) -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-06.txt: -------------------------------------------------------------------------------- 1 | 1 
42 0 7 Xğu+Det Xk+Adj^DB+Noun+Zero+A3sg+P3sg+Nom Xk+Adj^DB+Noun+Zero+A3sg+Pnon+Acc Xk+Postp+PCAbl^DB+Noun+Zero+A3sg+P3sg+Nom Xk+Postp+PCAbl^DB+Noun+Zero+A3sg+Pnon+Acc Xğu+Adj Xğu+Pron+Quant+A3pl+P3pl+Nom 2 | 2 32 0 7 Xişi+Noun+A3pl+Pnon+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+Pnon+Acc X+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf3+A3sg+P3pl+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+P3pl+Nom Xiş+Noun+A3pl+P3pl+Nom Xiş+Noun+A3sg+P3pl+Nom 3 | 3 12 0 7 Xişi+Noun+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+Pnon+Acc X+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf3+A3sg+P3pl+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+P3pl+Nom Xiş+Noun+A3pl+P3pl+Nom Xiş+Noun+A3sg+P3pl+Nom 4 | 4 5 0 7 Xişi+Noun+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf+A3pl+Pnon+Acc X+Verb+Pos^DB+Noun+Inf+A3pl+P3pl+Nom X+Verb+Pos^DB+Noun+Inf+A3sg+P3pl+Nom Xiş+Noun+A3pl+P3pl+Nom Xiş+Noun+A3sg+P3pl+Nom 5 | 5 3 0 7 Xuşma+Noun+A3pl+P3sg+Ins X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3pl+Ins X+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+P3pl+Ins X+Verb+Recip+Pos^DB+Noun+Inf2+A3pl+P3sg+Ins Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Ins Xuş+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Ins Xuş+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Ins 6 | 6 3 0 7 Xı+Ques+Pres+A2sg+Cop Xı+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg+Cop Xıs+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xıs+Noun+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+Cop+A3sg Xısı+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xısın+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xısındır+Noun+A3sg+Pnon+Nom 7 | 7 2 0 7 Xüş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Verb+Recip^DB+Verb+Caus+Pos+Imp+A2sg X+Verb+Pos^DB+Noun+Inf3+A3sg+Pnon+Nom^DB+Adverb+Since X+Verb+Pos^DB+Noun+Inf3+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xüş+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xüş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xüş+Verb^DB+Verb+Caus+Pos+Imp+A2sg 8 | 8 2 0 7 Xl+Verb+Pos+Opt+A2sg Xl+Noun+A3sg+Pnon+Dat^DB+Verb+Zero+Pres+A2sg Xla+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A2sg Xlas+Noun+A3sg+P2sg+Nom 
Xlas+Noun+A3sg+Pnon+Gen Xlası+Noun+A3sg+P2sg+Nom Xlasın+Noun+A3sg+Pnon+Nom 9 | 9 2 0 7 Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Adj+NarrPart Xkle+Verb+Recip^DB+Verb+Caus+Pos+Narr+A3sg Xkle+Verb+Recip^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos+Narr+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero Xk+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Narr+A3sg Xk+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero 10 | 10 1 0 7 Xu+Noun+A3sg+P3sg+Loc X+Noun+A3sg+P2sg+Loc X+Noun+A3sg+P3sg+Loc Xu+Noun+A3sg+P2sg+Loc Xun+Noun+A3sg+Pnon+Loc Xunda+Noun+A3sg+Pnon+Nom Xunt+Noun+A3sg+Pnon+Dat 11 | 11 1 0 7 Xn+Adj^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xnle+Verb+Recip^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xnle+Verb+Recip^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adverb+Since Xnle+Verb+Recip^DB+Verb+Caus+Pos^DB+Adj+Agt^DB+Verb+Zero+Pres+Cop+A3sg Xn+Adj^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xn+Adj^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Noun+Agt+A3sg+Pnon+Nom^DB+Adverb+Since Xn+Adj^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Adj+Agt^DB+Verb+Zero+Pres+Cop+A3sg 12 | 12 1 0 7 Xl+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xlma+Adj^DB+Noun+Zero+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xlma+Adj^DB+Noun+Zero+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xlma+Adj^DB+Noun+Zero+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xl+Verb+Pos^DB+Noun+Inf2+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xl+Verb+Pos^DB+Noun+Inf2+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xl+Verb+Pos^DB+Noun+Inf2+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 13 | 13 1 0 7 Xlık+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xlık+Noun+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xlık+Noun+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xlık+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Noun+Ness+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg 
X+Adj^DB+Noun+Ness+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Noun+Ness+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 14 | 14 1 0 7 Xla+Verb^DB+Verb+Reflex+Pos+Prog2+A3sg+Cop Xla+Verb^DB+Verb+Pass+Pos+Prog2+Cop+A3sg Xla+Verb^DB+Verb+Pass+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xla+Verb+Reflex+Pos+Prog2+Cop+A3sg Xla+Verb+Reflex+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Verb+Acquire+Pos+Prog2+Cop+A3sg X+Adj^DB+Verb+Acquire+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg 15 | 15 1 0 7 Xla+Verb^DB+Verb+Reflex+Pos+Neces+A3sg Xla+Verb^DB+Verb+Pass+Pos+Neces+A3sg Xla+Verb^DB+Verb+Pass+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With Xla+Verb+Reflex+Pos+Neces+A3sg Xla+Verb+Reflex+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With X+Adj^DB+Verb+Acquire+Pos+Neces+A3sg X+Adj^DB+Verb+Acquire+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With 16 | 16 1 0 7 Xla+Verb^DB+Verb+Reflex+Pos+Neces+A3sg+Cop Xla+Verb^DB+Verb+Pass+Pos+Neces+Cop+A3sg Xla+Verb^DB+Verb+Pass+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xla+Verb+Reflex+Pos+Neces+Cop+A3sg Xla+Verb+Reflex+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Verb+Acquire+Pos+Neces+Cop+A3sg X+Adj^DB+Verb+Acquire+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg 17 | 17 1 0 7 Xla+Verb^DB+Verb+Recip+Pos+Past+A1pl Xla+Verb+Recip+Pos+Past+A1pl Xla+Verb+Recip+Pos^DB+Adj+PastPart+Pnon Xla+Verb+Recip+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Past+A1pl X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Adj+PastPart+Pnon X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+PastPart+A3sg+Pnon+Nom 18 | 18 1 0 7 Xla+Verb^DB+Verb+Recip+Neg+Imp+A2sg Xla+Verb+Recip+Neg+Imp+A2sg Xla+Verb+Recip+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Neg+Imp+A2sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom X+Adj^DB+Verb+Become+Neg+Imp+A2sg X+Adj^DB+Verb+Become+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom 19 
| 19 1 0 7 Xlaş+Verb^DB+Verb+Pass+Pos+Prog2+A3sg+Cop Xla+Verb+Recip^DB+Verb+Pass+Pos+Prog2+Cop+A3sg Xla+Verb+Recip^DB+Verb+Pass+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xlaş+Verb^DB+Verb+Pass+Pos+Prog2+Cop+A3sg Xlaş+Verb^DB+Verb+Pass+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Pass+Pos+Prog2+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Pass+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg 20 | 20 1 0 7 Xlaş+Verb^DB+Verb+Pass+Pos+Neces+A3sg+Cop Xla+Verb+Recip^DB+Verb+Pass+Pos+Neces+Cop+A3sg Xla+Verb+Recip^DB+Verb+Pass+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xlaş+Verb^DB+Verb+Pass+Pos+Neces+Cop+A3sg Xlaş+Verb^DB+Verb+Pass+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Pass+Pos+Neces+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Pass+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg 21 | 21 1 0 7 Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+A3sg+Cop Xkle+Verb+Recip^DB+Verb+Caus+Pos+Prog2+Cop+A3sg Xkle+Verb+Recip^DB+Verb+Caus+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos+Prog2+Cop+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xk+Adj^DB+Verb+Become^DB+Verb+Caus+Pos+Prog2+Cop+A3sg Xk+Adj^DB+Verb+Become^DB+Verb+Caus+Pos^DB+Noun+Inf1+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg 22 | 22 1 0 7 Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos+Neces+A3sg+Cop Xkle+Verb+Recip^DB+Verb+Caus^DB+Verb+Pass+Pos+Neces+Cop+A3sg Xkle+Verb+Recip^DB+Verb+Caus^DB+Verb+Pass+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos+Neces+Cop+A3sg 
Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xk+Adj^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos+Neces+Cop+A3sg Xk+Adj^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg 23 | 23 1 0 7 Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass^DB+Verb+Able+Neg^DB+Adj+AorPart Xkle+Verb+Recip^DB+Verb+Caus^DB+Verb+Pass^DB+Verb+Able+Neg+Aor+A3sg Xkle+Verb+Recip^DB+Verb+Caus^DB+Verb+Pass^DB+Verb+Able+Neg+Aor^DB+Adj+Zero Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass^DB+Verb+Able+Neg+Aor+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass^DB+Verb+Able+Neg+Aor^DB+Adj+Zero Xk+Adj^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass^DB+Verb+Able+Neg+Aor+A3sg Xk+Adj^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass^DB+Verb+Able+Neg+Aor^DB+Adj+Zero 24 | 24 1 0 7 Xış+Verb+Neg+Neces+A3sg+Cop X+Verb+Recip+Neg+Neces+Cop+A3sg X+Verb+Recip+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xı+Verb+Recip+Neg+Neces+Cop+A3sg Xı+Verb+Recip+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xış+Verb+Neg+Neces+Cop+A3sg Xış+Verb+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg 25 | 25 1 0 7 Xış+Verb^DB+Verb+Pass+Neg+Neces+A3sg+Cop X+Verb+Recip^DB+Verb+Pass+Neg+Neces+Cop+A3sg X+Verb+Recip^DB+Verb+Pass+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xı+Verb+Recip^DB+Verb+Pass+Neg+Neces+Cop+A3sg Xı+Verb+Recip^DB+Verb+Pass+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xış+Verb^DB+Verb+Pass+Neg+Neces+Cop+A3sg Xış+Verb^DB+Verb+Pass+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg 26 | 26 1 0 7 Xış+Verb^DB+Verb+Caus+Pos^DB+Adj+NarrPart X+Verb+Recip^DB+Verb+Caus+Pos+Narr+A3sg X+Verb+Recip^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero Xı+Verb+Recip^DB+Verb+Caus+Pos+Narr+A3sg Xı+Verb+Recip^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero 
# Xış+Verb^DB+Verb+Caus+Pos+Narr+A3sg Xış+Verb^DB+Verb+Caus+Pos+Narr^DB+Adj+Zero 27 | 27 1 0 7 Xış+Verb^DB+Verb+Caus+Neg+Neces+A3sg+Cop X+Verb+Recip^DB+Verb+Caus+Neg+Neces+Cop+A3sg X+Verb+Recip^DB+Verb+Caus+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xı+Verb+Recip^DB+Verb+Caus+Neg+Neces+Cop+A3sg Xı+Verb+Recip^DB+Verb+Caus+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg Xış+Verb^DB+Verb+Caus+Neg+Neces+Cop+A3sg Xış+Verb^DB+Verb+Caus+Neg^DB+Noun+Inf2+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg 28 | 28 1 0 7 Xişi+Noun+A3pl+P3pl+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+Pnon+Acc X+Verb+Pos^DB+Noun+Inf3+A3pl+P3sg+Nom X+Verb+Pos^DB+Noun+Inf3+A3sg+P3pl+Nom X+Verb+Pos^DB+Noun+Inf3+A3pl+P3pl+Nom Xiş+Noun+A3pl+P3pl+Nom Xiş+Noun+A3sg+P3pl+Nom 29 | 29 1 0 7 Xcü+Noun+A3pl+Pnon+Nom Xç+Noun+A3pl+P3sg+Nom Xç+Noun+A3pl+Pnon+Acc Xç+Noun+A3pl+P3pl+Nom Xç+Noun+A3sg+P3pl+Nom Xç+Noun+A3pl+P3pl+Nom Xç+Noun+A3sg+P3pl+Nom 30 | 30 1 0 7 Xce+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xce+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xce+Noun+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xce+Noun+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Adj+AsIf^DB+Noun+Zero+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Adj+AsIf^DB+Noun+Zero+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Adj^DB+Adj+AsIf^DB+Noun+Zero+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg 31 | -------------------------------------------------------------------------------- /cleaner_gui.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Qt GUI for curating morphological-analysis cleaning rules.

Lists the ``Xoutput-n_analyses-*.txt`` files found under ``cleaner_files``,
shows the entries of the selected file together with matching samples grepped
from the training data, and lets the user collect one rule per entry and save
the rules next to the selected file as ``<file>.rules``.
"""
import argparse
import codecs
import collections
import os
import re
import subprocess
import sys

from PyQt4 import QtGui
from PyQt4.QtCore import QStringList, QDir, QString
from PyQt4.QtGui import QTreeView, QFileSystemModel, QTableWidgetItem, QListWidgetItem

import main_form

# File that is grepped for sample occurrences of the selected analysis.
SAMPLES_PATH = "./dataset/errors.gungor.ner.train_and_dev"


class ExampleApp(QtGui.QMainWindow, main_form.Ui_MainWindow):
    """Main window; the widgets themselves come from ``main_form.Ui_MainWindow``."""

    def __init__(self):
        # Name the class explicitly: `super(self.__class__, self)` recurses
        # forever as soon as this class is subclassed.
        super(ExampleApp, self).__init__()
        self.setupUi(self)  # This is defined in design.py file automatically

        cleaner_files_path = os.path.join(str(QDir.currentPath()), "cleaner_files")
        if not os.path.exists(cleaner_files_path):
            os.mkdir(cleaner_files_path)

        self.model = QFileSystemModel()
        self.model.setRootPath(cleaner_files_path)
        self.model.setNameFilters(QStringList(["Xoutput-n_analyses-*.txt"]))
        self.model.setNameFilterDisables(False)
        self.model.setFilter(QDir.Dirs | QDir.Files)

        self.treeView_Xoutput_files.setModel(self.model)
        self.treeView_Xoutput_files.setRootIndex(self.model.index(cleaner_files_path))
        self.treeView_Xoutput_files.setColumnWidth(0, 500)

        # entry id -> list of utf8-encoded fields (id, golden, analyses..., selected)
        self.rules_dict = {}

        self.special_button_for_level_01.setDisabled(True)

        # Every signal is connected exactly once, here.  Connecting inside the
        # selection handlers (as was done before) adds one more connection per
        # selection change, so each slot ends up running many times per event.
        self.treeView_Xoutput_files.selectionModel().selectionChanged.connect(
            self.load_and_view_file_contents)
        self.listWidget_selected_file_contents.selectionModel().selectionChanged.connect(
            self.load_and_view_samples_from_train_and_dev)
        self.listWidget_selected_row.selectionModel().selectionChanged.connect(
            self.update_corrected_morph_analysis)
        self.special_button_for_level_01.clicked.connect(self.add_all_level_01_to_rule_dict)
        self.addRuleToTheListButton.clicked.connect(self.add_to_the_rule_dict)
        self.sort_and_save_button.clicked.connect(self.sort_and_save)

    def load_and_view_file_contents(self, current, previous):
        """Show the lines of the newly selected file and load its rules file."""
        indexes = current.indexes()
        if not indexes:
            # selection was cleared; nothing to load
            return
        print(indexes)

        # NOTE(review): this is the display name, so the file is opened relative
        # to the working directory rather than to `cleaner_files/` -- confirm
        # this is intended (QFileSystemModel.filePath() would give the full path).
        filename = unicode(self.model.data(indexes[0]).toString())

        m = re.match(r"Xoutput-n_analyses-([0-9]+)", filename)
        n_analyzes = int(m.group(1)) if m else -1

        # The bulk "level 01" button only makes sense for single-analysis files.
        self.special_button_for_level_01.setDisabled(n_analyzes != 1)

        with codecs.open(filename, "r", encoding="utf8") as f:
            lines = f.readlines()
        self.listWidget_selected_file_contents.clear()
        self.listWidget_selected_file_contents.addItems(QStringList(lines))

        self.load_and_view_rule_file_contents(n_analyzes)

    def load_and_view_rule_file_contents(self, n_analyzes):
        """Replace `rules_dict` with the contents of the matching ``.rules`` file.

        Reading stops at the first blank line.  When the file cannot be opened
        the rule table is emptied and the status label reports it.
        """
        self.rules_dict = {}
        rules_filename = "Xoutput-n_analyses-%02d.txt.rules" % n_analyzes
        try:
            with codecs.open(rules_filename, "r") as rules_f:
                self.output_file_load_status.setText("%s loaded." % rules_filename)
                self.output_file_load_status.setStyleSheet("QLabel { color : green; }")

                for raw_line in rules_f:
                    line = raw_line.strip()
                    if not line:
                        break
                    fields = line.split(" ")
                    self.rules_dict[int(fields[0])] = fields

                self.update_tableWidgetxxxx(self.tableWidget_output_file_contents,
                                            sorted(self.rules_dict.items(), key=lambda x: x[0]),
                                            len(self.rules_dict.keys()),
                                            1 + 1 + n_analyzes + 1)  # id + golden + FST analyzes + selected

        except IOError:
            self.output_file_load_status.setText("File not found")
            self.output_file_load_status.setStyleSheet("QLabel { color : red; }")

            self.update_tableWidgetxxxx(self.tableWidget_output_file_contents,
                                        [],
                                        0,
                                        1)

    def update_tableWidgetxxxx(self, table_widget, rules, row_count, col_count):
        """Fill `table_widget` with `rules` and highlight the covered entries.

        Args:
            table_widget: the QTableWidget to refill.
            rules: list of ``(entry_id, fields)`` pairs, one per table row;
                `fields` are utf8-encoded byte strings.
            row_count: number of rows to allocate.
            col_count: number of columns to allocate.
        """
        table_widget.clear()
        table_widget.setColumnCount(col_count)
        table_widget.setRowCount(row_count)

        if rules:
            for row in range(table_widget.rowCount()):
                row_items = rules[row]
                print(row_items)
                entry_id, fields = row_items

                # Entry ids are 1-based line numbers of the selected file; the
                # list may not (yet) hold that line, in which case item() is None.
                item = self.listWidget_selected_file_contents.item(
                    int(entry_id) - 1)  # type: QListWidgetItem
                if item is not None:
                    item.setBackgroundColor(QtGui.QColor(255, 0, 0, 127))

                for column, field in enumerate(fields[:table_widget.columnCount()]):
                    table_widget.setItem(row, column, QTableWidgetItem(field.decode("utf8")))

        for column in range(table_widget.columnCount()):
            table_widget.resizeColumnToContents(column)

    def update_corrected_morph_analysis(self, current, previous):
        """Copy the analysis picked in the candidate list into the edit box."""
        indexes = current.indexes()
        if not indexes:
            # emitted with an empty selection when the list is cleared
            return

        self.textEdit_2.setPlainText(self.listWidget_selected_row.model().data(indexes[0]).toString())

    def add_all_level_01_to_rule_dict(self):
        """Rebuild `rules_dict` with one rule per line of the selected file.

        Each rule is ``[id, field 4 (golden analysis), last field, last field]``.
        Lines with fewer than five fields (e.g. blank lines) are skipped.
        """
        self.rules_dict = {}

        for idx in range(self.listWidget_selected_file_contents.count()):
            row_items = unicode(self.listWidget_selected_file_contents.item(idx).text()).strip().split(" ")
            if len(row_items) < 5:
                continue

            rules_item = [x.encode("utf8") for x in [row_items[0],
                                                     row_items[4],
                                                     row_items[-1],
                                                     row_items[-1]]]

            self.rules_dict[int(row_items[0])] = rules_item

        self.update_tableWidgetxxxx(self.tableWidget_output_file_contents,
                                    sorted(self.rules_dict.items(), key=lambda x: x[0]),
                                    len(self.rules_dict.keys()),
                                    1 + 1 + 1 + 1)  # id + golden + FST analyzes + selected

    def add_to_the_rule_dict(self, state):
        """Store the rule currently shown in the editor widgets under its entry id."""
        # label_8 holds "<n_analyzes> <entry_id>" once a row has been selected;
        # before that there is nothing to add.
        try:
            n_analyzes, entry_id = [int(x) for x in unicode(self.label_8.text()).split(" ")]
        except ValueError:
            return

        other_analyzes = [self.listWidget_selected_row.item(i)
                          for i in range(self.listWidget_selected_row.count())]  # type: list[QListWidgetItem]

        rules_item = [unicode(x).encode("utf8") for x in [entry_id,
                                                          self.textEdit_golden_morph_analysis.toPlainText()] +
                      [x.text() for x in other_analyzes] +
                      [self.textEdit_2.toPlainText()]]

        self.rules_dict[entry_id] = rules_item

        self.update_tableWidgetxxxx(self.tableWidget_output_file_contents,
                                    sorted(self.rules_dict.items(), key=lambda x: x[0]),
                                    len(self.rules_dict.keys()),
                                    1 + 1 + n_analyzes + 1)  # id + golden + FST analyzes + selected

    def load_and_view_samples_from_train_and_dev(self, current, previous):
        """Show the selected entry's analyses and up to 50 matching data samples."""
        indexes = current.indexes()
        if not indexes:
            # emitted with an empty selection when the list is cleared
            return
        print(indexes)

        morph_analyzes = unicode(self.listWidget_selected_file_contents.model().data(
            indexes[0]).toString()).strip().split(" ")
        if len(morph_analyzes) < 5:
            # not an entry line (e.g. blank)
            return

        golden_morph_analysis = morph_analyzes[4]
        # the search pattern is the golden analysis without its first character
        target = golden_morph_analysis[1:]
        other_morph_analyzes = morph_analyzes[5:]
        n_analyzes = len(other_morph_analyzes)
        entry_id = int(morph_analyzes[0])

        self.label_3.setText("selected row id: %d" % entry_id)
        self.label_8.setText("%d %d" % (n_analyzes, entry_id))

        self.listWidget_selected_row.clear()
        self.listWidget_selected_row.addItems(QStringList(other_morph_analyzes))

        self.textEdit_golden_morph_analysis.setPlainText(golden_morph_analysis)

        if len(other_morph_analyzes) == 1:
            self.textEdit_2.setPlainText(other_morph_analyzes[0])

        print(target.encode("utf8"))

        try:
            output = subprocess.check_output(
                ["grep", "-F", "-m", "50", "-e", target.encode("utf8"), SAMPLES_PATH],
                shell=False)
        except subprocess.CalledProcessError as e:
            # grep exits with status 1 when nothing matched; that is an empty
            # result, not a failure.  Anything else is a real error.
            if e.returncode != 1:
                raise
            output = ""

        lines = [x.decode("utf8") for x in output.split("\n")]

        samples_table = self.tableWidget_samples_from_train_and_dev
        samples_table.clear()
        samples_table.setColumnCount(n_analyzes + 1)
        samples_table.setRowCount(len(lines) - 1)  # the last element is the empty tail after the final "\n"

        for row in range(samples_table.rowCount()):
            row_items = lines[row].split(" ")[2:]
            for column, cell in enumerate(row_items[:samples_table.columnCount()]):
                samples_table.setItem(row, column, QTableWidgetItem(cell))

        for column in range(samples_table.columnCount()):
            samples_table.resizeColumnToContents(column)

    def sort_and_save(self):
        """Write the rule table to ``<selected file>.rules``, one row per line."""
        indexes = self.treeView_Xoutput_files.selectedIndexes()
        if not indexes:
            return

        filename = unicode(self.model.data(indexes[0]).toString())

        table = self.tableWidget_output_file_contents
        rows = []
        for row in range(table.rowCount()):
            row_content = []
            for column in range(table.columnCount()):
                cell = table.item(row, column)
                if cell is None:
                    # rows shorter than the column count leave cells unset
                    continue
                cell_content = unicode(cell.text())
                if cell_content:
                    row_content.append(cell_content.encode("utf8"))
            rows.append(" ".join(row_content))

        # The content is assembled first so that a failure while reading the
        # table cannot leave a truncated rules file behind.
        with open(filename + ".rules", "w") as f:
            f.write("\n".join(rows))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--command", required=True, choices=["gui"])
    args = parser.parse_args()

    app = QtGui.QApplication(sys.argv)  # A new instance of QApplication
    form = ExampleApp()  # We set the form to be our ExampleApp (design)
    form.show()  # Show the form
    app.exec_()  # and execute the app
# -------------------------------------------------------------------------------- /cleaner_files/Xoutput-n_analyses-03.txt.rules: -------------------------------------------------------------------------------- 1 | 1 Xil+Verb+Neg+Pres+A3sg X+Verb^DB+Verb+Pass+Pos+Imp+A2sg Xil+Conj Xil+Verb+Pres+A3sg Xil+Verb+Pres+A3sg 2 | 2 Xl+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xl+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xl+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xl+Verb^DB+Verb+Caus+Pos+Imp+A2sg Xl+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 3 | 3 Xi'nin+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P3sg+Gen Xi+Noun+Prop+A3sg+P2sg+Gen Xi+Noun+Prop+A3sg+Pnon+Gen Xi+Noun+Prop+A3sg+Pnon+Gen 4 | 4 Xı+Noun+A3sg+P3sg+Dat X+Noun+A3sg+P2sg+Dat X+Noun+A3sg+P3sg+Dat Xı+Noun+A3sg+P2sg+Dat X+Noun+A3sg+P3sg+Dat 5 | 5 Xsü+Noun+A3sg+Pnon+Nom X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+P3sg+Nom X+Noun^DB+Adj+Almost X+Noun+A3sg+P3sg+Nom 6 | 6 Xy+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xy+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xy+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xy+Verb^DB+Verb+Caus+Pos+Imp+A2sg Xy+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 7 | 7 Xi+Adj X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Acc Xi+Noun+A3sg+Pnon+Nom Xi+Noun+A3sg+Pnon+Nom 8 | 8 Xım+Adj^DB+Verb+Zero+Pres+A3sg+Cop X+Adj^DB+Verb+Zero+Pres+A1sg+Cop X+Adj^DB+Noun+Zero+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xım+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xım+Adj^DB+Verb+Zero+Pres+Cop+A3sg 9 | 9 Xcı+Noun+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt X+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom X+Adj^DB+Noun+Agt+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Adj+Agt 10 | 10 Xi+Noun+A3pl+Pnon+Gen X+Noun+A3pl+P2sg+Gen X+Noun+A3pl+P3pl+Gen X+Noun+A3sg+P3pl+Gen X+Noun+A3pl+P3pl+Gen 11 | 11 Xın+Adj Xın+Postp+PCAcc X+Noun+A3sg+P2sg+Nom X+Noun+A3sg+Pnon+Gen Xın+Postp+PCAcc 12 | 12 Xi+Pron+Quant+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Adj^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg
X+Num+Card^DB+Noun+Zero+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xi+Pron+Quant+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xi+Pron+Quant+A3sg+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 13 | 13 Xi+Noun+A3pl+P3sg+Gen X+Noun+A3pl+P2sg+Gen X+Noun+A3pl+P3pl+Gen X+Noun+A3sg+P3pl+Gen X+Noun+A3pl+P3pl+Gen 14 | 14 Xliler+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+A3pl X+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3pl+Pnon+Nom Xli+Noun+Prop+A3pl+Pnon+Nom Xli+Noun+Prop+A3pl+Pnon+Nom 15 | 15 Xı+Noun+A3sg+P3sg+Loc X+Noun+A3sg+P2sg+Loc X+Noun+A3sg+P3sg+Loc Xı+Noun+A3sg+P2sg+Loc X+Noun+A3sg+P3sg+Loc 16 | 16 Xi+Noun+A3pl+P3sg+Dat X+Noun+A3pl+P2sg+Dat X+Noun+A3pl+P3pl+Dat X+Noun+A3sg+P3pl+Dat X+Noun+A3pl+P3pl+Dat 17 | 17 Xnda+Adverb X+Noun+A3sg+P2sg+Loc Xn+Noun+A3sg+Pnon+Loc Xn+Adj^DB+Noun+Zero+A3sg+Pnon+Loc Xn+Adj^DB+Noun+Zero+A3sg+Pnon+Loc 18 | 18 Xı+Noun+A3pl+P3sg+Dat X+Noun+A3pl+P2sg+Dat X+Noun+A3pl+P3pl+Dat X+Noun+A3sg+P3pl+Dat X+Noun+A3pl+P3pl+Dat 19 | 19 Xi+Noun+A3pl+P3sg+Abl X+Noun+A3pl+P2sg+Abl X+Noun+A3pl+P3pl+Abl X+Noun+A3sg+P3pl+Abl X+Noun+A3pl+P3pl+Abl 20 | 20 Xi+Adverb X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Acc Xi+Noun+A3sg+Pnon+Nom Xi+Noun+A3sg+Pnon+Nom 21 | 21 Xn+Verb+Pos+Opt+A1pl Xn+Verb+Pos+Opt+A3pl Xnel+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A1sg Xnel+Noun+A3sg+P1sg+Nom Xn+Verb+Pos+Opt+A3pl 22 | 22 Xlıkçı+Noun+A3sg+Pnon+Nom Xlık+Noun+A3sg+Pnon+Nom^DB+Adj+Agt Xlık+Noun+A3sg+Pnon+Nom^DB+Noun+Agt+A3sg+Pnon+Nom X+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adj+Agt X+Adj^DB+Noun+Ness+A3sg+Pnon+Nom^DB+Adj+Agt 23 | 23 Xi+Noun+A3pl+P3sg+Nom X+Noun+A3pl+P3pl+Nom X+Noun+A3sg+P3pl+Nom Xleri+Noun+A3pl+Pnon+Nom X+Noun+A3pl+P3pl+Nom 24 | 24 Xi+Noun+A3pl+P3sg+Loc X+Noun+A3pl+P2sg+Loc X+Noun+A3pl+P3pl+Loc X+Noun+A3sg+P3pl+Loc X+Noun+A3pl+P3pl+Loc 25 | 25 Xı+Noun+A3pl+P3pl+Acc X+Noun+A3pl+P2sg+Acc X+Noun+A3pl+P3pl+Acc X+Noun+A3sg+P3pl+Acc X+Noun+A3pl+P3pl+Acc 26 | 26 Xd'ın+Noun+Prop+A3sg+Pnon+Nom Xd+Noun+Prop+A3sg+P2sg+Nom Xt+Noun+Prop+A3sg+P2sg+Nom 
Xt+Noun+Prop+A3sg+Pnon+Gen Xt+Noun+Prop+A3sg+Pnon+Gen 27 | 27 Xy+Verb+Pos+Fut+A3sg+Cop Xy+Verb+Pos+Fut+Cop+A3sg Xyacak+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xyacak+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xy+Verb+Pos+Fut+Cop+A3sg 28 | 28 Xsü+Noun+A3sg+Pnon+Nom X+Noun^DB+Adj+Almost X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+P3sg+Nom 29 | 29 Xş+Adj^DB+Verb+Zero+Pres+A3sg+Cop Xş+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xş+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xş+Adj^DB+Verb+Zero+Pres+Cop+A3sg 30 | 30 Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 31 | 31 Xli+Adj^DB+Verb+Zero+Pres+A3sg+Cop Xli+Adj^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3sg+Pnon+Nom^DB+Adverb+Since Xli+Adj^DB+Verb+Zero+Pres+Cop+A3sg 32 | 32 Xı+Noun+A3sg+P3sg+Dat X+Noun+A3sg+P3sg+Dat X+Noun+A3sg+P2sg+Dat Xı+Noun+A3sg+P2sg+Dat X+Noun+A3sg+P3sg+Dat 33 | 33 Xı+Noun+A3pl+P3sg+Gen X+Noun+A3pl+P2sg+Gen X+Noun+A3pl+P3pl+Gen X+Noun+A3sg+P3pl+Gen X+Noun+A3pl+P3pl+Gen 34 | 34 Xi+Noun+A3pl+P3sg+Acc X+Noun+A3pl+P2sg+Acc X+Noun+A3pl+P3pl+Acc X+Noun+A3sg+P3pl+Acc X+Noun+A3pl+P3pl+Acc 35 | 35 Xu+Noun+Prop+Noun+A3sg+P3sg+Gen X+Noun+Prop+A3sg+P3sg+Gen Xu+Noun+Prop+A3sg+P2sg+Gen Xu+Noun+Prop+A3sg+Pnon+Gen Xu+Noun+Prop+A3sg+Pnon+Gen 36 | 36 Xü+Noun+A3sg+Pnon+Nom X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Acc X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+P3sg+Nom 37 | 37 Xs+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xs+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xs+Noun+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xs+Noun+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xs+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 38 | 38 Xı'nın+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P3sg+Gen Xı+Noun+Prop+A3sg+P2sg+Gen 
Xı+Noun+Prop+A3sg+Pnon+Gen Xı+Noun+Prop+A3sg+Pnon+Gen 39 | 39 Xı+Det X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Acc Xı+Adj Xı+Adj 40 | 40 Xdu'nun+Noun+Prop+A3sg+Pnon+Nom Xdu+Noun+Prop+A3sg+P2sg+Gen Xdu+Noun+Prop+A3sg+Pnon+Gen Xt+Noun+Prop+A3sg+P3sg+Gen Xdu+Noun+Prop+A3sg+Pnon+Gen 41 | 41 Xdi'nin+Noun+Prop+A3sg+Pnon+Nom Xdi+Noun+Prop+A3sg+P2sg+Gen Xdi+Noun+Prop+A3sg+Pnon+Gen Xt+Noun+Prop+A3sg+P3sg+Gen Xdi+Noun+Prop+A3sg+Pnon+Gen 42 | 42 Xç+Verb+Pos+Narr+A3sg+Cop Xçmiş+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xçmiş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xç+Verb+Pos+Narr+Cop+A3sg Xç+Verb+Pos+Narr+Cop+A3sg 43 | 43 Xa+Noun+A3sg+Pnon+Nom^DB+Adj+Rel Xak+Noun+A3sg+P3sg+Nom Xak+Noun+A3sg+Pnon+Acc Xaki+Noun+A3sg+Pnon+Nom Xaki+Noun+A3sg+Pnon+Nom 44 | 44 Xz+Postp+PCAbl^DB+Verb+Zero+Pres+A3sg+Cop Xz+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xz+Verb^DB+Verb+Caus+Pos+Imp+A2sg Xz+Postp+PCAbl^DB+Verb+Zero+Pres+Cop+A3sg Xz+Postp+PCAbl^DB+Verb+Zero+Pres+Cop+A3sg 45 | 45 Xş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xş+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xş+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xş+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 46 | 46 X'luk+Noun+Prop+A3sg+Pnon+Nom X+Num+Real^DB+Noun+Ness+A3sg+Pnon+Nom X+Noun+Time+A3sg+Pnon+Nom^DB+Adj+FitFor X+Noun+Time+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Nom X+Num+Real^DB+Noun+Ness+A3sg+Pnon+Nom 47 | 47 Xliği+Noun+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+Pnon+Acc Xlik+Noun+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Nom^DB+Noun+Ness+A3sg+P3sg+Nom 48 | 48 Xlı+Adj X+Noun+A3sg+Pnon+Nom^DB+Adj+With Xl+Noun+A3sg+P3sg+Nom Xl+Noun+A3sg+Pnon+Acc X+Noun+A3sg+Pnon+Nom^DB+Adj+With 49 | 49 Xla+Verb^DB+Verb+Reflex+Pos+Imp+A2pl Xla+Verb^DB+Verb+Pass+Pos+Imp+A2pl Xla+Verb+Reflex+Pos+Imp+A2pl X+Adj^DB+Verb+Acquire+Pos+Imp+A2pl Xla+Verb+Reflex+Pos+Imp+A2pl 50 | 50 Xları'na+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P3pl+Dat Xlar+Noun+Prop+A3sg+P3sg+Dat 
Xları+Noun+Prop+A3sg+P2sg+Dat X+Noun+Prop+A3sg+P3pl+Dat 51 | 51 Xla+Postp+PCAbl^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+Pnon+Ins^DB+Verb+Zero+Pres+Cop+A3sg Xla+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xla+Postp+PCAbl^DB+Verb+Zero+Pres+Cop+A3sg Xla+Postp+PCAbl^DB+Verb+Zero+Pres+Cop+A3sg 52 | 52 Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos^DB+Adj+NarrPart^DB+Verb+Zero+Pres+A3sg+Cop Xkle+Verb+Recip^DB+Verb+Caus^DB+Verb+Pass+Pos+Narr+Cop+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos+Narr+Cop+A3sg Xk+Adj^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos+Narr+Cop+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become^DB+Verb+Caus^DB+Verb+Pass+Pos+Narr+Cop+A3sg 53 | 53 Xin+Postp+PCNom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+Cop+A3sg Xin+Postp+PCNom^DB+Verb+Zero+Pres+Cop+A3sg Xin+Postp+PCNom^DB+Verb+Zero+Pres+Cop+A3sg 54 | 54 Xı+Noun+A3pl+Pnon+Gen X+Noun+A3pl+P2sg+Gen X+Noun+A3pl+P3pl+Gen X+Noun+A3sg+P3pl+Gen X+Noun+A3pl+P3pl+Gen 55 | 55 Xil+Verb+Neg+Narr+A3sg X+Verb^DB+Verb+Pass+Pos+Narr+A3sg X+Verb^DB+Verb+Pass+Pos+Narr^DB+Adj+Zero Xil+Verb+Narr+A3sg Xil+Verb+Narr+A3sg 56 | 56 Xı+Adj X+Noun+A3sg+P3sg+Nom X+Noun+A3sg+Pnon+Acc Xı+Noun+A3sg+Pnon+Nom Xı+Noun+A3sg+Pnon+Nom 57 | 57 Xd'in+Noun+Prop+A3sg+Pnon+Nom Xd+Noun+Prop+A3sg+P2sg+Nom Xt+Noun+Prop+A3sg+P2sg+Nom Xt+Noun+Prop+A3sg+Pnon+Gen Xt+Noun+Prop+A3sg+Pnon+Gen 58 | 58 Xcü+Noun+A3sg+Pnon+Nom Xç+Noun+A3sg+P3sg+Nom Xç+Noun+A3sg+P3sg+Nom Xç+Noun+A3sg+Pnon+Acc Xç+Noun+A3sg+P3sg+Nom 59 | 59 Xç+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xç+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xç+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xç+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xç+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 60 | 60 Xa+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xa+Noun+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xa+Noun+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xa+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 
Xa+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 61 | 61 Xu'nun+Noun+Prop+A3sg+Pnon+Nom X+Noun+Prop+A3sg+P3sg+Gen Xu+Noun+Prop+A3sg+P2sg+Gen Xu+Noun+Prop+A3sg+Pnon+Gen Xu+Noun+Prop+A3sg+Pnon+Gen 62 | 62 Xu+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xu+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xu+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xu+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xu+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 63 | 63 Xun+Adj^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg X+Noun+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+Cop+A3sg Xun+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xun+Adj^DB+Verb+Zero+Pres+Cop+A3sg 64 | 64 Xu+Adj^DB+Verb+Zero+Pres+A3sg+Cop Xu+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xu+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xu+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xu+Adj^DB+Verb+Zero+Pres+Cop+A3sg 65 | 65 Xt+Verb+Pos^DB+Adj+NarrPart Xt+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Narr+A3sg Xt+Verb+Pos+Narr+A3sg Xt+Verb+Pos+Narr^DB+Adj+Zero Xt+Verb+Pos+Narr^DB+Adj+Zero 66 | 66 Xt+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xt+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xt+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xt+Verb^DB+Verb+Caus+Pos+Imp+A2sg Xt+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 67 | 67 Xs+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xs+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xs+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xs+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xs+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 68 | 68 Xş+Noun+A3sg+P3sg+Loc^DB+Verb+Zero+Pres+A3sg+Cop Xş+Noun+A3sg+P3sg+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xş+Noun+A3sg+P2sg+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xşin+Adj^DB+Noun+Zero+A3sg+Pnon+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xş+Noun+A3sg+P3sg+Loc^DB+Verb+Zero+Pres+Cop+A3sg 69 | 69 Xr+Noun+Prop+A3sg+Pnon+Nom^DB+Adj+With^DB+Noun+Zero+A3sg+Pnon+Gen Xrl+Noun+Prop+A3sg+P3sg+Gen Xrlü+Noun+Prop+A3sg+P2sg+Gen Xrlü+Noun+Prop+A3sg+Pnon+Gen Xrlü+Noun+Prop+A3sg+Pnon+Gen 70 | 70 Xr+Noun+A3sg+P3sg+Loc^DB+Verb+Zero+Pres+A3sg+Cop 
Xr+Noun+A3sg+P3sg+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xr+Noun+A3sg+P2sg+Loc^DB+Verb+Zero+Pres+Cop+A3sg Xrinde+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xr+Noun+A3sg+P3sg+Loc^DB+Verb+Zero+Pres+Cop+A3sg 71 | 71 Xn+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xn+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xn+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xn+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 72 | 72 Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Noun+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 73 | 73 Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop X+Adj^DB+Noun+Zero+A3sg+P1sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 74 | 74 Xm+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xm+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3pl+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3sg+P3pl+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xm+Noun+A3pl+P3sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg 75 | 75 Xl+Verb+Neg+Pres+A3pl Xlle+Verb+Pos+Aor+A3sg Xlle+Verb+Pos+Aor^DB+Adj+Zero Xl+Verb+Pres+A3pl Xl+Verb+Pres+A3pl 76 | 76 Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Narr+A3sg+Cop Xkle+Verb+Recip+Pos+Narr+Cop+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Narr+Cop+A3sg Xk+Adj^DB+Verb+Become+Pos+Narr+Cop+A3sg Xk+Noun+A3sg+Pnon+Nom^DB+Verb+Become+Pos+Narr+Cop+A3sg 77 | 77 Xı+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xı+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xı+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xı+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xı+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg 78 | 78 Xı+Noun+A3pl+P3sg+Loc X+Noun+A3pl+P2sg+Loc X+Noun+A3pl+P3pl+Loc X+Noun+A3sg+P3pl+Loc X+Noun+A3sg+P3pl+Loc 79 | 79 Xı+Noun+A3pl+P3pl+Dat X+Noun+A3pl+P2sg+Dat X+Noun+A3pl+P3pl+Dat X+Noun+A3sg+P3pl+Dat X+Noun+A3sg+P3pl+Dat 80 | 
80 Xi+Noun+A3pl+P3pl+Acc X+Noun+A3pl+P2sg+Acc X+Noun+A3pl+P3pl+Acc X+Noun+A3sg+P3pl+Acc X+Noun+A3sg+P3pl+Acc 81 | 81 Xi+Noun+A3pl+P3pl+Abl X+Noun+A3pl+P2sg+Abl X+Noun+A3pl+P3pl+Abl X+Noun+A3sg+P3pl+Abl X+Noun+A3sg+P3pl+Abl 82 | 82 Xğun+Adj^DB+Verb+Zero+Pres+A3sg+Cop Xk+Adj^DB+Noun+Zero+A3sg+P2sg+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xk+Adj^DB+Noun+Zero+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+Cop+A3sg Xğun+Adj^DB+Verb+Zero+Pres+Cop+A3sg Xk+Adj^DB+Noun+Zero+A3sg+Pnon+Gen^DB+Verb+Zero+Pres+Cop+A3sg 83 | 83 Xe+Verb+Pos^DB+Adj+AorPart Xe+Verb+Pos+Aor+A3sg Xe+Verb+Pos+Aor^DB+Adj+Zero Xer+Adj Xe+Verb+Pos+Aor^DB+Adj+Zero 84 | 84 Xdan+Adj X+Noun+A3sg+Pnon+Abl Xda+Noun+A3sg+P2sg+Nom Xdan+Noun+A3sg+Pnon+Nom X+Noun+A3sg+Pnon+Abl 85 | 85 Xce+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+A3sg+Cop Xç+Noun+A3sg+Pnon+Dat^DB+Verb+Zero+Pres+Cop+A3sg Xce+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg Xce+Noun+A3sg+Pnon+Nom^DB+Adverb+Since Xce+Noun+A3sg+Pnon+Nom^DB+Verb+Zero+Pres+Cop+A3sg -------------------------------------------------------------------------------- /evaluation/conlleval: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | # conlleval: evaluate result of processing CoNLL-2000 shared task 3 | # usage: conlleval [-l] [-r] [-d delimiterTag] [-o oTag] < file 4 | # README: http://cnts.uia.ac.be/conll2000/chunking/output.html 5 | # options: l: generate LaTeX output for tables like in 6 | # http://cnts.uia.ac.be/conll2003/ner/example.tex 7 | # r: accept raw result tags (without B- and I- prefix; 8 | # assumes one word per chunk) 9 | # d: alternative delimiter tag (default is single space) 10 | # o: alternative outside tag (default is O) 11 | # note: the file should contain lines with items separated 12 | # by $delimiter characters (default space). The final 13 | # two items should contain the correct tag and the 14 | # guessed tag in that order. 
#            Sentences should be
#            separated from each other by empty lines or lines
#            with $boundary fields (default -X-).
# url:       http://lcg-www.uia.ac.be/conll2000/chunking/
# started:   1998-09-25
# version:   2004-01-26
# author:    Erik Tjong Kim Sang

use strict;

# boolean constants shared with endOfChunk/startOfChunk (any non-zero value
# works for $true; only its truthiness is ever tested)
my $false = 0;
my $true = 42;

my $boundary = "-X-";     # sentence boundary
my $correct;              # current corpus chunk tag (I,O,B)
my $correctChunk = 0;     # number of correctly identified chunks
my $correctTags = 0;      # number of correct chunk tags
my $correctType;          # type of current corpus chunk tag (NP,VP,etc.)
my $delimiter = " ";      # field delimiter (interpolated into a regex)
my $FB1 = 0.0;            # FB1 score (Van Rijsbergen 1979)
my $firstItem;            # first feature (for sentence boundary checks)
my $foundCorrect = 0;     # number of chunks in corpus
my $foundGuessed = 0;     # number of identified chunks
my $guessed;              # current guessed chunk tag
my $guessedType;          # type of current guessed chunk tag
my $i;                    # miscellaneous counter
my $inCorrect = $false;   # currently processed chunk is correct until now
my $lastCorrect = "O";    # previous chunk tag in corpus
my $latex = 0;            # generate LaTeX formatted output
my $lastCorrectType = ""; # type of previous chunk tag in corpus
my $lastGuessed = "O";    # previously identified chunk tag
my $lastGuessedType = ""; # type of previously identified chunk tag
my $lastType;             # temporary storage for detecting duplicates
my $line;                 # line
my $nbrOfFeatures = -1;   # number of features per line
my $precision = 0.0;      # precision score
my $oTag = "O";           # outside tag, default O
my $raw = 0;              # raw input: add B to every token
my $recall = 0.0;         # recall score
my $tokenCounter = 0;     # token counter (ignores sentence breaks)

my %correctChunk = ();    # number of correctly identified chunks per type
my %foundCorrect = ();    # number of chunks in corpus per type
my %foundGuessed = ();    # number of identified chunks per type

my @features;             # features on line
my @sortedTypes;          # sorted list of chunk type names

# parse command line options; anything left over afterwards is an error
while (@ARGV and $ARGV[0] =~ /^-/) {
   if ($ARGV[0] eq "-l") { $latex = 1; shift(@ARGV); }
   elsif ($ARGV[0] eq "-r") { $raw = 1; shift(@ARGV); }
   elsif ($ARGV[0] eq "-d") {
      shift(@ARGV);
      if (not defined $ARGV[0]) {
         die "conlleval: -d requires delimiter character";
      }
      $delimiter = shift(@ARGV);
   } elsif ($ARGV[0] eq "-o") {
      shift(@ARGV);
      if (not defined $ARGV[0]) {
         die "conlleval: -o requires outside tag";
      }
      $oTag = shift(@ARGV);
   } else { die "conlleval: unknown argument $ARGV[0]\n"; }
}
if (@ARGV) { die "conlleval: unexpected command line argument\n"; }

# process input: one token per line on standard input
while (<STDIN>) {
   chomp($line = $_);
   @features = split(/$delimiter/,$line);
   # the first non-empty line fixes the expected number of columns
   if ($nbrOfFeatures < 0) { $nbrOfFeatures = $#features; }
   elsif ($nbrOfFeatures != $#features and @features != 0) {
      printf STDERR "unexpected number of features: %d (%d)\n",
         $#features+1,$nbrOfFeatures+1;
      exit(1);
   }
   # empty lines and explicit boundary lines both become an "O O" boundary
   if (@features == 0 or
       $features[0] eq $boundary) { @features = ($boundary,"O","O"); }
   if (@features < 2) {
      die "conlleval: unexpected number of features in line $line\n";
   }
   if ($raw) {
      # raw mode: map the outside tag to "O" and treat every other tag as
      # a one-word chunk by prefixing it with "B-"
      if ($features[$#features] eq $oTag) { $features[$#features] = "O"; }
      if ($features[$#features-1] eq $oTag) { $features[$#features-1] = "O"; }
      if ($features[$#features] ne "O") {
         $features[$#features] = "B-$features[$#features]";
      }
      if ($features[$#features-1] ne "O") {
         $features[$#features-1] = "B-$features[$#features-1]";
      }
   }
   # 20040126 ET code which allows hyphens in the types
   # last column: guessed tag, split at the first hyphen into tag and type
   if ($features[$#features] =~ /^([^-]*)-(.*)$/) {
      $guessed = $1;
      $guessedType = $2;
   } else {
      $guessed = $features[$#features];
      $guessedType = "";
   }
   pop(@features);
   # second to last column: correct (corpus) tag
   if ($features[$#features] =~ /^([^-]*)-(.*)$/) {
      $correct = $1;
      $correctType = $2;
   } else {
      $correct = $features[$#features];
      $correctType = "";
   }
   pop(@features);
   $guessedType = $guessedType ? $guessedType : "";
   $correctType = $correctType ? $correctType : "";
   $firstItem = shift(@features);
   # input with only the two tag columns leaves no first item; use an empty
   # string so the boundary comparisons below do not warn under -w
   $firstItem = "" if (not defined $firstItem);

   # 1999-06-26 sentence breaks should always be counted as out of chunk
   if ( $firstItem eq $boundary ) { $guessed = "O"; }

   if ($inCorrect) {
      # both chunks end here with the same type: the guessed chunk is correct
      if ( &endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) and
           &endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) and
           $lastGuessedType eq $lastCorrectType) {
         $inCorrect=$false;
         $correctChunk++;
         $correctChunk{$lastCorrectType} = $correctChunk{$lastCorrectType} ?
             $correctChunk{$lastCorrectType}+1 : 1;
      } elsif (
           &endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) !=
           &endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) or
           $guessedType ne $correctType ) {
         # only one of the chunks ended, or the types diverged
         $inCorrect=$false;
      }
   }

   # a chunk can only be correct if both start at the same token and type
   if ( &startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) and
        &startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) and
        $guessedType eq $correctType) { $inCorrect = $true; }

   if ( &startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) ) {
      $foundCorrect++;
      $foundCorrect{$correctType} = $foundCorrect{$correctType} ?
          $foundCorrect{$correctType}+1 : 1;
   }
   if ( &startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) ) {
      $foundGuessed++;
      $foundGuessed{$guessedType} = $foundGuessed{$guessedType} ?
          $foundGuessed{$guessedType}+1 : 1;
   }
   if ( $firstItem ne $boundary ) {
      if ( $correct eq $guessed and $guessedType eq $correctType ) {
         $correctTags++;
      }
      $tokenCounter++;
   }

   $lastGuessed = $guessed;
   $lastCorrect = $correct;
   $lastGuessedType = $guessedType;
   $lastCorrectType = $correctType;
}
# a chunk still open and correct at end of input counts as correct
if ($inCorrect) {
   $correctChunk++;
   $correctChunk{$lastCorrectType} = $correctChunk{$lastCorrectType} ?
       $correctChunk{$lastCorrectType}+1 : 1;
}

if (not $latex) {
   # compute overall precision, recall and FB1 (default values are 0.0)
   $precision = 100*$correctChunk/$foundGuessed if ($foundGuessed > 0);
   $recall = 100*$correctChunk/$foundCorrect if ($foundCorrect > 0);
   $FB1 = 2*$precision*$recall/($precision+$recall)
      if ($precision+$recall > 0);

   # print overall performance
   printf "processed $tokenCounter tokens with $foundCorrect phrases; ";
   printf "found: $foundGuessed phrases; correct: $correctChunk.\n";
   if ($tokenCounter>0) {
      printf "accuracy: %6.2f%%; ",100*$correctTags/$tokenCounter;
      printf "precision: %6.2f%%; ",$precision;
      printf "recall: %6.2f%%; ",$recall;
      printf "FB1: %6.2f\n",$FB1;
   }
}

# sort chunk type names and drop duplicates (a type may occur in both hashes)
undef($lastType);
@sortedTypes = ();
foreach $i (sort (keys %foundCorrect,keys %foundGuessed)) {
   # test definedness, not truth: the empty type "" is a valid previous value
   if (not defined($lastType) or $lastType ne $i) {
      push(@sortedTypes,($i));
   }
   $lastType = $i;
}
# print performance per chunk type
if (not $latex) {
   for $i (@sortedTypes) {
      $correctChunk{$i} = $correctChunk{$i} ? $correctChunk{$i} : 0;
      if (not($foundGuessed{$i})) { $foundGuessed{$i} = 0; $precision = 0.0; }
      else { $precision = 100*$correctChunk{$i}/$foundGuessed{$i}; }
      if (not($foundCorrect{$i})) { $recall = 0.0; }
      else { $recall = 100*$correctChunk{$i}/$foundCorrect{$i}; }
      if ($precision+$recall == 0.0) { $FB1 = 0.0; }
      else { $FB1 = 2*$precision*$recall/($precision+$recall); }
      printf "%17s: ",$i;
      printf "precision: %6.2f%%; ",$precision;
      printf "recall: %6.2f%%; ",$recall;
      printf "FB1: %6.2f %d\n",$FB1,$foundGuessed{$i};
   }
} else {
   print " & Precision & Recall & F\$_{\\beta=1} \\\\\\hline";
   for $i (@sortedTypes) {
      $correctChunk{$i} = $correctChunk{$i} ? $correctChunk{$i} : 0;
      if (not($foundGuessed{$i})) { $precision = 0.0; }
      else { $precision = 100*$correctChunk{$i}/$foundGuessed{$i}; }
      if (not($foundCorrect{$i})) { $recall = 0.0; }
      else { $recall = 100*$correctChunk{$i}/$foundCorrect{$i}; }
      if ($precision+$recall == 0.0) { $FB1 = 0.0; }
      else { $FB1 = 2*$precision*$recall/($precision+$recall); }
      printf "\n%-7s & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\",
             $i,$precision,$recall,$FB1;
   }
   print "\\hline\n";
   # overall scores are only computed here in LaTeX mode
   $precision = 0.0;
   $recall = 0;
   $FB1 = 0.0;
   $precision = 100*$correctChunk/$foundGuessed if ($foundGuessed > 0);
   $recall = 100*$correctChunk/$foundCorrect if ($foundCorrect > 0);
   $FB1 = 2*$precision*$recall/($precision+$recall)
      if ($precision+$recall > 0);
   printf "Overall & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\\\hline\n",
          $precision,$recall,$FB1;
}

exit 0;

# endOfChunk: checks if a chunk ended between the previous and current word
# arguments:  previous and current chunk tags, previous and current types
# note:       this code is capable of handling other chunk representations
#             than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
#             Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006

sub endOfChunk {
   my ($prevTag,$tag,$prevType,$type) = @_;

   # corrected 1998-12-22: bracket chunks are assumed to have length 1,
   # so a chunk always ends right after one
   return($true) if ($prevTag eq "]" or $prevTag eq "[");

   # a change of type after any tag other than "O" or "." ends the chunk
   return($true)
      if ($prevTag ne "O" and $prevTag ne "." and $prevType ne $type);

   # after B or I, the chunk ends when a new one begins or at an outside tag
   if ($prevTag eq "B" or $prevTag eq "I") {
      return($true) if ($tag eq "B" or $tag eq "O");
   }

   # after E, the chunk ends before another E, an I or an outside tag
   if ($prevTag eq "E") {
      return($true) if ($tag eq "E" or $tag eq "I" or $tag eq "O");
   }

   return($false);
}

# startOfChunk: checks if a chunk started between the previous and current word
# arguments:    previous and current chunk tags, previous and current types
# note:         this code is capable of handling other chunk representations
#               than the default CoNLL-2000 ones, see EACL'99 paper of Tjong
#               Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006

sub startOfChunk {
   my ($prevTag,$tag,$prevType,$type) = @_;

   # corrected 1998-12-22: bracket chunks are assumed to have length 1,
   # so every bracket tag starts a chunk
   return($true) if ($tag eq "[" or $tag eq "]");

   # a change of type on any tag other than "O" or "." starts a chunk
   return($true)
      if ($tag ne "O" and $tag ne "." and $prevType ne $type);

   # B starts a chunk after B, I or an outside tag
   if ($tag eq "B") {
      return($true)
         if ($prevTag eq "B" or $prevTag eq "I" or $prevTag eq "O");
   }

   # I starts a chunk after an outside tag or a chunk-final E
   if ($tag eq "I") {
      return($true) if ($prevTag eq "O" or $prevTag eq "E");
   }

   # E starts a chunk after another E or an outside tag
   if ($tag eq "E") {
      return($true) if ($prevTag eq "E" or $prevTag eq "O");
   }

   return($false);
}