├── .gitignore ├── LICENSE ├── README.md ├── basic.conf ├── bert ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── __init__.py ├── create_pretraining_data.py ├── extract_features.py ├── modeling.py ├── modeling_test.py ├── multilingual.md ├── optimization.py ├── optimization_test.py ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb ├── requirements.txt ├── run_classifier.py ├── run_classifier_with_tfhub.py ├── run_pretraining.py ├── run_squad.py ├── sample_text.txt ├── tokenization.py └── tokenization_test.py ├── cased_config_vocab ├── bert_base_config.json ├── bert_large_config.json ├── trial.jsonlines └── vocab.txt ├── conll-2012 ├── scorer │ ├── reference-coreference-scorers │ │ └── v8.01 │ │ │ ├── README.txt │ │ │ ├── lib │ │ │ ├── Algorithm │ │ │ │ ├── Munkres.pm │ │ │ │ └── README.Munkres │ │ │ ├── CorScorer.pm │ │ │ ├── Cwd.pm │ │ │ ├── Data │ │ │ │ └── Dumper.pm │ │ │ └── Math │ │ │ │ └── Combinatorics.pm │ │ │ ├── scorer.bat │ │ │ ├── scorer.pl │ │ │ └── test │ │ │ ├── CorefMetricTest.pm │ │ │ ├── CorefMetricTestConfig.pm │ │ │ ├── DataFiles │ │ │ ├── TC-A-1.response │ │ │ ├── TC-A-10.response │ │ │ ├── TC-A-11.response │ │ │ ├── TC-A-12.response │ │ │ ├── TC-A-13.response │ │ │ ├── TC-A-2.response │ │ │ ├── TC-A-3.response │ │ │ ├── TC-A-4.response │ │ │ ├── TC-A-5.response │ │ │ ├── TC-A-6.response │ │ │ ├── TC-A-7.response │ │ │ ├── TC-A-8.response │ │ │ ├── TC-A-9.response │ │ │ ├── TC-A.key │ │ │ ├── TC-B-1.response │ │ │ ├── TC-B.key │ │ │ ├── TC-C-1.response │ │ │ ├── TC-C.key │ │ │ ├── TC-D-1.response │ │ │ ├── TC-D.key │ │ │ ├── TC-E-1.response │ │ │ ├── TC-E.key │ │ │ ├── TC-F-1.response │ │ │ ├── TC-F.key │ │ │ ├── TC-G-1.response │ │ │ ├── TC-G.key │ │ │ ├── TC-H-1.response │ │ │ ├── TC-H.key │ │ │ ├── TC-I-1.response │ │ │ ├── TC-I.key │ │ │ ├── TC-J-1.response │ │ │ ├── TC-J.key │ │ │ ├── TC-K-1.response │ │ │ ├── TC-K.key │ │ │ ├── TC-L-1.response │ │ │ ├── TC-L.key │ │ │ ├── TC-M-1.response │ │ │ ├── TC-M-2.response │ │ │ ├── TC-M-3.response │ │ │ ├── TC-M-4.response │ │ │ ├── TC-M-5.response │ │ │ ├── TC-M-6.response │ │ │ ├── TC-M.key │ │ │ ├── TC-N-1.response │ │ │ ├── TC-N-2.response │ │ │ ├── TC-N-3.response │ │ │ ├── TC-N-4.response │ │ │ ├── TC-N-5.response │ │ │ ├── TC-N-6.response │ │ │ └── TC-N.key │ │ │ ├── TestCases.README │ │ │ └── test.pl │ └── v8.01 │ │ ├── README.txt │ │ ├── lib │ │ ├── Algorithm │ │ │ ├── Munkres.pm │ │ │ └── README.Munkres │ │ ├── CorScorer.pm │ │ ├── Cwd.pm │ │ ├── Data │ │ │ └── Dumper.pm │ │ └── Math │ │ │ └── Combinatorics.pm │ │ ├── scorer.bat │ │ ├── scorer.pl │ │ └── test │ │ ├── CorefMetricTest.pm │ │ ├── CorefMetricTestConfig.pm │ │ ├── DataFiles │ │ ├── TC-A-1.response │ │ ├── TC-A-10.response │ │ ├── TC-A-11.response │ │ ├── TC-A-12.response │ │ ├── TC-A-13.response │ │ ├── TC-A-2.response │ │ ├── TC-A-3.response │ │ ├── TC-A-4.response │ │ ├── TC-A-5.response │ │ ├── TC-A-6.response │ │ ├── TC-A-7.response │ │ ├── TC-A-8.response │ │ ├── TC-A-9.response │ │ ├── TC-A.key │ │ ├── TC-B-1.response │ │ ├── TC-B.key │ │ ├── TC-C-1.response │ │ ├── TC-C.key │ │ ├── TC-D-1.response │ │ ├── TC-D.key │ │ ├── TC-E-1.response │ │ ├── TC-E.key │ │ ├── TC-F-1.response │ │ ├── TC-F.key │ │ ├── TC-G-1.response │ │ ├── TC-G.key │ │ ├── TC-H-1.response │ │ ├── TC-H.key │ │ ├── TC-I-1.response │ │ ├── TC-I.key │ │ ├── TC-J-1.response │ │ ├── TC-J.key │ │ ├── TC-K-1.response │ │ ├── TC-K.key │ │ ├── TC-L-1.response │ │ ├── TC-L.key │ │ ├── TC-M-1.response │ │ ├── TC-M-2.response │ │ ├── TC-M-3.response │ │ ├── TC-M-4.response │ │ ├── 
TC-M-5.response │ │ ├── TC-M-6.response │ │ ├── TC-M.key │ │ ├── TC-N-1.response │ │ ├── TC-N-2.response │ │ ├── TC-N-3.response │ │ ├── TC-N-4.response │ │ ├── TC-N-5.response │ │ ├── TC-N-6.response │ │ └── TC-N.key │ │ ├── TestCases.README │ │ └── test.pl └── v3 │ └── scripts │ ├── conll2coreference.py │ ├── conll2coreference.sh │ ├── conll2name.py │ ├── conll2name.sh │ ├── conll2parse.py │ ├── conll2parse.sh │ ├── skeleton2conll.py │ └── skeleton2conll.sh ├── conll.py ├── coref_kernels.cc ├── coref_ops.py ├── current_models.py ├── debug ├── compare.py ├── count.py ├── demo.py ├── diff_clusters.py ├── gen_keys.py ├── gold_mentions.py ├── http_demo.py ├── print_clusters.py ├── pronoun_evaluation.py ├── slurm_coref.slrm └── tune.py ├── download_pretrained.sh ├── evaluate.py ├── experiments.conf ├── gap_to_jsonlines.py ├── independent.py ├── metrics.py ├── minimize.py ├── optimization.py ├── overlap.py ├── overlap_minimize.py ├── predict.py ├── pytorch_to_tf.py ├── requirements.txt ├── setup_all.sh ├── setup_training.sh ├── to_gap_tsv.py ├── train.py ├── util.py └── viz ├── .gitignore ├── coref_elmo_full ├── index.html ├── main.js └── run.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.out 3 | *.so 4 | *.jsonlines 5 | logs 6 | char_vocab*.txt 7 | glove*.txt 8 | glove*.txt.filtered 9 | *.v*_*_conll 10 | *.hdf5 11 | -------------------------------------------------------------------------------- /basic.conf: -------------------------------------------------------------------------------- 1 | # Main configuration. Do not edit! Copy to experiments.conf and change that. 2 | best { 3 | # BERT 4 | bert_config_file = ../bert/cased_L-12_H-768_A-12/bert_config.json 5 | vocab_file = ../bert/cased_L-12_H-768_A-12/vocab.txt 6 | init_checkpoint = ../bert/cased_L-12_H-768_A-12/bert_model.ckpt 7 | tf_checkpoint = ../bert/cased_L-24_H-1024_A-16/bert_model.ckpt 8 | model_type = independent 9 | # Computation limits. 10 | max_top_antecedents = 50 11 | max_training_sentences = 5 12 | top_span_ratio = 0.4 13 | max_num_speakers = 20 14 | max_segment_len = 256 15 | 16 | # Learning 17 | bert_learning_rate = 1e-5 18 | task_learning_rate = 2e-4 19 | num_docs = 2802 20 | 21 | # Model hyperparameters. 22 | dropout_rate = 0.3 23 | ffnn_size = 1000 24 | ffnn_depth = 1 25 | num_epochs = 20 26 | feature_size = 20 27 | max_span_width = 30 28 | use_metadata = true 29 | use_features = true 30 | use_segment_distance = true 31 | model_heads = true 32 | coref_depth = 2 33 | coarse_to_fine = true 34 | fine_grained = true 35 | use_prior = true 36 | 37 | # Other. 38 | train_path = train.english.jsonlines 39 | eval_path = dev.english.jsonlines 40 | conll_eval_path = dev.english.v4_gold_conll 41 | single_example = true 42 | genres = ["bc", "bn", "mz", "nw", "pt", "tc", "wb"] 43 | eval_frequency = 1000 44 | report_frequency = 100 45 | log_root = with_init 46 | adam_eps = 1e-6 47 | task_optimizer = adam 48 | } 49 | -------------------------------------------------------------------------------- /bert/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | BERT needs to maintain permanent compatibility with the pre-trained model files, 4 | so we do not plan to make any major changes to this library (other than what was 5 | promised in the README). However, we can accept small patches related to 6 | re-factoring and documentation. 
To submit contributions, there are just a few 7 | small guidelines you need to follow. 8 | 9 | ## Contributor License Agreement 10 | 11 | Contributions to this project must be accompanied by a Contributor License 12 | Agreement. You (or your employer) retain the copyright to your contribution; 13 | this simply gives us permission to use and redistribute your contributions as 14 | part of the project. Head over to https://cla.developers.google.com/ to see 15 | your current agreements on file or to sign a new one. 16 | 17 | You generally only need to submit a CLA once, so if you've already submitted one 18 | (even if it was for a different project), you probably don't need to do it 19 | again. 20 | 21 | ## Code reviews 22 | 23 | All submissions, including submissions by project members, require review. We 24 | use GitHub pull requests for this purpose. Consult 25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 26 | information on using pull requests. 27 | 28 | ## Community Guidelines 29 | 30 | This project follows 31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 32 | -------------------------------------------------------------------------------- /bert/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /bert/optimization_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import optimization 20 | import tensorflow as tf 21 | 22 | 23 | class OptimizationTest(tf.test.TestCase): 24 | 25 | def test_adam(self): 26 | with self.test_session() as sess: 27 | w = tf.get_variable( 28 | "w", 29 | shape=[3], 30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1])) 31 | x = tf.constant([0.4, 0.2, -0.5]) 32 | loss = tf.reduce_mean(tf.square(x - w)) 33 | tvars = tf.trainable_variables() 34 | grads = tf.gradients(loss, tvars) 35 | global_step = tf.train.get_or_create_global_step() 36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2) 37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step) 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | sess.run(init_op) 41 | for _ in range(100): 42 | sess.run(train_op) 43 | w_np = sess.run(w) 44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) 45 | 46 | 47 | if __name__ == "__main__": 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /bert/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow. 2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow. 3 | -------------------------------------------------------------------------------- /bert/sample_text.txt: -------------------------------------------------------------------------------- 1 | This text is included to make sure Unicode is handled properly: 力加勝北区ᴵᴺᵀᵃছজটডণত 2 | Text should be one-sentence-per-line, with empty lines between documents. 3 | This sample text is public domain and was randomly selected from Project Guttenberg. 4 | 5 | The rain had only ceased with the gray streaks of morning at Blazing Star, and the settlement awoke to a moral sense of cleanliness, and the finding of forgotten knives, tin cups, and smaller camp utensils, where the heavy showers had washed away the debris and dust heaps before the cabin doors. 6 | Indeed, it was recorded in Blazing Star that a fortunate early riser had once picked up on the highway a solid chunk of gold quartz which the rain had freed from its incumbering soil, and washed into immediate and glittering popularity. 7 | Possibly this may have been the reason why early risers in that locality, during the rainy season, adopted a thoughtful habit of body, and seldom lifted their eyes to the rifted or india-ink washed skies above them. 8 | "Cass" Beard had risen early that morning, but not with a view to discovery. 9 | A leak in his cabin roof,--quite consistent with his careless, improvident habits,--had roused him at 4 A. M., with a flooded "bunk" and wet blankets. 10 | The chips from his wood pile refused to kindle a fire to dry his bed-clothes, and he had recourse to a more provident neighbor's to supply the deficiency. 11 | This was nearly opposite. 12 | Mr. Cassius crossed the highway, and stopped suddenly. 13 | Something glittered in the nearest red pool before him. 14 | Gold, surely! 15 | But, wonderful to relate, not an irregular, shapeless fragment of crude ore, fresh from Nature's crucible, but a bit of jeweler's handicraft in the form of a plain gold ring. 16 | Looking at it more attentively, he saw that it bore the inscription, "May to Cass." 17 | Like most of his fellow gold-seekers, Cass was superstitious. 
18 | 19 | The fountain of classic wisdom, Hypatia herself. 20 | As the ancient sage--the name is unimportant to a monk--pumped water nightly that he might study by day, so I, the guardian of cloaks and parasols, at the sacred doors of her lecture-room, imbibe celestial knowledge. 21 | From my youth I felt in me a soul above the matter-entangled herd. 22 | She revealed to me the glorious fact, that I am a spark of Divinity itself. 23 | A fallen star, I am, sir!' continued he, pensively, stroking his lean stomach--'a fallen star!--fallen, if the dignity of philosophy will allow of the simile, among the hogs of the lower world--indeed, even into the hog-bucket itself. Well, after all, I will show you the way to the Archbishop's. 24 | There is a philosophic pleasure in opening one's treasures to the modest young. 25 | Perhaps you will assist me by carrying this basket of fruit?' And the little man jumped up, put his basket on Philammon's head, and trotted off up a neighbouring street. 26 | Philammon followed, half contemptuous, half wondering at what this philosophy might be, which could feed the self-conceit of anything so abject as his ragged little apish guide; 27 | but the novel roar and whirl of the street, the perpetual stream of busy faces, the line of curricles, palanquins, laden asses, camels, elephants, which met and passed him, and squeezed him up steps and into doorways, as they threaded their way through the great Moon-gate into the ample street beyond, drove everything from his mind but wondering curiosity, and a vague, helpless dread of that great living wilderness, more terrible than any dead wilderness of sand which he had left behind. 28 | Already he longed for the repose, the silence of the Laura--for faces which knew him and smiled upon him; but it was too late to turn back now. 29 | His guide held on for more than a mile up the great main street, crossed in the centre of the city, at right angles, by one equally magnificent, at each end of which, miles away, appeared, dim and distant over the heads of the living stream of passengers, the yellow sand-hills of the desert; 30 | while at the end of the vista in front of them gleamed the blue harbour, through a network of countless masts. 31 | At last they reached the quay at the opposite end of the street; 32 | and there burst on Philammon's astonished eyes a vast semicircle of blue sea, ringed with palaces and towers. 33 | He stopped involuntarily; and his little guide stopped also, and looked askance at the young monk, to watch the effect which that grand panorama should produce on him. 34 | -------------------------------------------------------------------------------- /bert/tokenization_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | import tempfile 21 | import tokenization 22 | import six 23 | import tensorflow as tf 24 | 25 | 26 | class TokenizationTest(tf.test.TestCase): 27 | 28 | def test_full_tokenizer(self): 29 | vocab_tokens = [ 30 | "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", 31 | "##ing", "," 32 | ] 33 | with tempfile.NamedTemporaryFile(delete=False) as vocab_writer: 34 | if six.PY2: 35 | vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) 36 | else: 37 | vocab_writer.write("".join( 38 | [x + "\n" for x in vocab_tokens]).encode("utf-8")) 39 | 40 | vocab_file = vocab_writer.name 41 | 42 | tokenizer = tokenization.FullTokenizer(vocab_file) 43 | os.unlink(vocab_file) 44 | 45 | tokens = tokenizer.tokenize(u"UNwant\u00E9d,running") 46 | self.assertAllEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) 47 | 48 | self.assertAllEqual( 49 | tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9]) 50 | 51 | def test_chinese(self): 52 | tokenizer = tokenization.BasicTokenizer() 53 | 54 | self.assertAllEqual( 55 | tokenizer.tokenize(u"ah\u535A\u63A8zz"), 56 | [u"ah", u"\u535A", u"\u63A8", u"zz"]) 57 | 58 | def test_basic_tokenizer_lower(self): 59 | tokenizer = tokenization.BasicTokenizer(do_lower_case=True) 60 | 61 | self.assertAllEqual( 62 | tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "), 63 | ["hello", "!", "how", "are", "you", "?"]) 64 | self.assertAllEqual(tokenizer.tokenize(u"H\u00E9llo"), ["hello"]) 65 | 66 | def test_basic_tokenizer_no_lower(self): 67 | tokenizer = tokenization.BasicTokenizer(do_lower_case=False) 68 | 69 | self.assertAllEqual( 70 | tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? 
"), 71 | ["HeLLo", "!", "how", "Are", "yoU", "?"]) 72 | 73 | def test_wordpiece_tokenizer(self): 74 | vocab_tokens = [ 75 | "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", 76 | "##ing" 77 | ] 78 | 79 | vocab = {} 80 | for (i, token) in enumerate(vocab_tokens): 81 | vocab[token] = i 82 | tokenizer = tokenization.WordpieceTokenizer(vocab=vocab) 83 | 84 | self.assertAllEqual(tokenizer.tokenize(""), []) 85 | 86 | self.assertAllEqual( 87 | tokenizer.tokenize("unwanted running"), 88 | ["un", "##want", "##ed", "runn", "##ing"]) 89 | 90 | self.assertAllEqual( 91 | tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"]) 92 | 93 | def test_convert_tokens_to_ids(self): 94 | vocab_tokens = [ 95 | "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", 96 | "##ing" 97 | ] 98 | 99 | vocab = {} 100 | for (i, token) in enumerate(vocab_tokens): 101 | vocab[token] = i 102 | 103 | self.assertAllEqual( 104 | tokenization.convert_tokens_to_ids( 105 | vocab, ["un", "##want", "##ed", "runn", "##ing"]), [7, 4, 5, 8, 9]) 106 | 107 | def test_is_whitespace(self): 108 | self.assertTrue(tokenization._is_whitespace(u" ")) 109 | self.assertTrue(tokenization._is_whitespace(u"\t")) 110 | self.assertTrue(tokenization._is_whitespace(u"\r")) 111 | self.assertTrue(tokenization._is_whitespace(u"\n")) 112 | self.assertTrue(tokenization._is_whitespace(u"\u00A0")) 113 | 114 | self.assertFalse(tokenization._is_whitespace(u"A")) 115 | self.assertFalse(tokenization._is_whitespace(u"-")) 116 | 117 | def test_is_control(self): 118 | self.assertTrue(tokenization._is_control(u"\u0005")) 119 | 120 | self.assertFalse(tokenization._is_control(u"A")) 121 | self.assertFalse(tokenization._is_control(u" ")) 122 | self.assertFalse(tokenization._is_control(u"\t")) 123 | self.assertFalse(tokenization._is_control(u"\r")) 124 | 125 | def test_is_punctuation(self): 126 | self.assertTrue(tokenization._is_punctuation(u"-")) 127 | self.assertTrue(tokenization._is_punctuation(u"$")) 128 | self.assertTrue(tokenization._is_punctuation(u"`")) 129 | self.assertTrue(tokenization._is_punctuation(u".")) 130 | 131 | self.assertFalse(tokenization._is_punctuation(u"A")) 132 | self.assertFalse(tokenization._is_punctuation(u" ")) 133 | 134 | 135 | if __name__ == "__main__": 136 | tf.test.main() 137 | -------------------------------------------------------------------------------- /cased_config_vocab/bert_base_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "hidden_act": "gelu", 4 | "hidden_dropout_prob": 0.1, 5 | "hidden_size": 768, 6 | "initializer_range": 0.02, 7 | "intermediate_size": 3072, 8 | "max_position_embeddings": 512, 9 | "num_attention_heads": 12, 10 | "num_hidden_layers": 12, 11 | "type_vocab_size": 2, 12 | "vocab_size": 28996 13 | } 14 | -------------------------------------------------------------------------------- /cased_config_vocab/bert_large_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.1, 6 | "hidden_size": 1024, 7 | "initializer_range": 0.02, 8 | "intermediate_size": 4096, 9 | "max_position_embeddings": 512, 10 | "num_attention_heads": 16, 11 | "num_hidden_layers": 24, 12 | "pooler_fc_size": 768, 13 | "pooler_num_attention_heads": 12, 14 | "pooler_num_fc_layers": 3, 15 | "pooler_size_per_head": 128, 16 | 
"pooler_type": "first_token_transform", 17 | "type_vocab_size": 2, 18 | "vocab_size": 28996 19 | } 20 | -------------------------------------------------------------------------------- /cased_config_vocab/trial.jsonlines: -------------------------------------------------------------------------------- 1 | {"doc_key": "bn/voa/02/voa_0210_0", "sentences": [["[CLS]", "Meanwhile", "Prime", "Minister", "E", "##hu", "##d", "Bar", "##ak", "told", "Israeli", "television", "he", "doubts", "a", "peace", "deal", "can", "be", "reached", "before", "Israel", "'", "s", "February", "6th", "election", ".", "He", "said", "he", "will", "now", "focus", "on", "suppress", "##ing", "Palestinian", "violence", ".", "[SEP]"]], "speakers": [["[SPL]", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "[SPL]"]], "clusters": [[]], "sentence_map": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2], "subtoken_map": [0, 0, 1, 2, 3, 3, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 30, 31, 32, 33, 33]} 2 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/README.txt: -------------------------------------------------------------------------------- 1 | NAME 2 | CorScorer: Perl package for scoring coreference resolution systems 3 | using different metrics. 4 | 5 | 6 | VERSION 7 | v8.01 -- reference implementations of MUC, B-cubed, CEAF and BLANC metrics. 8 | 9 | 10 | CHANGES SINCE v8.0 11 | - fixed a bug that crashed the BLANC scorer when a duplicate singleton 12 | mention was present in the response. 13 | 14 | INSTALLATION 15 | Requirements: 16 | 1. Perl: downloadable from http://perl.org 17 | 2. Algorithm-Munkres: included in this package and downloadable 18 | from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08 19 | 20 | USE 21 | This package is distributed with two scripts to execute the scorer from 22 | the command line. 23 | 24 | Windows (tm): scorer.bat 25 | Linux: scorer.pl 26 | 27 | 28 | SYNOPSIS 29 | use CorScorer; 30 | 31 | $metric = 'ceafm'; 32 | 33 | # Scores the whole dataset 34 | &CorScorer::Score($metric, $keys_file, $response_file); 35 | 36 | # Scores one file 37 | &CorScorer::Score($metric, $keys_file, $response_file, $name); 38 | 39 | 40 | INPUT 41 | metric: the metric desired to score the results: 42 | muc: MUCScorer (Vilain et al, 1995) 43 | bcub: B-Cubed (Bagga and Baldwin, 1998) 44 | ceafm: CEAF (Luo et al., 2005) using mention-based similarity 45 | ceafe: CEAF (Luo et al., 2005) using entity-based similarity 46 | blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions 47 | all: uses all the metrics to score 48 | 49 | keys_file: file with expected coreference chains in CoNLL-2011/2012 format 50 | 51 | response_file: file with output of coreference system (CoNLL-2011/2012 format) 52 | 53 | name: [optional] the name of the document to score. If name is not 54 | given, all the documents in the dataset will be scored. If given 55 | name is "none" then all the documents are scored but only total 56 | results are shown. 
57 | 58 | 59 | OUTPUT 60 | The score subroutine returns an array with four values in this order: 61 | 1) Recall numerator 62 | 2) Recall denominator 63 | 3) Precision numerator 64 | 4) Precision denominator 65 | 66 | Also recall, precision and F1 are printed in the standard output when variable 67 | $VERBOSE is not null. 68 | 69 | Final scores: 70 | Recall = recall_numerator / recall_denominator 71 | Precision = precision_numerator / precision_denominator 72 | F1 = 2 * Recall * Precision / (Recall + Precision) 73 | 74 | Identification of mentions 75 | A scorer for identification of mentions (recall, precision and F1) is also included. 76 | Mentions from system response are compared with key mentions. This version performs 77 | strict mention matching as was used in the CoNLL-2011 and 2012 shared tasks. 78 | 79 | AUTHORS 80 | Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena lsi.upc.edu 81 | Sameer Pradhan, sameer.pradhan childrens.harvard.edu 82 | Sebastian Martschat, sebastian.martschat h-its.org 83 | Xiaoqiang Luo, xql google.com 84 | 85 | COPYRIGHT AND LICENSE 86 | Copyright (C) 2009-2011, Emili Sapena esapena lsi.upc.edu 87 | 2011-2014, Sameer Pradhan sameer.pradhan childrens.harvard.edu 88 | 89 | This program is free software; you can redistribute it and/or modify it 90 | under the terms of the GNU General Public License as published by the 91 | Free Software Foundation; either version 2 of the License, or (at your 92 | option) any later version. This program is distributed in the hope that 93 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied 94 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 95 | GNU General Public License for more details. 96 | 97 | You should have received a copy of the GNU General Public License along 98 | with this program; if not, write to the Free Software Foundation, Inc., 99 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 100 | 101 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/scorer.bat: -------------------------------------------------------------------------------- 1 | @rem = '--*-Perl-*-- 2 | @echo off 3 | if "%OS%" == "Windows_NT" goto WinNT 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9 5 | goto endofperl 6 | :WinNT 7 | perl -x -S %0 %* 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH. 10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul 11 | goto endofperl 12 | @rem '; 13 | #!perl 14 | #line 15 15 | 16 | BEGIN { 17 | $d = $0; 18 | $d =~ s/\/[^\/][^\/]*$//g; 19 | push(@INC, $d."/lib"); 20 | } 21 | 22 | use strict; 23 | use CorScorer; 24 | 25 | if (@ARGV < 3) { 26 | print q| 27 | use: scorer.bat <metric> <keys_file> <response_file> [name] 28 | 29 | metric: the metric desired to score the results: 30 | muc: MUCScorer (Vilain et al, 1995) 31 | bcub: B-Cubed (Bagga and Baldwin, 1998) 32 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 33 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 34 | all: uses all the metrics to score 35 | 36 | keys_file: file with expected coreference chains in SemEval format 37 | 38 | response_file: file with output of coreference system (SemEval format) 39 | 40 | name: [optional] the name of the document to score. If name is not 41 | given, all the documents in the dataset will be scored.
If given 42 | name is "none" then all the documents are scored but only total 43 | results are shown. 44 | 45 | |; 46 | exit; 47 | } 48 | 49 | my $metric = shift (@ARGV); 50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) { 51 | print "Invalid metric\n"; 52 | exit; 53 | } 54 | 55 | 56 | if ($metric eq 'all') { 57 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') { 58 | print "\nMETRIC $m:\n"; 59 | &CorScorer::Score( $m, @ARGV ); 60 | } 61 | } 62 | else { 63 | &CorScorer::Score( $metric, @ARGV ); 64 | } 65 | 66 | __END__ 67 | :endofperl 68 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/scorer.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | 7 | if ($d eq $0) { 8 | unshift(@INC, "lib"); 9 | } 10 | else { 11 | unshift(@INC, $d . "/lib"); 12 | } 13 | } 14 | 15 | use strict; 16 | use CorScorer; 17 | 18 | if (@ARGV < 3) { 19 | print q| 20 | use: scorer.pl <metric> <keys_file> <response_file> [name] 21 | 22 | metric: the metric desired to score the results: 23 | muc: MUCScorer (Vilain et al, 1995) 24 | bcub: B-Cubed (Bagga and Baldwin, 1998) 25 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 26 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 27 | blanc: BLANC 28 | all: uses all the metrics to score 29 | 30 | keys_file: file with expected coreference chains in SemEval format 31 | 32 | response_file: file with output of coreference system (SemEval format) 33 | 34 | name: [optional] the name of the document to score. If name is not 35 | given, all the documents in the dataset will be scored. If given 36 | name is "none" then all the documents are scored but only total 37 | results are shown. 38 | 39 | |; 40 | exit; 41 | } 42 | 43 | my $metric = shift(@ARGV); 44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) { 45 | print "Invalid metric\n"; 46 | exit; 47 | } 48 | 49 | if ($metric eq 'all') { 50 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') { 51 | print "\nMETRIC $m:\n"; 52 | &CorScorer::Score($m, @ARGV); 53 | } 54 | } 55 | else { 56 | &CorScorer::Score($metric, @ARGV); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/CorefMetricTest.pm: -------------------------------------------------------------------------------- 1 | package CorefMetricTest; 2 | use strict; 3 | use warnings; 4 | use Exporter; 5 | 6 | our @ISA= qw(Exporter); 7 | our @EXPORT = qw(ComputeScoreFromCounts DiffExpectedAndActual); 8 | 9 | ################################################################################ 10 | # Compute recall, precision and F1. 11 | # 12 | # Input: (numerator_counts_for_recall, denominator_counts_for_recall, 13 | # numerator_counts_for_precision, denominator_counts_for_precision) 14 | # Output: (recall, precision, F1) 15 | ################################################################################ 16 | sub ComputeScoreFromCounts { 17 | # The first 4 are also coref link counts when using BLANC. 18 | my ($recall_numerator, $recall_denominator, 19 | $precision_numerator, $precision_denominator, @noncoref_counts) = @_; 20 | # The coref recall, precision, and F1 when using BLANC.
21 | my ($recall, $precision, $F1) = 22 | RPFFromCounts($recall_numerator, $recall_denominator, 23 | $precision_numerator, $precision_denominator); 24 | 25 | # BLANC: @noncoref_counts= 26 | # (noncoref_numerator_recall, noncoref_denominator_recall, 27 | # noncoref_numerator_precision, noncoref_denominator_precision) 28 | if (scalar(@noncoref_counts) == 4) { 29 | ($recall, $precision, $F1) = CorScorer::ComputeBLANCFromCounts( 30 | $recall_numerator, $recall_denominator, $precision_denominator, 31 | $noncoref_counts[0], $noncoref_counts[1], $noncoref_counts[3]); 32 | } 33 | $recall = ($recall < 0) ? 0 : $recall; 34 | $precision = ($precision < 0) ? 0 : $precision; 35 | $F1 = ($F1 < 0) ? 0 : $F1; 36 | return ($recall, $precision, $F1); 37 | } 38 | 39 | sub RPFFromCounts 40 | { 41 | my ($recall_numerator, $recall_denominator, 42 | $precision_numerator, $precision_denominator, @nonCorefCounts) = @_; 43 | my ($recall, $precision, $F1) = (-1, -1, 0); 44 | if ($recall_denominator > 0) { 45 | $recall = $recall_numerator / $recall_denominator; 46 | } 47 | if ($precision_denominator > 0) { 48 | $precision = $precision_numerator / $precision_denominator; 49 | } 50 | 51 | if (($recall + $precision) > 0) { 52 | $F1 = 2 * $recall * $precision / ($recall + $precision); 53 | } 54 | 55 | return ($recall, $precision, $F1); 56 | } 57 | 58 | # deprecated -- see CorScorer::ComputeBLANCFromCounts(). 59 | sub ComputeBLANCRPF 60 | { 61 | my ($coref_recall, $coref_precision, $coref_F1, 62 | $noncoref_recall, $noncoref_precision, $noncoref_F1) = @_; 63 | 64 | my ($recall, $precision, $F1); 65 | 66 | if ($coref_recall < 0 && $noncoref_recall < 0) { 67 | # no key mention. 68 | $recall = $precision = $F1 = 0; 69 | } elsif ($coref_recall < 0) { 70 | # key: all links are non-coref (mentions are all singltons). 71 | $recall = $noncoref_recall; 72 | $precision = ($noncoref_precision < 0) ? 0 : $noncoref_precision; 73 | $F1 = $noncoref_F1; 74 | } elsif ($noncoref_recall < 0) { 75 | # key: all links are coref (all mentions are in one entity). 76 | $recall = $coref_recall; 77 | $precision = ($coref_precision < 0) ? 0 : $coref_precision; 78 | $F1 = $coref_F1; 79 | } else { 80 | #key contains both coref and non-coref links. 81 | if ($coref_precision < 0 && $noncoref_precision < 0) { 82 | # no response. 83 | $recall = $precision = $F1 = 0; 84 | } else { 85 | if ($coref_precision < 0) { 86 | # response: all links are non-coref, or response mentions are all 87 | # singletons. 88 | $coref_precision = 0; 89 | } elsif ($noncoref_precision < 0) { 90 | # response: all links are coref, or all mentions are in one entity. 91 | $noncoref_precision = 0; 92 | } 93 | $recall = ($coref_recall + $noncoref_recall)/2; 94 | $precision = ($coref_precision + $noncoref_precision)/2; 95 | $F1 = ($coref_F1 + $noncoref_F1)/2; 96 | } 97 | } 98 | 99 | return ($recall, $precision, $F1); 100 | } 101 | 102 | ############################################################################## 103 | # Compute the sum of the duifference between the expected recall, precision, 104 | # F1 and the actual one. 
105 | ############################################################################## 106 | sub DiffExpectedAndActual { 107 | my ($expected, $actual) = @_; 108 | if (scalar(@$expected) != scalar(@$actual)) { 109 | print STDERR "Expected and actual have diff dimensions: \n"; 110 | print STDERR " Expected: ", join(" ", @$expected), "\n"; 111 | print STDERR " Actual: ", join(" ", @$actual), "\n"; 112 | return 1.0e5; 113 | } 114 | my $sum = 0.0; 115 | my $i = 0; 116 | foreach my $e (@$expected) { 117 | $sum += abs($e - $actual->[$i]); 118 | ++$i; 119 | } 120 | return $sum; 121 | } 122 | 123 | 1; 124 | 125 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-10.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 x - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 z - 17 | test2 0 5 e (4) 18 | test2 0 6 y - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-11.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-12.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 1) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (2) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (3) 13 | test2 0 1 x - 14 | test2 0 2 d1 (4 15 | test2 0 3 d2 4) 16 | test2 0 4 z - 17 | test2 0 5 e (5) 18 | test2 0 6 y - 19 | test2 0 7 f1 (6) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-13.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 0) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (0) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 - 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c - 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 y (2) 17 | test2 0 5 e (2) 18 | test2 0 6 z (3) 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 x (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (1 7 | test1 0 5 b3 1) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (3 7 | test1 0 5 b3 3) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-7.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-8.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-9.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3(3(3(3(3(3(3(3(3(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)3)3)3)3)3)3)3)3)3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-B-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-B.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | 
nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-C-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | 
nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-C.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | 
nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-D-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-D.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-E-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (1) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (1) 26 | nw/xinhua/00/chtb_0009 - 27 | 
nw/xinhua/00/chtb_0009 (1) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (1) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-E.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-F-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-F.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | 
nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-G-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-G.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-H-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | 
nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-H.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-I-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-I.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 
32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-J-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-J.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-K-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 (3) 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | 
-------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-K.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (1) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (1) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-L-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (3) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-L.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | 
-------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/reference-coreference-scorers/v8.01/test/test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | push(@INC, $d); 7 | push(@INC, $d . "/../lib"); 8 | } 9 | 10 | use strict; 11 | use CorScorer; 12 | use CorefMetricTest; 13 | use CorefMetricTestConfig; 14 | 15 | my $error_tolerance = 1.e-4; 16 | my $script_dir = $0; 17 | $script_dir =~ s/\/[^\/][^\/]*$//g; 18 | 19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) { 20 | my $id = $test_case->{'id'}; 21 | my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 22 | $script_dir . "/" . $test_case->{'response_file'}); 23 | print "\nTesting case ($id): keyFile=", $key_response_files[0], 24 | " responseFile=", $key_response_files[1], "\n"; 25 | my $expected_metrics = $test_case->{'expected_metrics'}; 26 | foreach my $metric_name (sort keys %$expected_metrics) { 27 | my $expected_values = $expected_metrics->{$metric_name}; 28 | *::SAVED_STDOUT = *STDOUT; 29 | *STDOUT = *::SUPRRES_STDOUT; 30 | my @actual_counts = &CorScorer::Score($metric_name, @key_response_files); 31 | # Compute R,P,and F1 from raw counts. 32 | my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts); 33 | *STDOUT = *::SAVED_STDOUT; 34 | my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values); 35 | printf " metric: %+10s", $metric_name; 36 | if ($diff < $error_tolerance) { 37 | print " => PASS\n"; 38 | } else { 39 | print " => FAIL\n"; 40 | print " Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n"; 41 | print " Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n"; 42 | #exit(1); 43 | } 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/README.txt: -------------------------------------------------------------------------------- 1 | NAME 2 | CorScorer: Perl package for scoring coreference resolution systems 3 | using different metrics. 4 | 5 | 6 | VERSION 7 | v8.01 -- reference implementations of MUC, B-cubed, CEAF and BLANC metrics. 8 | 9 | 10 | CHANGES SINCE v8.0 11 | - fixed a bug that crashed the BLANC scorer when a duplicate singleton 12 | mention was present in the response. 13 | 14 | INSTALLATION 15 | Requirements: 16 | 1. Perl: downloadable from http://perl.org 17 | 2. Algorithm-Munkres: included in this package and downloadable 18 | from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08 19 | 20 | USE 21 | This package is distributed with two scripts to execute the scorer from 22 | the command line. 
23 | 24 | Windows (tm): scorer.bat 25 | Linux: scorer.pl 26 | 27 | 28 | SYNOPSIS 29 | use CorScorer; 30 | 31 | $metric = 'ceafm'; 32 | 33 | # Scores the whole dataset 34 | &CorScorer::Score($metric, $keys_file, $response_file); 35 | 36 | # Scores one file 37 | &CorScorer::Score($metric, $keys_file, $response_file, $name); 38 | 39 | 40 | INPUT 41 | metric: the metric desired to score the results: 42 | muc: MUCScorer (Vilain et al, 1995) 43 | bcub: B-Cubed (Bagga and Baldwin, 1998) 44 | ceafm: CEAF (Luo et al., 2005) using mention-based similarity 45 | ceafe: CEAF (Luo et al., 2005) using entity-based similarity 46 | blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions 47 | all: uses all the metrics to score 48 | 49 | keys_file: file with expected coreference chains in CoNLL-2011/2012 format 50 | 51 | response_file: file with output of coreference system (CoNLL-2011/2012 format) 52 | 53 | name: [optional] the name of the document to score. If name is not 54 | given, all the documents in the dataset will be scored. If given 55 | name is "none" then all the documents are scored but only total 56 | results are shown. 57 | 58 | 59 | OUTPUT 60 | The score subroutine returns an array with four values in this order: 61 | 1) Recall numerator 62 | 2) Recall denominator 63 | 3) Precision numerator 64 | 4) Precision denominator 65 | 66 | Also recall, precision and F1 are printed in the standard output when variable 67 | $VERBOSE is not null. 68 | 69 | Final scores: 70 | Recall = recall_numerator / recall_denominator 71 | Precision = precision_numerator / precision_denominator 72 | F1 = 2 * Recall * Precision / (Recall + Precision) 73 | 74 | Identification of mentions 75 | A scorer for identification of mentions (recall, precision and F1) is also included. 76 | Mentions from system response are compared with key mentions. This version performs 77 | strict mention matching as was used in the CoNLL-2011 and 2012 shared tasks. 78 | 79 | AUTHORS 80 | Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena lsi.upc.edu 81 | Sameer Pradhan, sameer.pradhan childrens.harvard.edu 82 | Sebastian Martschat, sebastian.martschat h-its.org 83 | Xiaoqiang Luo, xql google.com 84 | 85 | COPYRIGHT AND LICENSE 86 | Copyright (C) 2009-2011, Emili Sapena esapena lsi.upc.edu 87 | 2011-2014, Sameer Pradhan sameer.pradhan childrens.harvard.edu 88 | 89 | This program is free software; you can redistribute it and/or modify it 90 | under the terms of the GNU General Public License as published by the 91 | Free Software Foundation; either version 2 of the License, or (at your 92 | option) any later version. This program is distributed in the hope that 93 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied 94 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 95 | GNU General Public License for more details. 96 | 97 | You should have received a copy of the GNU General Public License along 98 | with this program; if not, write to the Free Software Foundation, Inc., 99 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
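The four counts returned by Score map directly onto the "Final scores" formulas above. The following is a minimal sketch, not part of the distributed package, of how a caller might turn those counts into recall, precision and F1; it assumes the scorer's lib directory is on @INC, and the key/response paths are placeholders chosen only for illustration.

  use strict;
  use warnings;
  use CorScorer;

  my $metric        = 'muc';                             # muc, bcub, ceafm, ceafe or blanc
  my $keys_file     = 'test/DataFiles/TC-A.key';         # placeholder path
  my $response_file = 'test/DataFiles/TC-A-1.response';  # placeholder path

  # Score() returns (recall_num, recall_den, precision_num, precision_den);
  # with blanc it appends four analogous non-coreference counts.
  my ($rn, $rd, $pn, $pd) = &CorScorer::Score($metric, $keys_file, $response_file);

  my $recall    = $rd > 0 ? $rn / $rd : 0;
  my $precision = $pd > 0 ? $pn / $pd : 0;
  my $f1        = ($recall + $precision) > 0
                ? 2 * $recall * $precision / ($recall + $precision)
                : 0;
  printf "%s: R=%.4f P=%.4f F1=%.4f\n", $metric, $recall, $precision, $f1;

The package's test/test.pl exercises the same path: it calls CorScorer::Score for each configured metric, rebuilds (recall, precision, F1) from the raw counts via CorefMetricTest::ComputeScoreFromCounts, and compares the result against the expected values in CorefMetricTestConfig.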
100 | 101 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/scorer.bat: -------------------------------------------------------------------------------- 1 | @rem = '--*-Perl-*-- 2 | @echo off 3 | if "%OS%" == "Windows_NT" goto WinNT 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9 5 | goto endofperl 6 | :WinNT 7 | perl -x -S %0 %* 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH. 10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul 11 | goto endofperl 12 | @rem '; 13 | #!perl 14 | #line 15 15 | 16 | BEGIN { 17 | $d = $0; 18 | $d =~ s/\/[^\/][^\/]*$//g; 19 | push(@INC, $d."/lib"); 20 | } 21 | 22 | use strict; 23 | use CorScorer; 24 | 25 | if (@ARGV < 3) { 26 | print q| 27 | use: scorer.bat <metric> <keys_file> <response_file> [name] 28 | 29 | metric: the metric desired to score the results: 30 | muc: MUCScorer (Vilain et al, 1995) 31 | bcub: B-Cubed (Bagga and Baldwin, 1998) 32 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 33 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 34 | all: uses all the metrics to score 35 | 36 | keys_file: file with expected coreference chains in SemEval format 37 | 38 | response_file: file with output of coreference system (SemEval format) 39 | 40 | name: [optional] the name of the document to score. If name is not 41 | given, all the documents in the dataset will be scored. If given 42 | name is "none" then all the documents are scored but only total 43 | results are shown. 44 | 45 | |; 46 | exit; 47 | } 48 | 49 | my $metric = shift (@ARGV); 50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) { 51 | print "Invalid metric\n"; 52 | exit; 53 | } 54 | 55 | 56 | if ($metric eq 'all') { 57 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') { 58 | print "\nMETRIC $m:\n"; 59 | &CorScorer::Score( $m, @ARGV ); 60 | } 61 | } 62 | else { 63 | &CorScorer::Score( $metric, @ARGV ); 64 | } 65 | 66 | __END__ 67 | :endofperl 68 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/scorer.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | 7 | if ($d eq $0) { 8 | unshift(@INC, "lib"); 9 | } 10 | else { 11 | unshift(@INC, $d . "/lib"); 12 | } 13 | } 14 | 15 | use strict; 16 | use CorScorer; 17 | 18 | if (@ARGV < 3) { 19 | print q| 20 | use: scorer.pl <metric> <keys_file> <response_file> [name] 21 | 22 | metric: the metric desired to score the results: 23 | muc: MUCScorer (Vilain et al, 1995) 24 | bcub: B-Cubed (Bagga and Baldwin, 1998) 25 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 26 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 27 | blanc: BLANC 28 | all: uses all the metrics to score 29 | 30 | keys_file: file with expected coreference chains in SemEval format 31 | 32 | response_file: file with output of coreference system (SemEval format) 33 | 34 | name: [optional] the name of the document to score. If name is not 35 | given, all the documents in the dataset will be scored. If given 36 | name is "none" then all the documents are scored but only total 37 | results are shown.
38 | 39 | |; 40 | exit; 41 | } 42 | 43 | my $metric = shift(@ARGV); 44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) { 45 | print "Invalid metric\n"; 46 | exit; 47 | } 48 | 49 | if ($metric eq 'all') { 50 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') { 51 | print "\nMETRIC $m:\n"; 52 | &CorScorer::Score($m, @ARGV); 53 | } 54 | } 55 | else { 56 | &CorScorer::Score($metric, @ARGV); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/CorefMetricTest.pm: -------------------------------------------------------------------------------- 1 | package CorefMetricTest; 2 | use strict; 3 | use warnings; 4 | use Exporter; 5 | 6 | our @ISA= qw(Exporter); 7 | our @EXPORT = qw(ComputeScoreFromCounts DiffExpectedAndActual); 8 | 9 | ################################################################################ 10 | # Compute recall, precision and F1. 11 | # 12 | # Input: (numerator_counts_for_recall, denominator_counts_for_recall, 13 | # numerator_counts_for_precision, denominator_counts_for_precision) 14 | # Output: (recall, precision, F1) 15 | ################################################################################ 16 | sub ComputeScoreFromCounts { 17 | # The first 4 are also coref link counts when using BLANC. 18 | my ($recall_numerator, $recall_denominator, 19 | $precision_numerator, $precision_denominator, @noncoref_counts) = @_; 20 | # The coref recall, precision, and F1 when using BLANC. 21 | my ($recall, $precision, $F1) = 22 | RPFFromCounts($recall_numerator, $recall_denominator, 23 | $precision_numerator, $precision_denominator); 24 | 25 | # BLANC: @noncoref_counts= 26 | # (noncoref_numerator_recall, noncoref_denominator_recall, 27 | # noncoref_numerator_precision, noncoref_denominator_precision) 28 | if (scalar(@noncoref_counts) == 4) { 29 | ($recall, $precision, $F1) = CorScorer::ComputeBLANCFromCounts( 30 | $recall_numerator, $recall_denominator, $precision_denominator, 31 | $noncoref_counts[0], $noncoref_counts[1], $noncoref_counts[3]); 32 | } 33 | $recall = ($recall < 0) ? 0 : $recall; 34 | $precision = ($precision < 0) ? 0 : $precision; 35 | $F1 = ($F1 < 0) ? 0 : $F1; 36 | return ($recall, $precision, $F1); 37 | } 38 | 39 | sub RPFFromCounts 40 | { 41 | my ($recall_numerator, $recall_denominator, 42 | $precision_numerator, $precision_denominator, @nonCorefCounts) = @_; 43 | my ($recall, $precision, $F1) = (-1, -1, 0); 44 | if ($recall_denominator > 0) { 45 | $recall = $recall_numerator / $recall_denominator; 46 | } 47 | if ($precision_denominator > 0) { 48 | $precision = $precision_numerator / $precision_denominator; 49 | } 50 | 51 | if (($recall + $precision) > 0) { 52 | $F1 = 2 * $recall * $precision / ($recall + $precision); 53 | } 54 | 55 | return ($recall, $precision, $F1); 56 | } 57 | 58 | # deprecated -- see CorScorer::ComputeBLANCFromCounts(). 59 | sub ComputeBLANCRPF 60 | { 61 | my ($coref_recall, $coref_precision, $coref_F1, 62 | $noncoref_recall, $noncoref_precision, $noncoref_F1) = @_; 63 | 64 | my ($recall, $precision, $F1); 65 | 66 | if ($coref_recall < 0 && $noncoref_recall < 0) { 67 | # no key mention. 68 | $recall = $precision = $F1 = 0; 69 | } elsif ($coref_recall < 0) { 70 | # key: all links are non-coref (mentions are all singltons). 71 | $recall = $noncoref_recall; 72 | $precision = ($noncoref_precision < 0) ? 0 : $noncoref_precision; 73 | $F1 = $noncoref_F1; 74 | } elsif ($noncoref_recall < 0) { 75 | # key: all links are coref (all mentions are in one entity). 
76 | $recall = $coref_recall; 77 | $precision = ($coref_precision < 0) ? 0 : $coref_precision; 78 | $F1 = $coref_F1; 79 | } else { 80 | #key contains both coref and non-coref links. 81 | if ($coref_precision < 0 && $noncoref_precision < 0) { 82 | # no response. 83 | $recall = $precision = $F1 = 0; 84 | } else { 85 | if ($coref_precision < 0) { 86 | # response: all links are non-coref, or response mentions are all 87 | # singletons. 88 | $coref_precision = 0; 89 | } elsif ($noncoref_precision < 0) { 90 | # response: all links are coref, or all mentions are in one entity. 91 | $noncoref_precision = 0; 92 | } 93 | $recall = ($coref_recall + $noncoref_recall)/2; 94 | $precision = ($coref_precision + $noncoref_precision)/2; 95 | $F1 = ($coref_F1 + $noncoref_F1)/2; 96 | } 97 | } 98 | 99 | return ($recall, $precision, $F1); 100 | } 101 | 102 | ############################################################################## 103 | # Compute the sum of the duifference between the expected recall, precision, 104 | # F1 and the actual one. 105 | ############################################################################## 106 | sub DiffExpectedAndActual { 107 | my ($expected, $actual) = @_; 108 | if (scalar(@$expected) != scalar(@$actual)) { 109 | print STDERR "Expected and actual have diff dimensions: \n"; 110 | print STDERR " Expected: ", join(" ", @$expected), "\n"; 111 | print STDERR " Actual: ", join(" ", @$actual), "\n"; 112 | return 1.0e5; 113 | } 114 | my $sum = 0.0; 115 | my $i = 0; 116 | foreach my $e (@$expected) { 117 | $sum += abs($e - $actual->[$i]); 118 | ++$i; 119 | } 120 | return $sum; 121 | } 122 | 123 | 1; 124 | 125 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-10.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 x - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 z - 17 | test2 0 5 e (4) 18 | test2 0 6 y - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-11.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-12.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 1) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (2) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (3) 13 | test2 0 1 x - 14 | test2 0 2 d1 (4 15 | test2 0 3 d2 4) 16 | test2 0 4 z - 17 | test2 0 5 e (5) 18 | test2 0 6 y - 19 | test2 0 7 f1 (6) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-13.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 0) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (0) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 - 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c - 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 y (2) 17 | test2 0 5 e (2) 18 | test2 0 6 z (3) 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 x (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (1 7 | test1 0 5 b3 1) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (3 7 | test1 0 5 b3 3) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-7.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-8.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A-9.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3(3(3(3(3(3(3(3(3(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)3)3)3)3)3)3)3)3)3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-A.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-B-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-B.key: 
-------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-C-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | 
nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-C.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | 
nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-D-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-D.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-E-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (1) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (1) 26 | nw/xinhua/00/chtb_0009 - 27 | 
nw/xinhua/00/chtb_0009 (1) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (1) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-E.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-F-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-F.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | 
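The TC-*.key and TC-*.response files in this test suite all use the CoNLL-2012 coreference column notation: the last field of each token row is either "-" (no mention boundary) or a "|"-separated list of markers, where "(ID" opens a mention belonging to cluster ID, "ID)" closes it, and "(ID)" marks a single-token mention. As a reading aid only — this sketch is not part of the repository and the helper name is made up — the following Python shows one way to recover per-cluster token spans from such a column; it is essentially the inverse of the output_conll writer that appears later in conll.py.

```python
import collections
import re

def read_coref_column(cols):
    """Recover {cluster_id: [(start, end), ...]} from a list of coref-column
    strings, one per token, e.g. ["-", "(10043", "-", "10043)", "(10054)"]."""
    clusters = collections.defaultdict(list)
    open_spans = collections.defaultdict(list)   # cluster_id -> stack of open start indices
    marker = re.compile(r"^(\()?(\d+)(\))?$")
    for idx, col in enumerate(cols):
        if col == "-":
            continue
        for part in col.split("|"):
            opens, cid, closes = marker.match(part).groups()
            cid = int(cid)
            if opens:                            # "(ID" or "(ID)" starts a mention at this token
                open_spans[cid].append(idx)
            if closes:                           # "ID)" or "(ID)" closes the most recently opened mention
                clusters[cid].append((open_spans[cid].pop(), idx))
    return dict(clusters)

# Toy column sequence in the same notation as the TC-* files above:
print(read_coref_column(["-", "(10043", "-", "-", "10043)", "(10054)"]))
# {10043: [(1, 4)], 10054: [(5, 5)]}
```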
-------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-G-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-G.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-H-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-H.key: 
-------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-I-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-I.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-J-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 
000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-J.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-K-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 (3) 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-K.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | 
nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (1) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (1) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-L-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (3) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-L.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-M-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-M-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-M-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-M-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-M-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-M-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-M.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-N-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-N-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-N-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-N-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-N-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-N-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/DataFiles/TC-N.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /conll-2012/scorer/v8.01/test/test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | push(@INC, $d); 7 | push(@INC, $d . "/../lib"); 8 | } 9 | 10 | use strict; 11 | use CorScorer; 12 | use CorefMetricTest; 13 | use CorefMetricTestConfig; 14 | 15 | my $error_tolerance = 1.e-4; 16 | my $script_dir = $0; 17 | $script_dir =~ s/\/[^\/][^\/]*$//g; 18 | 19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) { 20 | my $id = $test_case->{'id'}; 21 | my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 22 | $script_dir . "/" . 
$test_case->{'response_file'}); 23 | print "\nTesting case ($id): keyFile=", $key_response_files[0], 24 | " responseFile=", $key_response_files[1], "\n"; 25 | my $expected_metrics = $test_case->{'expected_metrics'}; 26 | foreach my $metric_name (sort keys %$expected_metrics) { 27 | my $expected_values = $expected_metrics->{$metric_name}; 28 | *::SAVED_STDOUT = *STDOUT; 29 | *STDOUT = *::SUPRRES_STDOUT; 30 | my @actual_counts = &CorScorer::Score($metric_name, @key_response_files); 31 | # Compute R,P,and F1 from raw counts. 32 | my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts); 33 | *STDOUT = *::SAVED_STDOUT; 34 | my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values); 35 | printf " metric: %+10s", $metric_name; 36 | if ($diff < $error_tolerance) { 37 | print " => PASS\n"; 38 | } else { 39 | print " => FAIL\n"; 40 | print " Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n"; 41 | print " Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n"; 42 | #exit(1); 43 | } 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /conll-2012/v3/scripts/conll2coreference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | cat < 13 | 14 | 15 | Description: 16 | ----------- 17 | 18 | Takes a *conll file as input and prints out the corresponding coreference file 19 | 20 | ---------------------------------------------------------------------------------------------------- 21 | 22 | 23 | 24 | 25 | EOF 26 | exit; 27 | } 28 | 29 | 30 | function message 31 | { 32 | echo "----------------------------------------------------------------------------------------------------" 33 | echo 34 | echo $* 1>&2 35 | echo 36 | echo "----------------------------------------------------------------------------------------------------" 37 | 38 | } 39 | 40 | 41 | 42 | function r { echo ${1%.*}; } 43 | function t { echo ${1##*/}; } 44 | function e { echo $(t ${1##*.}); } 45 | function h { echo ${1%/*}; } 46 | 47 | # define helper function: run a command and print its exit code 48 | function erun () { 49 | debug=0 50 | if [[ $1 == "-d" ]]; then 51 | debug=1 52 | shift; 53 | fi 54 | 55 | 56 | if [[ $DEBUG -eq 1 ]]; then 57 | debug=1 58 | fi 59 | 60 | 61 | 62 | 63 | verbose=0 64 | if [[ $1 == "-v" ]]; then 65 | verbose=1 66 | shift; 67 | fi 68 | 69 | 70 | if [[ $VERBOSE -eq 1 ]]; then 71 | verbose=1 72 | fi 73 | 74 | 75 | 76 | 77 | 78 | 79 | if [[ $debug -eq 1 ]]; then 80 | echo "debug mode ..." 81 | echo "eval $1" 82 | else 83 | echo "normal mode ..." 84 | if [[ $verbose -eq 1 ]]; then 85 | echo -e "\nrun: $1\n-------------" 86 | fi 87 | 88 | eval $1 89 | fi 90 | 91 | 92 | local code=$? 93 | if [ $code -ne 0 ]; then 94 | echo "Exit code: $code" 95 | exit $code 96 | fi 97 | } 98 | 99 | 100 | 101 | 102 | # handle the valid command line options 103 | DEBUG=0 104 | VERBOSE=0 105 | DEBUG_OPTION="" 106 | while getopts vdh opt 107 | do 108 | case "$opt" in 109 | v) 110 | VERBOSE=1;; 111 | 112 | d) 113 | DEBUG=1;; 114 | 115 | \?) 
116 | usage 117 | exit 1;; 118 | 119 | h) 120 | usage 121 | exit 0;; 122 | 123 | :) 124 | echo "option -$OPTARG requires an argument" 125 | usage 126 | exit 1;; 127 | 128 | esac 129 | done 130 | shift `expr $OPTIND - 1` 131 | 132 | 133 | # at this point $* contains the arguments after interpreting the options 134 | 135 | d=$1 136 | 137 | # if no arguments are specified, then just print usage 138 | if [[ $# -eq 0 ]]; then 139 | usage 140 | fi 141 | 142 | 143 | # debugging 144 | if [[ $DEBUG -eq 1 ]]; then 145 | echo "debugging mode is on ..." 1>&2 146 | DEBUG_OPTION="-d" 147 | fi 148 | 149 | 150 | 151 | 152 | 153 | for file in $(find $d -name "*_conll"); do 154 | 155 | if [[ $file =~ "data/english/annotations" ]]; then 156 | LANGUAGE=english 157 | elif [[ $file =~ "data/chinese/annotations" ]]; then 158 | LANGUAGE=chinese 159 | else 160 | LANGUAGE=arabic 161 | fi 162 | 163 | echo "language: $LANGUAGE" 164 | coref=${file/_conll/_coref} 165 | echo "$file -> $coref ..." 166 | conll2coreference.py -l $LANGUAGE $file > $coref 167 | done 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | # complain if the exit status of the last command executed is non-zero 177 | if [[ $? != 0 ]]; then echo "the last command exited with a non-zero status" 1>&2; fi 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /conll-2012/v3/scripts/conll2name.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | cat < 13 | 14 | 15 | Description: 16 | ----------- 17 | 18 | Takes a *conll file as input and prints out the corresponding coreference file 19 | 20 | ---------------------------------------------------------------------------------------------------- 21 | 22 | 23 | 24 | 25 | EOF 26 | exit; 27 | } 28 | 29 | 30 | function message 31 | { 32 | echo "----------------------------------------------------------------------------------------------------" 33 | echo 34 | echo $* 1>&2 35 | echo 36 | echo "----------------------------------------------------------------------------------------------------" 37 | 38 | } 39 | 40 | 41 | 42 | function r { echo ${1%.*}; } 43 | function t { echo ${1##*/}; } 44 | function e { echo $(t ${1##*.}); } 45 | function h { echo ${1%/*}; } 46 | 47 | # define helper function: run a command and print its exit code 48 | function erun () { 49 | debug=0 50 | if [[ $1 == "-d" ]]; then 51 | debug=1 52 | shift; 53 | fi 54 | 55 | 56 | if [[ $DEBUG -eq 1 ]]; then 57 | debug=1 58 | fi 59 | 60 | 61 | 62 | 63 | verbose=0 64 | if [[ $1 == "-v" ]]; then 65 | verbose=1 66 | shift; 67 | fi 68 | 69 | 70 | if [[ $VERBOSE -eq 1 ]]; then 71 | verbose=1 72 | fi 73 | 74 | 75 | 76 | 77 | 78 | 79 | if [[ $debug -eq 1 ]]; then 80 | echo "debug mode ..." 81 | echo "eval $1" 82 | else 83 | echo "normal mode ..." 84 | if [[ $verbose -eq 1 ]]; then 85 | echo -e "\nrun: $1\n-------------" 86 | fi 87 | 88 | eval $1 89 | fi 90 | 91 | 92 | local code=$? 93 | if [ $code -ne 0 ]; then 94 | echo "Exit code: $code" 95 | exit $code 96 | fi 97 | } 98 | 99 | 100 | 101 | 102 | # handle the valid command line options 103 | DEBUG=0 104 | VERBOSE=0 105 | DEBUG_OPTION="" 106 | while getopts vdh opt 107 | do 108 | case "$opt" in 109 | v) 110 | VERBOSE=1;; 111 | 112 | d) 113 | DEBUG=1;; 114 | 115 | \?) 
116 | usage 117 | exit 1;; 118 | 119 | h) 120 | usage 121 | exit 0;; 122 | 123 | :) 124 | echo "option -$OPTARG requires an argument" 125 | usage 126 | exit 1;; 127 | 128 | esac 129 | done 130 | shift `expr $OPTIND - 1` 131 | 132 | 133 | # at this point $* contains the arguments after interpreting the options 134 | 135 | d=$1 136 | 137 | # if no arguments are specified, then just print usage 138 | if [[ $# -eq 0 ]]; then 139 | usage 140 | fi 141 | 142 | 143 | # debugging 144 | if [[ $DEBUG -eq 1 ]]; then 145 | echo "debugging mode is on ..." 1>&2 146 | DEBUG_OPTION="-d" 147 | fi 148 | 149 | 150 | 151 | 152 | for file in $(find $d -name "*_conll"); do 153 | 154 | if [[ $file =~ "data/english/annotations" ]]; then 155 | LANGUAGE=english 156 | elif [[ $file =~ "data/chinese/annotations" ]]; then 157 | LANGUAGE=chinese 158 | else 159 | LANGUAGE=arabic 160 | fi 161 | 162 | echo "language: $LANGUAGE" 163 | 164 | name=${file/_conll/_name} 165 | echo "$file -> $name ..." 166 | conll2name.py -l $LANGUAGE $file > $name 167 | done 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | # complain if the exit status of the last command executed is non-zero 176 | if [[ $? != 0 ]]; then echo "the last command exited with a non-zero status" 1>&2; fi 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /conll-2012/v3/scripts/conll2parse.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | cat < 13 | 14 | 15 | Description: 16 | ----------- 17 | 18 | Takes a *conll file as input and prints out the corresponding parse file 19 | 20 | ---------------------------------------------------------------------------------------------------- 21 | 22 | 23 | 24 | 25 | EOF 26 | exit; 27 | } 28 | 29 | 30 | function message 31 | { 32 | echo "----------------------------------------------------------------------------------------------------" 33 | echo 34 | echo $* 1>&2 35 | echo 36 | echo "----------------------------------------------------------------------------------------------------" 37 | 38 | } 39 | 40 | 41 | 42 | function r { echo ${1%.*}; } 43 | function t { echo ${1##*/}; } 44 | function e { echo $(t ${1##*.}); } 45 | function h { echo ${1%/*}; } 46 | 47 | # define helper function: run a command and print its exit code 48 | function erun () { 49 | debug=0 50 | if [[ $1 == "-d" ]]; then 51 | debug=1 52 | shift; 53 | fi 54 | 55 | 56 | if [[ $DEBUG -eq 1 ]]; then 57 | debug=1 58 | fi 59 | 60 | 61 | 62 | 63 | verbose=0 64 | if [[ $1 == "-v" ]]; then 65 | verbose=1 66 | shift; 67 | fi 68 | 69 | 70 | if [[ $VERBOSE -eq 1 ]]; then 71 | verbose=1 72 | fi 73 | 74 | 75 | 76 | 77 | 78 | 79 | if [[ $debug -eq 1 ]]; then 80 | echo "debug mode ..." 81 | echo "eval $1" 82 | else 83 | echo "normal mode ..." 84 | if [[ $verbose -eq 1 ]]; then 85 | echo -e "\nrun: $1\n-------------" 86 | fi 87 | 88 | eval $1 89 | fi 90 | 91 | 92 | local code=$? 93 | if [ $code -ne 0 ]; then 94 | echo "Exit code: $code" 95 | exit $code 96 | fi 97 | } 98 | 99 | 100 | 101 | 102 | # handle the valid command line options 103 | DEBUG=0 104 | VERBOSE=0 105 | DEBUG_OPTION="" 106 | while getopts vdh opt 107 | do 108 | case "$opt" in 109 | v) 110 | VERBOSE=1;; 111 | 112 | d) 113 | DEBUG=1;; 114 | 115 | \?) 
116 | usage 117 | exit 1;; 118 | 119 | h) 120 | usage 121 | exit 0;; 122 | 123 | :) 124 | echo "option -$OPTARG requires an argument" 125 | usage 126 | exit 1;; 127 | 128 | esac 129 | done 130 | shift `expr $OPTIND - 1` 131 | 132 | 133 | # at this point $* contains the arguments after interpreting the options 134 | 135 | d=$1 136 | 137 | # if no arguments are specified, then just print usage 138 | if [[ $# -eq 0 ]]; then 139 | usage 140 | fi 141 | 142 | 143 | 144 | # debugging 145 | if [[ $DEBUG -eq 1 ]]; then 146 | echo "debugging mode is on ..." 1>&2 147 | DEBUG_OPTION="-d" 148 | fi 149 | 150 | 151 | 152 | 153 | 154 | for file in $(find $d -name "*_conll"); do 155 | 156 | if [[ $file =~ "data/english/annotations" ]]; then 157 | LANGUAGE=english 158 | elif [[ $file =~ "data/chinese/annotations" ]]; then 159 | LANGUAGE=chinese 160 | else 161 | LANGUAGE=arabic 162 | fi 163 | 164 | 165 | echo "language: $LANGUAGE" 166 | 167 | parse=${file/_conll/_parse} 168 | echo "$file -> $parse ..." 169 | conll2parse.py -l $LANGUAGE $file > $parse 170 | done 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | # complain if the exit status of the last command executed is non-zero 183 | if [[ $? != 0 ]]; then echo "the last command exited with a non-zero status" 1>&2; fi 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /conll-2012/v3/scripts/skeleton2conll.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function usage { 4 | cat < 13 | 14 | 15 | Description: 16 | ----------- 17 | 18 | : Location of the data directory under the OntoNotes 4.0 release 19 | : The directory inside which the *_skel files exist and need to 20 | be convered to .conll files 21 | 22 | ---------------------------------------------------------------------------------------------------- 23 | 24 | 25 | 26 | 27 | EOF 28 | exit; 29 | } 30 | 31 | 32 | function message 33 | { 34 | (echo "----------------------------------------------------------------------------------------------------"; 35 | echo "" ; 36 | echo $* ; 37 | echo "" ; 38 | echo "----------------------------------------------------------------------------------------------------") 1>&2 39 | 40 | } 41 | 42 | function warning 43 | { 44 | message "$*" 45 | } 46 | 47 | function error 48 | { 49 | message "$*" 50 | exit 51 | } 52 | 53 | 54 | function r { echo ${1%.*}; } 55 | function t { echo ${1##*/}; } 56 | function e { echo $(t ${1##*.}); } 57 | function h { echo ${1%/*}; } 58 | 59 | 60 | 61 | # define helper function: run a command and print its exit code 62 | function erun () 63 | { 64 | local debug; 65 | local verbose; 66 | debug=0; 67 | if [[ $1 == "-d" ]]; then 68 | debug=1; 69 | shift; 70 | fi; 71 | verbose=0; 72 | if [[ $1 == "-v" ]]; then 73 | verbose=1; 74 | shift; 75 | fi; 76 | if [[ $DEBUG -eq 1 ]]; then 77 | debug=1; 78 | fi; 79 | if [[ $VERBOSE -eq 1 ]]; then 80 | verbose=1; 81 | fi; 82 | if [[ $debug -eq 1 ]]; then 83 | echo "eval $1"; 84 | else 85 | if [[ $verbose -eq 1 ]]; then 86 | echo "-> $1"; 87 | fi; 88 | eval $1; 89 | fi; 90 | local code=$?; 91 | if [ $code -ne 0 ]; then 92 | echo "Exit code: $code"; 93 | break; 94 | fi 95 | } 96 | 97 | 98 | 99 | # handle the valid command line options 100 | DEBUG=0 101 | TESTING=false 102 | VERBOSE=0 103 | DEBUG_OPTION="" 104 | EDITED="" 105 | while getopts D:dhT opt 106 | do 107 | case "$opt" in 108 | v) 109 | VERBOSE=1;; 110 | 111 | d) 112 | DEBUG=1 113 | DEBUG_OPTION="-d";; 114 | 115 | D) 
116 | ON_DATA_DIR="$OPTARG" 117 | ON_DATA_DIR=${ON_DATA_DIR%/} 118 | 119 | if [[ -z $ON_DATA_DIR ]]; then 120 | error "please specify a valid ontonotes data directory using the -D option" 121 | usage 122 | fi;; 123 | 124 | T) 125 | # this option is used internally for testing 126 | TESTING=true;; 127 | 128 | \?) 129 | usage 130 | exit 1;; 131 | 132 | h) 133 | usage 134 | exit 0;; 135 | 136 | :) 137 | echo "option -$OPTARG requires an argument" 138 | usage 139 | exit 1;; 140 | 141 | esac 142 | done 143 | shift `expr $OPTIND - 1` 144 | 145 | 146 | 147 | 148 | # at this point $* contains the arguments after interpreting the options 149 | 150 | d=$1 151 | d=${d%/} 152 | 153 | 154 | # if the conll release directory is not correct 155 | if [[ $(t $d) != "conll-2012" ]]; then 156 | error "please make sure that you are pointing to the directory 'conll-2012'" 157 | fi 158 | 159 | 160 | 161 | # if we are testing the release, we do not want to clobber the 162 | # true _conll files 163 | if $TESTING; then 164 | EXT="_skel2conll" 165 | else 166 | EXT="_conll" 167 | fi 168 | 169 | 170 | # if no arguments are specified, then just print usage 171 | if [[ $# -eq 0 ]]; then 172 | usage 173 | fi 174 | 175 | 176 | 177 | 178 | for language in arabic english chinese; do 179 | # set the EDITED option only for english 180 | if [[ $language == "english" ]]; then 181 | EDITED="-edited" 182 | else 183 | EDITED="" 184 | fi 185 | 186 | for partition in train development test; do 187 | for skel in $(find $d/v?/data/$partition/data/$language/ -name "*_skel"); do 188 | gold_parse=$ON_DATA_DIR/$(r ${skel/*data\//}).parse 189 | 190 | if [[ ! -e $gold_parse ]]; then 191 | error "could not find the gold parse [$gold_parse] in the ontonotes distribution ... exiting ..." 192 | exit 193 | fi 194 | 195 | conll=${skel/_skel/$EXT} 196 | erun -v "python2 $d/v?/scripts/skeleton2conll.py $gold_parse $skel $conll $EDITED --text" 197 | done 198 | done 199 | done 200 | 201 | 202 | 203 | 204 | 205 | # complain if the exit status of the last command executed is non-zero 206 | if [[ $? 
!= 0 ]]; then echo "the last command exited with a non-zero status" 1>&2; fi 207 | 208 | 209 | 210 | -------------------------------------------------------------------------------- /conll.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import re 6 | import tempfile 7 | import subprocess 8 | import operator 9 | import collections 10 | 11 | BEGIN_DOCUMENT_REGEX = re.compile(r"#begin document \((.*)\); part (\d+)") 12 | COREF_RESULTS_REGEX = re.compile(r".*Coreference: Recall: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tPrecision: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tF1: ([0-9.]+)%.*", re.DOTALL) 13 | 14 | def get_doc_key(doc_id, part): 15 | return "{}_{}".format(doc_id, int(part)) 16 | 17 | def output_conll(input_file, output_file, predictions, subtoken_map): 18 | prediction_map = {} 19 | for doc_key, clusters in predictions.items(): 20 | start_map = collections.defaultdict(list) 21 | end_map = collections.defaultdict(list) 22 | word_map = collections.defaultdict(list) 23 | for cluster_id, mentions in enumerate(clusters): 24 | for start, end in mentions: 25 | start, end = subtoken_map[doc_key][start], subtoken_map[doc_key][end] 26 | if start == end: 27 | word_map[start].append(cluster_id) 28 | else: 29 | start_map[start].append((cluster_id, end)) 30 | end_map[end].append((cluster_id, start)) 31 | for k,v in start_map.items(): 32 | start_map[k] = [cluster_id for cluster_id, end in sorted(v, key=operator.itemgetter(1), reverse=True)] 33 | for k,v in end_map.items(): 34 | end_map[k] = [cluster_id for cluster_id, start in sorted(v, key=operator.itemgetter(1), reverse=True)] 35 | prediction_map[doc_key] = (start_map, end_map, word_map) 36 | 37 | word_index = 0 38 | for line in input_file.readlines(): 39 | row = line.split() 40 | if len(row) == 0: 41 | output_file.write("\n") 42 | elif row[0].startswith("#"): 43 | begin_match = re.match(BEGIN_DOCUMENT_REGEX, line) 44 | if begin_match: 45 | doc_key = get_doc_key(begin_match.group(1), begin_match.group(2)) 46 | start_map, end_map, word_map = prediction_map[doc_key] 47 | word_index = 0 48 | output_file.write(line) 49 | output_file.write("\n") 50 | else: 51 | assert get_doc_key(row[0], row[1]) == doc_key 52 | coref_list = [] 53 | if word_index in end_map: 54 | for cluster_id in end_map[word_index]: 55 | coref_list.append("{})".format(cluster_id)) 56 | if word_index in word_map: 57 | for cluster_id in word_map[word_index]: 58 | coref_list.append("({})".format(cluster_id)) 59 | if word_index in start_map: 60 | for cluster_id in start_map[word_index]: 61 | coref_list.append("({}".format(cluster_id)) 62 | 63 | if len(coref_list) == 0: 64 | row[-1] = "-" 65 | else: 66 | row[-1] = "|".join(coref_list) 67 | 68 | output_file.write(" ".join(row)) 69 | output_file.write("\n") 70 | word_index += 1 71 | 72 | def official_conll_eval(gold_path, predicted_path, metric, official_stdout=False): 73 | cmd = ["conll-2012/scorer/v8.01/scorer.pl", metric, gold_path, predicted_path, "none"] 74 | process = subprocess.Popen(cmd, stdout=subprocess.PIPE) 75 | stdout, stderr = process.communicate() 76 | process.wait() 77 | 78 | stdout = stdout.decode("utf-8") 79 | if stderr is not None: 80 | print(stderr) 81 | 82 | if official_stdout: 83 | print("Official result for {}".format(metric)) 84 | print(stdout) 85 | 86 | coref_results_match = re.match(COREF_RESULTS_REGEX, stdout) 87 | recall = float(coref_results_match.group(1)) 88 | 
precision = float(coref_results_match.group(2)) 89 | f1 = float(coref_results_match.group(3)) 90 | return { "r": recall, "p": precision, "f": f1 } 91 | 92 | def evaluate_conll(gold_path, predictions, subtoken_maps, official_stdout=False): 93 | with tempfile.NamedTemporaryFile(delete=False, mode="w") as prediction_file: 94 | with open(gold_path, "r") as gold_file: 95 | output_conll(gold_file, prediction_file, predictions, subtoken_maps) 96 | print("Predicted conll file: {}".format(prediction_file.name)) 97 | return { m: official_conll_eval(gold_file.name, prediction_file.name, m, official_stdout) for m in ("muc", "bcub", "ceafe") } 98 | -------------------------------------------------------------------------------- /coref_ops.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from tensorflow.python import pywrap_tensorflow 7 | 8 | coref_op_library = tf.load_op_library("./coref_kernels.so") 9 | 10 | extract_spans = coref_op_library.extract_spans 11 | tf.NotDifferentiable("ExtractSpans") 12 | -------------------------------------------------------------------------------- /current_models.py: -------------------------------------------------------------------------------- 1 | CURRENT_MODELS = { 2 | 'google_large_cased': ('bert-large-cased', '/checkpoint/danqi/coref_eval/bert_models/cased_L-24_H-1024_A-16/bert_model.ckpt'), 3 | 'small_batch_random': ('bert-large-cased', '/checkpoint/omerlevy/span_bert_models/cased/random/checkpoint_best.pt'), 4 | 'small_batch_no_nsp_random': ('bert-large-cased', '/checkpoint/omerlevy/mandar_data/pretraining_models/no_nsp_random/checkpoint_best.pt'), 5 | 'small_batch_no_nsp_pair': ('bert-large-cased', '/checkpoint/omerlevy/mandar_data/pretraining_models/no_nsp_pair/checkpoint_best.pt'), 6 | 'small_batch_no_nsp_pair_1.2m': ('bert-large-cased', '/checkpoint/omerlevy/mandar_data/pretraining_models/no_nsp_pair/checkpoint_37_1200000.pt'), 7 | 'small_batch_no_nsp_geo_1.2m': ('bert-large-cased', '/checkpoint/omerlevy/slow_models/geo/checkpoint_37_1200000.pt'), 8 | 'small_batch_random_1.2m': ('bert-large-cased', '/checkpoint/omerlevy/span_bert_models/cased/random/checkpoint_27_1200000.pt'), 9 | 'small_batch_geo_1.2m': ('bert-large-cased', '/checkpoint/omerlevy/mandar_data/pretraining_models/geo_span_0.2/checkpoint_27_1200000.pt'), 10 | 'small_batch_np_1.2m': ('bert-large-cased', '/checkpoint/omerlevy/mandar_data/pretraining_models/np_span/checkpoint_27_1200000.pt'), 11 | 'small_batch_ner_1.2m': ('bert-large-cased', '/checkpoint/omerlevy/mandar_data/pretraining_models/ner_span/checkpoint_27_1200000.pt'), 12 | 'small_batch_word_1.2m': ('bert-large-cased', '/checkpoint/omerlevy/mandar_data/pretraining_models/word/checkpoint_27_1200000.pt') 13 | } 14 | 15 | MODEL_CAT_TO_GOOGLE_DIR = {'bert-base-cased': 'cased_L-12_H-768_A-12', 'bert-base-uncased': 'uncased_L-12_H-768_A-12', 'bert-large-cased': 'cased_L-24_H-1024_A-16', 'bert-large-uncased': 'uncased_L-24_H-1024_A-16', 'bert-base-uncased-1024': 'uncased_L-12_H-768_A-12'} 16 | 17 | -------------------------------------------------------------------------------- /debug/compare.py: -------------------------------------------------------------------------------- 1 | import util 2 | import json 3 | 4 | def read_file(fn): 5 | js_dict = {} 6 | js_list = [] 7 | with open(fn) as f: 8 | for line in f: 9 | js = json.loads(line) 10 | 
js_dict[js['doc_key']] = js 11 | js_list += [js] 12 | # import ipdb 13 | # ipdb.set_trace() 14 | # print('read', len(js_dict), js_dict.keys()) 15 | return js_list 16 | 17 | def compare(bert_json, org_json, key='nw/xinhua/00/chtb_0060_0'): 18 | bert_json = read_file(bert_json) 19 | org_json = read_file(org_json) 20 | bert_text = [item for sublist in bert_json[key]['sentences'] for item in sublist] 21 | org_text = [item for sublist in org_json[key]['sentences'] for item in sublist] 22 | print(list(enumerate(zip(bert_json[key]['subtoken_map'], bert_text)))) 23 | for cl in bert_json[key]['clusters']: 24 | strings = [] 25 | for ((bs, be)) in cl: 26 | os, oe = bert_json[key]['subtoken_map'][bs], bert_json[key]['subtoken_map'][be] 27 | strings.append((bert_text[bs: be+1], bs, be, os, oe, org_text[os:oe+1])) 28 | print(strings) 29 | 30 | print('---') 31 | for cl in org_json[key]['clusters']: 32 | strings = [] 33 | for ((bs, be)) in cl: 34 | strings.append((org_text[bs: be+1], bs, be)) 35 | print(strings) 36 | 37 | def compare_json(json1, json2): 38 | json1 = read_file(json1) 39 | json2 = read_file(json2) 40 | for i, (l1, l2) in enumerate(zip(json1, json2)): 41 | assert l1['doc_key'] == l2['doc_key'] 42 | if tuple(util.flatten(l1['sentences'])) != tuple(util.flatten(l2['sentences'])): 43 | print(i, l1['doc_key'], list(enumerate(util.flatten(l1['sentences']))), list(enumerate(util.flatten(l2['sentences'])))) 44 | for j, (w1, w2) in enumerate(zip(util.flatten(l1['sentences']), util.flatten(l2['sentences']))): 45 | if w1 != w2: 46 | print(j, w1, w2) 47 | break 48 | 49 | compare_json('dev.english.jsonlines', 'data/seg_len_expts/dev.english.230.jsonlines') 50 | -------------------------------------------------------------------------------- /debug/count.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import util 4 | 5 | def is_overlap(c1, c2): 6 | if c1[0] <= c2[0] and c2[0] <= c1[1]: 7 | return True 8 | return False 9 | 10 | def count(data_file): 11 | f = open(data_file) 12 | max_num_sp = 0 13 | overlap, total = 0, 0 14 | for i, line in enumerate(f): 15 | # print('---', line) 16 | data = json.loads(line) 17 | clusters = util.flatten(data['clusters']) 18 | clusters = [tuple(c) for c in clusters] 19 | for c1 in clusters: 20 | for c2 in clusters: 21 | if c1 == c2: 22 | continue 23 | total += 1 24 | if (is_overlap(c1, c2)) or (is_overlap(c2, c1)): 25 | overlap += 1 26 | # print('overlap', c1, c2) 27 | # else: 28 | # print('non-overlap', c1, c2) 29 | print(overlap, total, overlap * 100.0 / total) 30 | 31 | print('max_num_sp', max_num_sp) 32 | 33 | def avg_len(data_file): 34 | f = open(data_file) 35 | total = 0 36 | max_num_sp = 0 37 | segments = [] 38 | for i, line in enumerate(f): 39 | # print('---', line) 40 | data = json.loads(line) 41 | text = util.flatten(data['sentences']) 42 | segments.append(len(data['sentences'])) 43 | total += len(text) 44 | max_num_sp = max(max_num_sp, len(text)) 45 | print(total / i) 46 | print(max_num_sp) 47 | print(len(segments), sum(segments) / len(segments), max(segments), sum([1 for s in segments if s == 1])) 48 | 49 | def cluster_distance(data_file): 50 | f = open(data_file) 51 | dist, pairs = 0, 0 52 | for i, line in enumerate(f): 53 | # print('---', line) 54 | data = json.loads(line) 55 | for cluster in data['clusters']: 56 | pairs += len(cluster) - 1 57 | spans = sorted([(s) for s,e in cluster]) 58 | for i in range(len(spans) - 1): 59 | dist += spans[i+1] - spans[i] 60 | print(dist / pairs, pairs) 
61 | 62 | 63 | if __name__ == '__main__': 64 | data_file = sys.argv[1] 65 | cluster_distance(data_file) 66 | 67 | -------------------------------------------------------------------------------- /debug/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from six.moves import input 6 | import tensorflow as tf 7 | import coref_model as cm 8 | import util 9 | 10 | import nltk 11 | nltk.download("punkt") 12 | from nltk.tokenize import sent_tokenize, word_tokenize 13 | import sys 14 | import os 15 | sys.path.append(os.path.abspath('../bert')) 16 | import tokenization 17 | 18 | tokenizer = tokenization.FullTokenizer( 19 | vocab_file='../bert/cased_L-12_H-768_A-12/vocab.txt', do_lower_case=False) 20 | def create_example(text): 21 | #raw_sentences = sent_tokenize(text) 22 | sentences = [['[CLS]'] + tokenizer.tokenize(text) + ['[SEP]']] 23 | sentence_map = [0] * len(sentences[0]) 24 | speakers = [["" for _ in sentence] for sentence in sentences] 25 | return { 26 | "doc_key": "nw", 27 | "clusters": [], 28 | "sentences": sentences, 29 | "speakers": speakers, 30 | 'sentence_map': sentence_map 31 | } 32 | 33 | def print_predictions(example): 34 | words = util.flatten(example["sentences"]) 35 | for cluster in example["predicted_clusters"]: 36 | print(u"Predicted cluster: {}".format([" ".join(words[m[0]:m[1]+1]) for m in cluster])) 37 | 38 | def make_predictions(text, model): 39 | example = create_example(text) 40 | tensorized_example = model.tensorize_example(example, is_training=False) 41 | feed_dict = {i:t for i,t in zip(model.input_tensors, tensorized_example)} 42 | # print(feed_dict) 43 | mention_starts, mention_ends, candidate_mention_scores, top_span_starts, top_span_ends, antecedents, antecedent_scores = session.run(model.predictions, feed_dict=feed_dict) 44 | 45 | predicted_antecedents = model.get_predicted_antecedents(antecedents, antecedent_scores) 46 | 47 | example["predicted_clusters"], _ = model.get_predicted_clusters(mention_starts, mention_ends, predicted_antecedents) 48 | example["top_spans"] = zip((int(i) for i in mention_starts), (int(i) for i in mention_ends)) 49 | return example 50 | 51 | if __name__ == "__main__": 52 | config = util.initialize_from_env() 53 | log_dir = config["log_dir"] 54 | model = cm.CorefModel(config) 55 | saver = tf.train.Saver() 56 | with tf.Session() as session: 57 | # model.restore(session) 58 | ckpt = tf.train.get_checkpoint_state(log_dir) 59 | if ckpt and ckpt.model_checkpoint_path: 60 | print("Restoring from: {}".format(ckpt.model_checkpoint_path)) 61 | saver.restore(session, ckpt.model_checkpoint_path) 62 | 63 | while True: 64 | text = input("Document text: ") 65 | print_predictions(make_predictions(text, model)) 66 | -------------------------------------------------------------------------------- /debug/diff_clusters.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import numpy as np 4 | 5 | def diff(input_file, output_file): 6 | output = [] 7 | with open(input_file) as f: 8 | for line in f: 9 | datum = json.loads(line) 10 | pred = sorted([[(s, e) for s,e in cluster] for cluster in datum['predicted_clusters']], key=lambda x: x[0]) 11 | gold_clusters = sorted([[(s, e) for s,e in cluster] for cluster in datum['clusters']], key=lambda x: x[0]) 12 | pred_annotations = [] 13 | output.append(datum) 14 | covered = [False for i 
in range(len(gold_clusters))] 15 | for cluster in pred: 16 | overlap_fn = lambda k: len(set(cluster).intersection(set(k))) 17 | scores = [len(set(cluster).intersection(set(k))) / len(cluster) for k in gold_clusters] 18 | best_match = np.argmax(scores) 19 | pred_annotations.append([]) 20 | for s, e in cluster: 21 | present = 0 if (s, e) in gold_clusters[best_match] else 1 22 | pred_annotations[-1].append([s, e, present]) 23 | if scores[best_match] > 0: 24 | covered[best_match] = True 25 | for s, e in set(gold_clusters[best_match]).difference(cluster): 26 | pred_annotations[-1].append([s, e, 2]) 27 | for i in range(len(gold_clusters)): 28 | if not covered[i]: 29 | pred_annotations.append([]) 30 | for s, e in gold_clusters[i]: 31 | pred_annotations[-1].append([s, e, 2]) 32 | datum['pred_annotations'] = pred_annotations 33 | datum['predicted_clusters'] = pred 34 | datum['clusters'] = gold_clusters 35 | with open(output_file, 'w') as f: 36 | for datum in output: 37 | f.write(json.dumps(datum) + '\n') 38 | 39 | if __name__ == '__main__': 40 | diff(sys.argv[1], sys.argv[2]) 41 | -------------------------------------------------------------------------------- /debug/gen_keys.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | from util import flatten 4 | from collections import defaultdict 5 | 6 | def gen_keys(fname, out): 7 | key_dict = defaultdict(list) 8 | cluster_len = defaultdict(list) 9 | buckets = {1: '1', 2: '2', 3: '3-4', 4:'3-4', 5: '5-6', 6: '5-6', 7: '7-9', 8: '7-9', 9: '7-9', 10: '10+'} 10 | with open(fname) as f: 11 | for line in f: 12 | datum = json.loads(line) 13 | bucket = min(10, len(datum['sentences'])) 14 | key_dict[buckets[bucket]].append(datum['doc_key']) 15 | for cluster in datum['clusters']: 16 | cluster = sorted((s,e) for s,e in cluster) 17 | cluster_len[buckets[bucket]].append(cluster[-1][0] - cluster[0][0]) 18 | # pairs_lens = [] 19 | # for i in range(len(cluster)): 20 | # for j in range(i + 1, len(cluster)): 21 | # pairs_lens.append(cluster[j][0] - cluster[i][0]) 22 | # cluster_len[buckets[bucket]].append(sum(pairs_lens) / len(pairs_lens)) 23 | 24 | print(sum(flatten([x for x in cluster_len.values()])) / len(flatten((x for x in cluster_len.values())))) 25 | for k, v in key_dict.items(): 26 | print(k, sum(cluster_len[k]) / len(cluster_len[k])) 27 | with open(out.replace('txt', k + '.txt'), 'w') as f: 28 | for key in v: 29 | f.write(key + '\n') 30 | 31 | gen_keys(sys.argv[1], sys.argv[2]) 32 | -------------------------------------------------------------------------------- /debug/http_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | import time 6 | import json 7 | import numpy as np 8 | 9 | import cgi 10 | from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler 11 | import ssl 12 | 13 | import tensorflow as tf 14 | import coref_model as cm 15 | import util 16 | 17 | import nltk 18 | nltk.download("punkt") 19 | from nltk.tokenize import sent_tokenize, word_tokenize 20 | import sys 21 | import os 22 | sys.path.append(os.path.abspath('../bert')) 23 | import tokenization 24 | 25 | tokenizer = tokenization.FullTokenizer( 26 | vocab_file='../bert/cased_L-12_H-768_A-12/vocab.txt', do_lower_case=False) 27 | 28 | class CorefRequestHandler(BaseHTTPRequestHandler): 29 | model = None 30 | def do_POST(self): 31 | form = cgi.FieldStorage( 32 | fp=self.rfile, 33 | headers=self.headers, 34 | 
environ={"REQUEST_METHOD":"POST", 35 | "CONTENT_TYPE":self.headers["Content-Type"] 36 | }) 37 | if "text" in form: 38 | text = form["text"].value.decode("utf-8") 39 | if len(text) <= 10000: 40 | print(u"Document text: {}".format(text)) 41 | example = make_predictions(text, self.model) 42 | print_predictions(example) 43 | self.send_response(200) 44 | self.send_header("Content-Type", "application/json") 45 | self.end_headers() 46 | self.wfile.write(json.dumps(example)) 47 | return 48 | self.send_response(400) 49 | self.send_header("Content-Type", "application/json") 50 | self.end_headers() 51 | 52 | 53 | def create_example(text): 54 | #raw_sentences = sent_tokenize(text) 55 | sentences = [['[CLS]'] + tokenizer.tokenize(text) + ['[SEP]']] 56 | sentence_map = [0] * len(sentences[0]) 57 | speakers = [["" for _ in sentence] for sentence in sentences] 58 | return { 59 | "doc_key": "nw", 60 | "clusters": [], 61 | "sentences": sentences, 62 | "speakers": speakers, 63 | 'sentence_map': sentence_map 64 | } 65 | 66 | def print_predictions(example): 67 | words = util.flatten(example["sentences"]) 68 | for cluster in example["predicted_clusters"]: 69 | print(u"Predicted cluster: {}".format([" ".join(words[m[0]:m[1]+1]) for m in cluster])) 70 | 71 | def make_predictions(text, model): 72 | example = create_example(text) 73 | tensorized_example = model.tensorize_example(example, is_training=False) 74 | feed_dict = {i:t for i,t in zip(model.input_tensors, tensorized_example)} 75 | _, _, _, mention_starts, mention_ends, antecedents, antecedent_scores, head_scores = session.run(model.predictions + [model.head_scores], feed_dict=feed_dict) 76 | 77 | predicted_antecedents = model.get_predicted_antecedents(antecedents, antecedent_scores) 78 | 79 | example["predicted_clusters"], _ = model.get_predicted_clusters(mention_starts, mention_ends, predicted_antecedents) 80 | example["top_spans"] = zip((int(i) for i in mention_starts), (int(i) for i in mention_ends)) 81 | example["head_scores"] = head_scores.tolist() 82 | return example 83 | 84 | if __name__ == "__main__": 85 | util.set_gpus() 86 | 87 | name = sys.argv[1] 88 | if len(sys.argv) > 2: 89 | port = int(sys.argv[2]) 90 | else: 91 | port = None 92 | 93 | print "Running experiment: {}.".format(name) 94 | config = util.get_config("experiments.conf")[name] 95 | config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name)) 96 | 97 | util.print_config(config) 98 | model = cm.CorefModel(config) 99 | 100 | saver = tf.train.Saver() 101 | log_dir = config["log_dir"] 102 | 103 | with tf.Session() as session: 104 | checkpoint_path = os.path.join(log_dir, "model.max.ckpt") 105 | saver.restore(session, checkpoint_path) 106 | 107 | if port is not None: 108 | CorefRequestHandler.model = model 109 | server = HTTPServer(("", port), CorefRequestHandler) 110 | print("Running server at port {}".format(port)) 111 | server.serve_forever() 112 | else: 113 | while True: 114 | text = raw_input("Document text: ") 115 | print_predictions(make_predictions(text, model)) 116 | -------------------------------------------------------------------------------- /debug/print_clusters.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import util 4 | 5 | def print_clusters(data_file): 6 | f = open(data_file) 7 | for i, line in enumerate(f): 8 | data = json.loads(line) 9 | text = util.flatten(data['sentences']) 10 | # clusters = [[text[s:e+1] for s,e in cluster] for cluster in data['clusters']] 11 | #print(text) 12 | for 
ci, cluster in enumerate(data['clusters']): 13 | spans = [text[s:e+1] for s,e in cluster] 14 | print(i, ci, spans) 15 | if i > 5: 16 | break 17 | 18 | if __name__ == '__main__': 19 | data_file = sys.argv[1] 20 | print_clusters(data_file) 21 | -------------------------------------------------------------------------------- /debug/pronoun_evaluation.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | import sys 4 | from util import flatten 5 | 6 | def get_pronoun_mention_pairs(clusters, pronouns): 7 | pronoun_mention_pairs = [] 8 | unaccounted = set() 9 | for pronoun in pronouns: 10 | has_cluster = False 11 | for cluster in clusters: 12 | has_pronoun = any([pronoun == s and s == e for (s,e) in cluster]) 13 | if has_pronoun: 14 | assert has_cluster is False 15 | has_cluster = True 16 | for s, e in cluster: 17 | if not (pronoun == s and s == e): 18 | pronoun_mention_pairs.append((pronoun, (s,e))) 19 | if not has_cluster: 20 | unaccounted.add(pronoun) 21 | pronoun_mention_pairs.append((pronoun, None)) 22 | return set(pronoun_mention_pairs), set(unaccounted) 23 | 24 | def get_mention_pairs(clusters, pronouns): 25 | pronoun_mention_pairs = [] 26 | unaccounted = set() 27 | for pronoun in pronouns: 28 | has_cluster = False 29 | ps, pe = pronoun 30 | for cluster in clusters: 31 | has_pronoun = any([ps == s and pe == e for s, e in cluster]) 32 | if has_pronoun: 33 | assert has_cluster is False 34 | has_cluster = True 35 | for s, e in cluster: 36 | if ps == s and pe == e: 37 | pronoun_mention_pairs.append(((ps, pe), (s,e))) 38 | if not has_cluster: 39 | unaccounted.add((ps, pe)) 40 | pronoun_mention_pairs.append(((ps, pe), None)) 41 | return set(pronoun_mention_pairs), set(unaccounted) 42 | 43 | def evaluate(fname): 44 | p, r, f1 = [], [], [] 45 | pronoun_text = defaultdict(int) 46 | num_gold_pairs, num_pred_pairs = 0, 0 47 | total_gold_singletons, total_pred_singletons, total_singleton_intersection = 0, 0, 0 48 | with open(fname) as f: 49 | for line in f: 50 | datum = json.loads(line) 51 | tokens = flatten(datum['sentences']) 52 | #pronouns = flatten(datum['clusters']) 53 | pair_fn = get_mention_pairs 54 | # for pidx in pronouns: 55 | # pronoun_text[(tokens[pidx].lower())] += 1 56 | gold_pronoun_mention_pairs, gold_singletons = pair_fn(datum['clusters'], flatten(datum['clusters'])) 57 | pred_pronoun_mention_pairs, pred_singletons = pair_fn(datum['predicted_clusters'], flatten(datum['predicted_clusters'])) 58 | total_gold_singletons += len(gold_singletons) 59 | total_pred_singletons += len(pred_singletons) 60 | total_singleton_intersection += len(gold_singletons.intersection(pred_singletons)) 61 | intersection = gold_pronoun_mention_pairs.intersection(pred_pronoun_mention_pairs) 62 | num_gold_pairs += len(gold_pronoun_mention_pairs) 63 | num_pred_pairs += len(pred_pronoun_mention_pairs) 64 | this_recall = len(intersection) / len(gold_pronoun_mention_pairs) if len(gold_pronoun_mention_pairs) > 0 else 1.0 65 | this_prec = len(intersection) / len(pred_pronoun_mention_pairs) if len(pred_pronoun_mention_pairs) > 0 else 1.0 66 | this_f1 = 2 * this_recall * this_prec / (this_recall + this_prec) if this_recall + this_prec > 0 else 0 67 | p += [this_prec] 68 | r += [this_recall] 69 | f1 += [this_f1] 70 | print('gold_singletons: {}, pred_singletons: {} intersection: {}'.format(total_gold_singletons, total_pred_singletons, total_singleton_intersection)) 71 | print('num_gold: {}, num_pred: {}, P: {}, R: {} F1: 
{}'.format(num_gold_pairs, num_pred_pairs, sum(p) / len(p), sum(r) / len(r), sum(f1) / len(f1))) 72 | #print(sum(pronoun_text.values()), sorted(list(pronoun_text.items()), key=lambda k : k[1])) 73 | 74 | if __name__ == '__main__': 75 | evaluate(sys.argv[1]) 76 | -------------------------------------------------------------------------------- /debug/slurm_coref.slrm: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --partition=learnfair 3 | #SBATCH --constraint=volta32gb 4 | #SBATCH --nodes=1 5 | #SBATCH --ntasks-per-node=1 6 | #SBATCH --mem=100000 7 | #SBATCH --cpus-per-task=16 8 | #SBATCH --gres=gpu:1 9 | #SBATCH --time=1200 10 | 11 | python train.py $1 12 | -------------------------------------------------------------------------------- /debug/tune.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from current_models import CURRENT_MODELS, MODEL_CAT_TO_GOOGLE_DIR 4 | # 512 always performs best for our models. 5 | max_sents = {128: 11, 256: 5, 384: 3, 512: 3} 6 | # max_sents = {64: 23, 128:11, 256: 5, 384: 3, 512: 3} 7 | 8 | bert_lrs = [1e-5, 2e-5] 9 | task_lrs = [1e-4, 2e-4, 3e-4] #, 5e-4, 1e-3] 10 | 11 | def get_conf_name(model, seg_len, bert_lr, task_lr, task_optimizer=None, eps=None): 12 | if task_optimizer is None and eps is None: 13 | return '{}_sl{}_blr{}_tlr{}'.format(model, seg_len, bert_lr, task_lr) 14 | else: 15 | return '{}_sl{}_blr{}_tlr{}_to{}_eps{}'.format(model, seg_len, bert_lr, task_lr, task_optimizer, eps) 16 | 17 | def get_conf_lines(model, seg_len, bert_lr, task_lr, bert_model_dir, checkpoint, task_optimizer=None, eps=None): 18 | lines = [] 19 | casing = 'uncased' if 'uncased' in bert_model_dir else 'cased' 20 | lines += [get_conf_name(model, seg_len, bert_lr, task_lr, task_optimizer, eps) + ' = ${best} {'] 21 | lines += [' num_docs = {}'.format(args.num_docs)] 22 | lines += [' bert_learning_rate = {}'.format(bert_lr)] 23 | lines += [' task_learning_rate = {}'.format(task_lr)] 24 | lines += [' max_segment_len = {}'.format(seg_len)] 25 | lines += [' ffnn_size = {}'.format(args.ffnn_size)] 26 | lines += [' train_path = {}/{}/train.{}.{}.jsonlines'.format(args.data_dir, casing, args.lang, seg_len)] 27 | lines += [' eval_path = {}/{}/dev.{}.{}.jsonlines'.format(args.data_dir, casing, args.lang, seg_len)] 28 | lines += [' conll_eval_path = {}/gold_conll/dev.{}.v4_gold_conll'.format(args.data_dir, args.lang)] 29 | lines += [' max_training_sentences = {}'.format(max_sents[seg_len])] 30 | lines += [' bert_config_file = {}/bert_config.json'.format(bert_model_dir)] 31 | lines += [' vocab_file = {}/vocab.txt'.format(bert_model_dir)] 32 | lines += [' tf_checkpoint = {}/bert_model.ckpt'.format(bert_model_dir)] 33 | lines += [' init_checkpoint = {}'.format(checkpoint)] 34 | if task_optimizer is not None: 35 | lines += [' task_optimizer = {}'.format(task_optimizer)] 36 | if eps is not None: 37 | lines += [' adam_eps = {}'.format(eps)] 38 | 39 | lines += ['}\n'] 40 | return lines 41 | 42 | def generate(args): 43 | num_confs = 0 44 | with open(args.conf_file, 'a') as f: 45 | for (model, (model_cat, ckpt_file)) in CURRENT_MODELS.items(): 46 | bert_model_dir = os.path.join(args.data_dir, 'bert_models', MODEL_CAT_TO_GOOGLE_DIR[model_cat]) 47 | for sl in max_sents.keys(): 48 | for bert_lr in bert_lrs: 49 | for task_lr in task_lrs: 50 | lines = get_conf_lines(model, sl, bert_lr, task_lr, bert_model_dir, ckpt_file) 51 | if args.trial: 52 | print('\n'.join(lines) 
+ '\n') 53 | else: 54 | f.write('\n'.join(lines) + '\n') 55 | num_confs += 1 56 | print('{} configs written to {}'.format(num_confs, args.conf_file)) 57 | 58 | 59 | def run_slrm(args): 60 | with open(args.jobs_file) as f: 61 | for i, line in enumerate(f): 62 | job = line.strip() 63 | os.system('sbatch -J {} {} {}'.format('coref_' + job, args.slrm_file, job)) 64 | print('starting job {}'.format(job)) 65 | 66 | if __name__ == '__main__': 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument("--data_dir", type=str, required=True, help='High level coref data dir') 69 | parser.add_argument("--generate_configs", action='store_true', help='appends configs to --conf_file') 70 | parser.add_argument("--run_jobs", action='store_true', help='send jobs from --jobs_file to the cluster') 71 | 72 | # you mostly don't need to touch these below 73 | parser.add_argument("--trial", action='store_true', help='Print config to stdout if true') 74 | parser.add_argument("--conf_file", default='experiments.conf', type=str, help='Output config file') 75 | parser.add_argument("--jobs_file", default='torun.txt', type=str, help='file containing list of jobs') 76 | parser.add_argument("--slrm_file", default='slurm_coref.slrm', type=str, help='Slrm file') 77 | parser.add_argument("--num_docs", default=2802, type=int) 78 | parser.add_argument("--ffnn_size", default=3000, type=int) 79 | parser.add_argument("--lang", default='english', type=str) 80 | args = parser.parse_args() 81 | if not args.generate_configs and not args.run_jobs: 82 | print('Only one of --generate_configs and --run_jobs should be true') 83 | elif args.generate_configs and args.run_jobs: 84 | print('Only one of --generate_configs and --run_jobs should be true. First generate the configs with --generate_configs only. Make sure you have the right list in the jobs_file. Then run with --run_jobs.') 85 | elif args.generate_configs: 86 | generate(args) 87 | else: 88 | run_slrm(args) 89 | 90 | -------------------------------------------------------------------------------- /download_pretrained.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo Downloading $1 3 | wget -P $data_dir http://nlp.cs.washington.edu/pair2vec/$1.tar.gz 4 | tar xvzf $data_dir/$1.tar.gz -C $data_dir 5 | rm $data_dir/$1.tar.gz 6 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import os 7 | 8 | import tensorflow as tf 9 | import util 10 | 11 | def read_doc_keys(fname): 12 | keys = set() 13 | with open(fname) as f: 14 | for line in f: 15 | keys.add(line.strip()) 16 | return keys 17 | 18 | if __name__ == "__main__": 19 | config = util.initialize_from_env() 20 | model = util.get_model(config) 21 | saver = tf.train.Saver() 22 | log_dir = config["log_dir"] 23 | with tf.Session() as session: 24 | model.restore(session) 25 | # Make sure eval mode is True if you want official conll results 26 | model.evaluate(session, official_stdout=True, eval_mode=True) 27 | -------------------------------------------------------------------------------- /experiments.conf: -------------------------------------------------------------------------------- 1 | # Main configuration. Do not edit! Copy to experiments.conf and change that. 
2 | best { 3 | # Edit this 4 | data_dir = /sdb/data/new_coref 5 | model_type = independent 6 | # Computation limits. 7 | max_top_antecedents = 50 8 | max_training_sentences = 5 9 | top_span_ratio = 0.4 10 | max_num_speakers = 20 11 | max_segment_len = 256 12 | 13 | # Learning 14 | bert_learning_rate = 1e-5 15 | task_learning_rate = 2e-4 16 | num_docs = 2802 17 | 18 | # Model hyperparameters. 19 | dropout_rate = 0.3 20 | ffnn_size = 1000 21 | ffnn_depth = 1 22 | num_epochs = 20 23 | feature_size = 20 24 | max_span_width = 30 25 | use_metadata = true 26 | use_features = true 27 | use_segment_distance = true 28 | model_heads = true 29 | coref_depth = 2 30 | coarse_to_fine = true 31 | fine_grained = true 32 | use_prior = true 33 | 34 | # Other. 35 | train_path = train.english.jsonlines 36 | eval_path = dev.english.jsonlines 37 | conll_eval_path = dev.english.v4_gold_conll 38 | single_example = true 39 | genres = ["bc", "bn", "mz", "nw", "pt", "tc", "wb"] 40 | eval_frequency = 1000 41 | report_frequency = 100 42 | log_root = ${data_dir} 43 | adam_eps = 1e-6 44 | task_optimizer = adam 45 | } 46 | 47 | bert_base = ${best}{ 48 | num_docs = 2802 49 | bert_learning_rate = 1e-05 50 | task_learning_rate = 0.0002 51 | max_segment_len = 128 52 | ffnn_size = 3000 53 | train_path = ${data_dir}/train.english.128.jsonlines 54 | eval_path = ${data_dir}/dev.english.128.jsonlines 55 | conll_eval_path = ${data_dir}/dev.english.v4_gold_conll 56 | max_training_sentences = 11 57 | bert_config_file = ${best.log_root}/bert_base/bert_config.json 58 | vocab_file = ${best.log_root}/bert_base/vocab.txt 59 | tf_checkpoint = ${best.log_root}/bert_base/model.max.ckpt 60 | init_checkpoint = ${best.log_root}/bert_base/model.max.ckpt 61 | } 62 | 63 | train_bert_base = ${bert_base}{ 64 | tf_checkpoint = ${best.log_root}/cased_L-12_H-768_A-12/bert_model.ckpt 65 | init_checkpoint = ${best.log_root}/cased_L-12_H-768_A-12/bert_model.ckpt 66 | } 67 | 68 | bert_large = ${best}{ 69 | num_docs = 2802 70 | bert_learning_rate = 1e-05 71 | task_learning_rate = 0.0002 72 | max_segment_len = 384 73 | ffnn_size = 3000 74 | train_path = ${data_dir}/train.english.384.jsonlines 75 | eval_path = ${data_dir}/dev.english.384.jsonlines 76 | conll_eval_path = ${data_dir}/dev.english.v4_gold_conll 77 | max_training_sentences = 3 78 | bert_config_file = ${best.log_root}/bert_large/bert_config.json 79 | vocab_file = ${best.log_root}/bert_large/vocab.txt 80 | tf_checkpoint = ${best.log_root}/bert_large/model.max.ckpt 81 | init_checkpoint = ${best.log_root}/bert_large/model.max.ckpt 82 | } 83 | 84 | train_bert_large = ${bert_large}{ 85 | tf_checkpoint = ${best.log_root}/cased_L-24_H-1024_A-16/bert_model.ckpt 86 | init_checkpoint = ${best.log_root}/cased_L-24_H-1024_A-16/bert_model.ckpt 87 | } 88 | 89 | spanbert_base = ${best}{ 90 | num_docs = 2802 91 | bert_learning_rate = 2e-05 92 | task_learning_rate = 0.0001 93 | max_segment_len = 384 94 | ffnn_size = 3000 95 | train_path = ${data_dir}/train.english.384.jsonlines 96 | eval_path = ${data_dir}/dev.english.384.jsonlines 97 | conll_eval_path = ${data_dir}/dev.english.v4_gold_conll 98 | max_training_sentences = 3 99 | bert_config_file = ${best.log_root}/spanbert_base/bert_config.json 100 | vocab_file = ${best.log_root}/spanbert_base/vocab.txt 101 | tf_checkpoint = ${best.log_root}/spanbert_base/model.max.ckpt 102 | init_checkpoint = ${best.log_root}/spanbert_base/model.max.ckpt 103 | } 104 | 105 | train_spanbert_base = ${spanbert_base}{ 106 | tf_checkpoint = 
${best.log_root}/cased_L-12_H-768_A-12/bert_model.ckpt 107 | init_checkpoint = ${best.log_root}/spanbert_hf_base/pytorch_model.bin 108 | } 109 | 110 | spanbert_large = ${best}{ 111 | num_docs = 2802 112 | bert_learning_rate = 1e-05 113 | task_learning_rate = 0.0003 114 | max_segment_len = 512 115 | ffnn_size = 3000 116 | train_path = ${data_dir}/train.english.512.jsonlines 117 | eval_path = ${data_dir}/dev.english.512.jsonlines 118 | conll_eval_path = ${data_dir}/dev.english.v4_gold_conll 119 | max_training_sentences = 3 120 | bert_config_file = ${best.log_root}/spanbert_large/bert_config.json 121 | vocab_file = ${best.log_root}/spanbert_large/vocab.txt 122 | tf_checkpoint = ${best.log_root}/spanbert_large/model.max.ckpt 123 | init_checkpoint = ${best.log_root}/spanbert_large/model.max.ckpt 124 | } 125 | 126 | train_spanbert_large = ${spanbert_large}{ 127 | tf_checkpoint = ${best.log_root}/cased_L-24_H-1024_A-16/bert_model.ckpt 128 | init_checkpoint = ${best.log_root}/spanbert_hf/pytorch_model.bin 129 | } 130 | -------------------------------------------------------------------------------- /gap_to_jsonlines.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import os 4 | from bert import tokenization 5 | 6 | def read_tsv(tsv_file): 7 | data = [] 8 | first = True 9 | with open(tsv_file) as f: 10 | for line in f: 11 | cols = line.split('\t') 12 | if first: 13 | fields = list(enumerate(cols)) 14 | first = False 15 | continue 16 | data += [{col : (cols[index] if 'offset' not in col else int(cols[index])) for index, col in fields}] 17 | return data 18 | 19 | def is_start(char_offset, char_to_word_offset, text): 20 | # print(char_offset, char_to_word_offset[char_offset-1: char_offset +1], text[char_offset-10:char_offset+10]) 21 | return char_offset == 0 or char_to_word_offset[char_offset] != char_to_word_offset[char_offset - 1] 22 | 23 | def tokenize(dataset, vocab_file): 24 | tokenizer = tokenization.FullTokenizer( 25 | vocab_file=vocab_file, do_lower_case=False) if vocab_file is not None else None 26 | def is_whitespace(c): 27 | if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: 28 | return True 29 | return False 30 | def is_punctuation(c): 31 | if c == '.' 
or c == "," or c == "`" or c == '"' or c == "'" or c == '(' or c == ')' or c == '-' or c == '/' or c == '' or c == '*': 32 | return True 33 | return False 34 | for datum in dataset: 35 | paragraph_text = datum["Text"] 36 | doc_tokens = [] 37 | char_to_word_offset = [] 38 | prev_is_whitespace, prev_is_punc = True, True 39 | for c in paragraph_text: 40 | if is_punctuation(c): 41 | prev_is_punc = True 42 | doc_tokens.append(c) 43 | elif is_whitespace(c): 44 | prev_is_whitespace = True 45 | else: 46 | if prev_is_whitespace or prev_is_punc: 47 | doc_tokens.append(c) 48 | else: 49 | doc_tokens[-1] += c 50 | prev_is_whitespace = False 51 | prev_is_punc = False 52 | char_to_word_offset.append(len(doc_tokens) - 1) 53 | tok_to_subtoken, para_subtokens, sentence_map = ([], ['[CLS]'], [0]) if tokenizer is not None else ([], [], []) 54 | current_sentence = 0 55 | clusters = [] 56 | for tok_index, token in enumerate(doc_tokens): 57 | subtokens = [token] if tokenizer is None else tokenizer.tokenize(token) 58 | sentence_map += [current_sentence] * len(subtokens) 59 | tok_to_subtoken.append((len(para_subtokens), len(para_subtokens) + len(subtokens) - 1)) 60 | para_subtokens += subtokens 61 | if token == '.': 62 | current_sentence += 1 63 | datum['speakers'] = ['[SPL]'] + ['Speaker#1'] * (len(para_subtokens)-1) + ['[SPL]'] if tokenizer is not None else ['Speaker#1'] * (len(para_subtokens)-1) 64 | datum['sentences'] = para_subtokens + ['[SEP]'] if tokenizer is not None else para_subtokens 65 | datum['sentence_map'] = sentence_map + [sentence_map[-1]] if tokenizer is not None else sentence_map 66 | clusters = [] 67 | a_start, a_end = datum['A-offset'], datum['A-offset'] + len(datum['A']) 68 | b_start, b_end = datum['B-offset'], datum['B-offset'] + len(datum['B']) 69 | pronoun_start, pronoun_end = datum['Pronoun-offset'], datum['Pronoun-offset'] + len(datum['Pronoun']) 70 | entity_start, entity_end = (a_start, a_end) if datum['A-coref'] == 'TRUE' else (b_start, b_end) 71 | 72 | datum['a_subtoken_span'] = tok_to_subtoken[char_to_word_offset[a_start]][0], tok_to_subtoken[char_to_word_offset[a_end]][1] 73 | datum['b_subtoken_span'] = tok_to_subtoken[char_to_word_offset[b_start]][0], tok_to_subtoken[char_to_word_offset[b_end]][1] 74 | datum['pronoun_subtoken_span'] = tok_to_subtoken[char_to_word_offset[pronoun_start]][0], tok_to_subtoken[char_to_word_offset[pronoun_end]][1] 75 | 76 | if datum['A-coref'] == 'TRUE' or datum['B-coref'] == 'TRUE': 77 | entity_span = datum['a_subtoken_span'] if datum['A-coref'] else datum['b_subtoken_span'] 78 | clusters.append(entity_span) 79 | clusters.append(datum['pronoun_subtoken_span']) 80 | datum['clusters'] = [clusters] 81 | ext = 'tok.jsonlines' if tokenizer is None else 'jsonlines' 82 | with open(tsv_file.replace('tsv', ext), 'w') as f: 83 | for datum in dataset: 84 | json_datum = {'doc_key': datum['ID'], 'sentences': [datum['sentences']], 'speakers': [datum['speakers']], 85 | 'sentence_map': datum['sentence_map'], 'clusters': datum['clusters'], 86 | 'a_subtoken_span': datum['a_subtoken_span'], 'b_subtoken_span': datum['b_subtoken_span'], 'pronoun_subtoken_span': datum['pronoun_subtoken_span']} 87 | f.write(json.dumps(json_datum) + '\n') 88 | 89 | def convert(tsv_file, vocab_file): 90 | dataset = read_tsv(tsv_file) 91 | tokenize(dataset, vocab_file) 92 | 93 | 94 | if __name__ == '__main__': 95 | tsv_file = sys.argv[1] 96 | vocab_file = sys.argv[2] if len(sys.argv) == 3 else None 97 | convert(tsv_file, vocab_file) 98 | 
-------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from collections import Counter 7 | from sklearn.utils.linear_assignment_ import linear_assignment 8 | 9 | 10 | def f1(p_num, p_den, r_num, r_den, beta=1): 11 | p = 0 if p_den == 0 else p_num / float(p_den) 12 | r = 0 if r_den == 0 else r_num / float(r_den) 13 | return 0 if p + r == 0 else (1 + beta * beta) * p * r / (beta * beta * p + r) 14 | 15 | class CorefEvaluator(object): 16 | def __init__(self): 17 | self.evaluators = [Evaluator(m) for m in (muc, b_cubed, ceafe)] 18 | 19 | def update(self, predicted, gold, mention_to_predicted, mention_to_gold): 20 | for e in self.evaluators: 21 | e.update(predicted, gold, mention_to_predicted, mention_to_gold) 22 | 23 | def get_f1(self): 24 | return sum(e.get_f1() for e in self.evaluators) / len(self.evaluators) 25 | 26 | def get_recall(self): 27 | return sum(e.get_recall() for e in self.evaluators) / len(self.evaluators) 28 | 29 | def get_precision(self): 30 | return sum(e.get_precision() for e in self.evaluators) / len(self.evaluators) 31 | 32 | def get_prf(self): 33 | return self.get_precision(), self.get_recall(), self.get_f1() 34 | 35 | class Evaluator(object): 36 | def __init__(self, metric, beta=1): 37 | self.p_num = 0 38 | self.p_den = 0 39 | self.r_num = 0 40 | self.r_den = 0 41 | self.metric = metric 42 | self.beta = beta 43 | 44 | def update(self, predicted, gold, mention_to_predicted, mention_to_gold): 45 | if self.metric == ceafe: 46 | pn, pd, rn, rd = self.metric(predicted, gold) 47 | else: 48 | pn, pd = self.metric(predicted, mention_to_gold) 49 | rn, rd = self.metric(gold, mention_to_predicted) 50 | self.p_num += pn 51 | self.p_den += pd 52 | self.r_num += rn 53 | self.r_den += rd 54 | 55 | def get_f1(self): 56 | return f1(self.p_num, self.p_den, self.r_num, self.r_den, beta=self.beta) 57 | 58 | def get_recall(self): 59 | return 0 if self.r_num == 0 else self.r_num / float(self.r_den) 60 | 61 | def get_precision(self): 62 | return 0 if self.p_num == 0 else self.p_num / float(self.p_den) 63 | 64 | def get_prf(self): 65 | return self.get_precision(), self.get_recall(), self.get_f1() 66 | 67 | def get_counts(self): 68 | return self.p_num, self.p_den, self.r_num, self.r_den 69 | 70 | 71 | def evaluate_documents(documents, metric, beta=1): 72 | evaluator = Evaluator(metric, beta=beta) 73 | for document in documents: 74 | evaluator.update(document) 75 | return evaluator.get_precision(), evaluator.get_recall(), evaluator.get_f1() 76 | 77 | 78 | def b_cubed(clusters, mention_to_gold): 79 | num, dem = 0, 0 80 | 81 | for c in clusters: 82 | if len(c) == 1: 83 | continue 84 | 85 | gold_counts = Counter() 86 | correct = 0 87 | for m in c: 88 | if m in mention_to_gold: 89 | gold_counts[tuple(mention_to_gold[m])] += 1 90 | for c2, count in gold_counts.items(): 91 | if len(c2) != 1: 92 | correct += count * count 93 | 94 | num += correct / float(len(c)) 95 | dem += len(c) 96 | 97 | return num, dem 98 | 99 | 100 | def muc(clusters, mention_to_gold): 101 | tp, p = 0, 0 102 | for c in clusters: 103 | p += len(c) - 1 104 | tp += len(c) 105 | linked = set() 106 | for m in c: 107 | if m in mention_to_gold: 108 | linked.add(mention_to_gold[m]) 109 | else: 110 | tp -= 1 111 | tp -= len(linked) 112 | return tp, p 113 | 114 | 115 | def 
phi4(c1, c2): 116 | return 2 * len([m for m in c1 if m in c2]) / float(len(c1) + len(c2)) 117 | 118 | 119 | def ceafe(clusters, gold_clusters): 120 | clusters = [c for c in clusters if len(c) != 1] 121 | scores = np.zeros((len(gold_clusters), len(clusters))) 122 | for i in range(len(gold_clusters)): 123 | for j in range(len(clusters)): 124 | scores[i, j] = phi4(gold_clusters[i], clusters[j]) 125 | matching = linear_assignment(-scores) 126 | similarity = sum(scores[matching[:, 0], matching[:, 1]]) 127 | return similarity, len(clusters), similarity, len(gold_clusters) 128 | 129 | 130 | def lea(clusters, mention_to_gold): 131 | num, dem = 0, 0 132 | 133 | for c in clusters: 134 | if len(c) == 1: 135 | continue 136 | 137 | common_links = 0 138 | all_links = len(c) * (len(c) - 1) / 2.0 139 | for i, m in enumerate(c): 140 | if m in mention_to_gold: 141 | for m2 in c[i + 1:]: 142 | if m2 in mention_to_gold and mention_to_gold[m] == mention_to_gold[m2]: 143 | common_links += 1 144 | 145 | num += len(c) * common_links / float(all_links) 146 | dem += len(c) 147 | 148 | return num, dem 149 | -------------------------------------------------------------------------------- /optimization.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from bert.optimization import AdamWeightDecayOptimizer 3 | 4 | def create_custom_optimizer(tvars, loss, bert_init_lr, task_init_lr, num_train_steps, num_warmup_steps, use_tpu, global_step=None, freeze=-1, task_opt='adam', eps=1e-6): 5 | """Creates an optimizer training op.""" 6 | if global_step is None: 7 | global_step = tf.train.get_or_create_global_step() 8 | 9 | bert_learning_rate = tf.constant(value=bert_init_lr, shape=[], dtype=tf.float32) 10 | task_learning_rate = tf.constant(value=task_init_lr, shape=[], dtype=tf.float32) 11 | 12 | # Implements linear decay of the learning rate. 13 | bert_learning_rate = tf.train.polynomial_decay( 14 | bert_learning_rate, 15 | global_step, 16 | num_train_steps, 17 | end_learning_rate=0.0, 18 | power=1.0, 19 | cycle=False) 20 | task_learning_rate = tf.train.polynomial_decay( 21 | task_learning_rate, 22 | global_step, 23 | num_train_steps, 24 | end_learning_rate=0.0, 25 | power=1.0, 26 | cycle=False) 27 | 28 | # Implements linear warmup. I.e., if global_step < num_warmup_steps, the 29 | # learning rate will be `global_step/num_warmup_steps * init_lr`. 30 | if num_warmup_steps: 31 | global_steps_int = tf.cast(global_step, tf.int32) 32 | warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32) 33 | 34 | global_steps_float = tf.cast(global_steps_int, tf.float32) 35 | warmup_steps_float = tf.cast(warmup_steps_int, tf.float32) 36 | 37 | warmup_percent_done = global_steps_float / warmup_steps_float 38 | bert_warmup_learning_rate = bert_init_lr * warmup_percent_done 39 | task_warmup_learning_rate = task_init_lr * warmup_percent_done 40 | 41 | is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32) 42 | bert_learning_rate = ( 43 | (1.0 - is_warmup) * bert_learning_rate + is_warmup * bert_warmup_learning_rate) 44 | 45 | # It is recommended that you use this optimizer for fine tuning, since this 46 | # is how the model was trained (note that the Adam m/v variables are NOT 47 | # loaded from init_checkpoint.) 
48 | bert_optimizer = AdamWeightDecayOptimizer( 49 | learning_rate=bert_learning_rate, 50 | weight_decay_rate=0.01, 51 | beta_1=0.9, 52 | beta_2=0.999, 53 | epsilon=eps, 54 | exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"]) 55 | if task_opt == 'adam_weight_decay': 56 | task_optimizer = AdamWeightDecayOptimizer( 57 | learning_rate=task_learning_rate, 58 | weight_decay_rate=0.01, 59 | beta_1=0.9, 60 | beta_2=0.999, 61 | epsilon=eps 62 | ) 63 | elif task_opt == 'adam': 64 | task_optimizer = tf.train.AdamOptimizer( 65 | learning_rate=task_learning_rate) 66 | else: 67 | raise NotImplementedError('Check optimizer. {} is invalid.'.format(task_opt)) 68 | 69 | # tvars = tf.trainable_variables() 70 | bert_vars, task_vars = [], [] 71 | for var in tvars: 72 | if var.name.startswith('bert'): 73 | can_optimize = False 74 | if var.name.startswith('bert/encoder/layer_') and int(var.name.split('/')[2][len('layer_'):]) >= freeze: 75 | can_optimize = True 76 | if freeze == -1 or can_optimize: 77 | bert_vars.append(var) 78 | else: 79 | task_vars.append(var) 80 | print('bert:task', len(bert_vars), len(task_vars)) 81 | grads = tf.gradients(loss, bert_vars + task_vars) 82 | bert_grads = grads[:len(bert_vars)] 83 | task_grads = grads[len(bert_vars):] 84 | 85 | # This is how the model was pre-trained. 86 | (bert_grads, _) = tf.clip_by_global_norm(bert_grads, clip_norm=1.0) 87 | (task_grads, _) = tf.clip_by_global_norm(task_grads, clip_norm=1.0) 88 | 89 | # global_step1 = tf.Print(global_step, [global_step], 'before') 90 | bert_train_op = bert_optimizer.apply_gradients( 91 | zip(bert_grads, bert_vars), global_step=global_step) 92 | task_train_op = task_optimizer.apply_gradients( 93 | zip(task_grads, task_vars), global_step=global_step) 94 | if task_opt == 'adam_weight_decay': 95 | new_global_step = global_step + 1 96 | train_op = tf.group(bert_train_op, task_train_op, [global_step.assign(new_global_step)]) 97 | else: 98 | train_op = tf.group(bert_train_op, task_train_op) 99 | return train_op 100 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys 6 | import json 7 | 8 | import tensorflow as tf 9 | import util 10 | 11 | if __name__ == "__main__": 12 | config = util.initialize_from_env() 13 | log_dir = config["log_dir"] 14 | 15 | # Input file in .jsonlines format. 16 | input_filename = sys.argv[2] 17 | 18 | # Predictions will be written to this file in .jsonlines format. 
19 | output_filename = sys.argv[3] 20 | 21 | model = util.get_model(config) 22 | saver = tf.train.Saver() 23 | 24 | with tf.Session() as session: 25 | model.restore(session) 26 | 27 | with open(output_filename, "w") as output_file: 28 | with open(input_filename) as input_file: 29 | for example_num, line in enumerate(input_file.readlines()): 30 | example = json.loads(line) 31 | tensorized_example = model.tensorize_example(example, is_training=False) 32 | feed_dict = {i:t for i,t in zip(model.input_tensors, tensorized_example)} 33 | _, _, _, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores = session.run(model.predictions, feed_dict=feed_dict) 34 | predicted_antecedents = model.get_predicted_antecedents(top_antecedents, top_antecedent_scores) 35 | example["predicted_clusters"], _ = model.get_predicted_clusters(top_span_starts, top_span_ends, predicted_antecedents) 36 | example["top_spans"] = list(zip((int(i) for i in top_span_starts), (int(i) for i in top_span_ends))) 37 | example['head_scores'] = [] 38 | 39 | output_file.write(json.dumps(example)) 40 | output_file.write("\n") 41 | if example_num % 100 == 0: 42 | print("Decoded {} examples.".format(example_num + 1)) 43 | -------------------------------------------------------------------------------- /pytorch_to_tf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import sys 4 | import tensorflow as tf 5 | from tensorflow.python.ops import variable_scope as vs 6 | from tensorflow.python.framework import ops 7 | 8 | tensors_to_transpose = ( 9 | "dense/kernel", 10 | "attention/self/query", 11 | "attention/self/key", 12 | "attention/self/value" 13 | ) 14 | 15 | var_map = ( 16 | ('layer.', 'layer_'), 17 | ('word_embeddings.weight', 'word_embeddings'), 18 | ('position_embeddings.weight', 'position_embeddings'), 19 | ('token_type_embeddings.weight', 'token_type_embeddings'), 20 | ('.', '/'), 21 | ('LayerNorm/weight', 'LayerNorm/gamma'), 22 | ('LayerNorm/bias', 'LayerNorm/beta'), 23 | ('weight', 'kernel') 24 | ) 25 | 26 | def to_tf_var_name(name: str): 27 | for patt, repl in iter(var_map): 28 | name = name.replace(patt, repl) 29 | return '{}'.format(name) 30 | 31 | def my_convert_keys(model): 32 | converted = {} 33 | for k_pt, v in model.items(): 34 | k_tf = to_tf_var_name(k_pt) 35 | converted[k_tf] = v 36 | return converted 37 | 38 | def load_from_pytorch_checkpoint(checkpoint, assignment_map): 39 | pytorch_model = torch.load(checkpoint, map_location='cpu') 40 | pt_model_with_tf_keys = my_convert_keys(pytorch_model) 41 | for _, name in assignment_map.items(): 42 | store_vars = vs._get_default_variable_store()._vars 43 | var = store_vars.get(name, None) 44 | assert var is not None 45 | if name not in pt_model_with_tf_keys: 46 | print('WARNING:', name, 'not found in original model.') 47 | continue 48 | array = pt_model_with_tf_keys[name].cpu().numpy() 49 | if any([x in name for x in tensors_to_transpose]): 50 | array = array.transpose() 51 | assert tuple(var.get_shape().as_list()) == tuple(array.shape) 52 | init_value = ops.convert_to_tensor(array, dtype=np.float32) 53 | var._initial_value = init_value 54 | var._initializer_op = var.assign(init_value) 55 | 56 | 57 | def print_vars(pytorch_ckpt, tf_ckpt): 58 | tf_vars = tf.train.list_variables(tf_ckpt) 59 | tf_vars = {k:v for (k, v) in tf_vars} 60 | pytorch_model = torch.load(pytorch_ckpt) 61 | pt_model_with_tf_keys = my_convert_keys(pytorch_model) 62 | only_pytorch, only_tf, common = [], [], [] 63 | 
tf_only = set(tf_vars.keys()) 64 | for k, v in pt_model_with_tf_keys.items(): 65 | if k in tf_vars: 66 | common.append(k) 67 | tf_only.remove(k) 68 | else: 69 | only_pytorch.append(k) 70 | print('-------------------') 71 | print('Common', len(common)) 72 | for k in common: 73 | array = pt_model_with_tf_keys[k].cpu().numpy() 74 | if any([x in k for x in tensors_to_transpose]): 75 | array = array.transpose() 76 | tf_shape = tuple(tf_vars[k]) 77 | pt_shape = tuple(array.shape) 78 | if tf_shape != pt_shape: 79 | print(k, tf_shape, pt_shape) 80 | print('-------------------') 81 | print('Pytorch only', len(only_pytorch)) 82 | for k in only_pytorch: 83 | print(k, pt_model_with_tf_keys[k].size()) 84 | print('-------------------') 85 | print('TF only', len(tf_only)) 86 | for k in tf_only: 87 | print(k, tf_vars[k]) 88 | 89 | if __name__ == '__main__': 90 | print_vars(sys.argv[1], sys.argv[2]) 91 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.7.1 2 | astor==0.8.0 3 | awscli==1.14.58 4 | certifi==2019.6.16 5 | cort==0.2.4.5 6 | docutils==0.14 7 | future==0.16.0 8 | gast==0.2.2 9 | google-pasta==0.1.7 10 | grpcio==1.23.0 11 | h5py==2.7.1 12 | html5lib==0.9999999 13 | Jinja2==2.10 14 | JPype1==0.6.2 15 | Keras-Applications==1.0.8 16 | Keras-Preprocessing==1.1.0 17 | Markdown==3.1.1 18 | MarkupSafe==1.0 19 | mmh3==2.5.1 20 | msgpack-python==0.5.4 21 | numpy==1.17.0 22 | Pillow==6.1.0 23 | protobuf==3.9.1 24 | psycopg2==2.7.4 25 | pyasn1==0.4.2 26 | pycparser==2.18 27 | pyhocon==0.3.51 28 | pyparsing==2.2.0 29 | PyStanfordDependencies==0.3.1 30 | python-dateutil==2.6.1 31 | pytz==2017.3 32 | s3transfer==0.1.13 33 | scikit-learn==0.19.1 34 | scipy==1.0.0 35 | six==1.12.0 36 | tensorboard==1.14.0 37 | tensorflow-estimator==1.14.0 38 | tensorflow-gpu==1.14.0 39 | termcolor==1.1.0 40 | torch==1.2.0 41 | torchvision==0.4.0 42 | typing==3.6.4 43 | Werkzeug==0.14.1 44 | wrapt==1.11.2 45 | -------------------------------------------------------------------------------- /setup_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Build custom kernels. 
3 | TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) 4 | TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) 5 | 6 | # Linux (pip) 7 | g++ -std=c++11 -shared coref_kernels.cc -o coref_kernels.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 -D_GLIBCXX_USE_CXX11_ABI=0 8 | -------------------------------------------------------------------------------- /setup_training.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | ontonotes_path=$1 5 | data_dir=$2 6 | 7 | dlx() { 8 | wget -P $data_dir $1/$2 9 | tar -xvzf $data_dir/$2 -C $data_dir 10 | rm $data_dir/$2 11 | } 12 | 13 | download_bert(){ 14 | model=$1 15 | wget -P $data_dir https://storage.googleapis.com/bert_models/2018_10_18/$model.zip 16 | unzip $data_dir/$model.zip 17 | rm $data_dir/$model.zip 18 | mv $model $data_dir/ 19 | } 20 | 21 | download_spanbert(){ 22 | model=$1 23 | wget -P $data_dir https://dl.fbaipublicfiles.com/fairseq/models/$model.tar.gz 24 | mkdir $data_dir/$model 25 | tar xvfz $data_dir/$model.tar.gz -C $data_dir/$model 26 | rm $data_dir/$model.tar.gz 27 | } 28 | 29 | 30 | conll_url=http://conll.cemantix.org/2012/download 31 | dlx $conll_url conll-2012-train.v4.tar.gz 32 | dlx $conll_url conll-2012-development.v4.tar.gz 33 | dlx $conll_url/test conll-2012-test-key.tar.gz 34 | dlx $conll_url/test conll-2012-test-official.v9.tar.gz 35 | 36 | dlx $conll_url conll-2012-scripts.v3.tar.gz 37 | dlx http://conll.cemantix.org/download reference-coreference-scorers.v8.01.tar.gz 38 | 39 | download_bert cased_L-12_H-768_A-12 40 | download_bert cased_L-24_H-1024_A-16 41 | download_spanbert spanbert_hf 42 | download_spanbert spanbert_hf_base 43 | 44 | bash conll-2012/v3/scripts/skeleton2conll.sh -D $ontonotes_path/data/files/data $data_dir/conll-2012 45 | 46 | function compile_partition() { 47 | rm -f $2.$5.$3$4 48 | cat $data_dir/conll-2012/$3/data/$1/data/$5/annotations/*/*/*/*.$3$4 >> $data_dir/$2.$5.$3$4 49 | } 50 | 51 | function compile_language() { 52 | compile_partition development dev v4 _gold_conll $1 53 | compile_partition train train v4 _gold_conll $1 54 | compile_partition test test v4 _gold_conll $1 55 | } 56 | 57 | compile_language english 58 | #compile_language chinese 59 | #compile_language arabic 60 | 61 | vocab_file=cased_config_vocab/vocab.txt 62 | python minimize.py $vocab_file $data_dir $data_dir false 63 | -------------------------------------------------------------------------------- /to_gap_tsv.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import util 4 | 5 | def find_pronoun_cluster(prediction, pronoun_subtoken_span, cluster_key='predicted_clusters'): 6 | for cluster in prediction[cluster_key]: 7 | if pronoun_subtoken_span in cluster: 8 | return cluster 9 | return [] 10 | 11 | def read_json(json_file): 12 | data = {} 13 | with open(json_file) as f: 14 | for line in f: 15 | line = json.loads(line) 16 | data[line['doc_key']] = line 17 | return data 18 | 19 | def is_aligned(span1, span2): 20 | if span1[0] >= span2[0] and span1[1] <= span2[1]: 21 | return True 22 | if span2[0] >= span1[0] and span2[1] <= span1[1]: 23 | return True 24 | return False 25 | 26 | def is_substring_aligned(span1, sents, name): 27 | span_text = ' '.join(sents[span1[0]:span1[1] + 1]) 28 | if span_text in name or name in span_text: 29 | return True 30 | return False 31 | 32 | def read_tsv_file(tsv_file): 33 | 
tsv = {} 34 | with open(tsv_file) as f: 35 | for line in f: 36 | cols = line.split('\t') 37 | tsv[cols[0]] = cols 38 | return tsv 39 | 40 | def convert(json_file, tsv_file): 41 | data = read_json(json_file) 42 | tsv = read_tsv_file(tsv_file) if tsv_file is not None else None 43 | predictions = ['\t'.join(['ID', 'A-coref', 'B-coref'])] 44 | for key, datum in data.items(): 45 | prediction = data[key] 46 | sents = util.flatten(prediction['sentences']) 47 | if tsv is not None: 48 | print(list(enumerate(tsv[key]))) 49 | a_offset, b_offset, pronoun_offset = tuple(map(int, tsv[key][5].split(':'))), tuple(map(int, tsv[key][8].split(':'))), tuple(map(int, tsv[key][3].split(':'))) 50 | assert ' '.join(sents[a_offset[0]:a_offset[1]]) == tsv[key][4], (sents[a_offset[0]:a_offset[1]], tsv[key][4]) 51 | assert ' '.join(sents[b_offset[0]:b_offset[1]]) == tsv[key][7], (sents[b_offset[0]:b_offset[1]], tsv[key][7]) 52 | assert ' '.join(sents[pronoun_offset[0]:pronoun_offset[1]]) == tsv[key][2], (sents[pronoun_offset[0]:pronoun_offset[1]], tsv[key][2]) 53 | # continue 54 | pronoun_cluster = find_pronoun_cluster(prediction, prediction['pronoun_subtoken_span']) 55 | a_coref, b_coref = 'FALSE', 'FALSE' 56 | a_text, b_text = (tsv[key][4], tsv[key][7]) if tsv is not None else (None, None) 57 | for span in pronoun_cluster: 58 | a_aligned = is_aligned(span, prediction['a_subtoken_span']) if tsv is None else is_substring_aligned(span, sents, a_text) 59 | b_aligned = is_aligned(span, prediction['b_subtoken_span']) if tsv is None else is_substring_aligned(span, sents, b_text) 60 | 61 | if a_aligned: 62 | a_coref = 'TRUE' 63 | if b_aligned: 64 | b_coref = 'TRUE' 65 | predictions += ['\t'.join([key, a_coref, b_coref])] 66 | # write file 67 | with open(json_file.replace('jsonlines', 'tsv'), 'w') as f: 68 | f.write('\n'.join(predictions)) 69 | 70 | 71 | if __name__ == '__main__': 72 | json_file = sys.argv[1] 73 | tsv_file = sys.argv[2] if len(sys.argv) == 3 else None 74 | convert(json_file, tsv_file) 75 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import os 7 | import time 8 | 9 | import tensorflow as tf 10 | import util 11 | import logging 12 | format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s' 13 | logging.basicConfig(format=format) 14 | logger = logging.getLogger(__name__) 15 | logger.setLevel(logging.INFO) 16 | 17 | if __name__ == "__main__": 18 | config = util.initialize_from_env() 19 | 20 | report_frequency = config["report_frequency"] 21 | eval_frequency = config["eval_frequency"] 22 | 23 | model = util.get_model(config) 24 | saver = tf.train.Saver() 25 | 26 | log_dir = config["log_dir"] 27 | max_steps = config['num_epochs'] * config['num_docs'] 28 | writer = tf.summary.FileWriter(log_dir, flush_secs=20) 29 | 30 | max_f1 = 0 31 | mode = 'w' 32 | 33 | with tf.Session() as session: 34 | session.run(tf.global_variables_initializer()) 35 | model.start_enqueue_thread(session) 36 | accumulated_loss = 0.0 37 | 38 | ckpt = tf.train.get_checkpoint_state(log_dir) 39 | if ckpt and ckpt.model_checkpoint_path: 40 | print("Restoring from: {}".format(ckpt.model_checkpoint_path)) 41 | saver.restore(session, ckpt.model_checkpoint_path) 42 | mode = 'a' 43 | fh = logging.FileHandler(os.path.join(log_dir, 'stdout.log'), mode=mode) 44 | 
fh.setFormatter(logging.Formatter(format)) 45 | logger.addHandler(fh) 46 | 47 | initial_time = time.time() 48 | while True: 49 | tf_loss, tf_global_step, _ = session.run([model.loss, model.global_step, model.train_op]) 50 | accumulated_loss += tf_loss 51 | # print('tf global_step', tf_global_step) 52 | 53 | if tf_global_step % report_frequency == 0: 54 | total_time = time.time() - initial_time 55 | steps_per_second = tf_global_step / total_time 56 | 57 | average_loss = accumulated_loss / report_frequency 58 | logger.info("[{}] loss={:.2f}, steps/s={:.2f}".format(tf_global_step, average_loss, steps_per_second)) 59 | writer.add_summary(util.make_summary({"loss": average_loss}), tf_global_step) 60 | accumulated_loss = 0.0 61 | 62 | if tf_global_step > 0 and tf_global_step % eval_frequency == 0: 63 | saver.save(session, os.path.join(log_dir, "model"), global_step=tf_global_step) 64 | eval_summary, eval_f1 = model.evaluate(session, tf_global_step) 65 | 66 | if eval_f1 > max_f1: 67 | max_f1 = eval_f1 68 | util.copy_checkpoint(os.path.join(log_dir, "model-{}".format(tf_global_step)), os.path.join(log_dir, "model.max.ckpt")) 69 | 70 | writer.add_summary(eval_summary, tf_global_step) 71 | writer.add_summary(util.make_summary({"max_eval_f1": max_f1}), tf_global_step) 72 | 73 | logger.info("[{}] eval_f1={:.4f}, max_f1={:.4f}".format(tf_global_step, eval_f1, max_f1)) 74 | if tf_global_step > max_steps: 75 | break 76 | -------------------------------------------------------------------------------- /viz/.gitignore: -------------------------------------------------------------------------------- 1 | *.jsonlines 2 | -------------------------------------------------------------------------------- /viz/index.html: --------------------------------------------------------------------------------
(HTML markup of viz/index.html not recoverable from the extraction; recoverable content: page title "End-to-End Neural Coreference Resolution Visualization" and the labels "Clusters", "Document", "Render attention weights on hover", "Annotated Clusters", "Predicted Clusters".) -------------------------------------------------------------------------------- /viz/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -m http.server 6006 4 | --------------------------------------------------------------------------------
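A rough end-to-end sketch of how the scripts above fit together, assuming each script reads an experiment name defined in experiments.conf from its first command-line argument (as evaluate.py, predict.py, and train.py imply) and that setup_training.sh has already populated the directory pointed to by data_dir; file names below are illustrative and GPU selection is omitted: python train.py train_spanbert_large  # fine-tune from the downloaded SpanBERT checkpoint; the best model is copied to model.max.ckpt ; python evaluate.py spanbert_large  # official CoNLL-2012 scores via conll.py and the v8.01 scorer ; python predict.py spanbert_large dev.english.512.jsonlines dev.predictions.jsonlines  # adds predicted_clusters and top_spans to each input document ; cd viz && bash run.sh  # serve the visualization page at http://localhost:6006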