├── .gitignore ├── README.md ├── configs └── basic.conf ├── constants.py ├── data ├── __init__.py ├── base.py └── helpers.py ├── evaluate_and_visualize.py ├── models ├── __init__.py ├── base.py ├── encoder.py └── helpers.py ├── reference-coreference-scorers-8.01 ├── README.txt ├── lib │ ├── Algorithm │ │ ├── Munkres.pm │ │ └── README.Munkres │ ├── CorScorer.pm │ ├── Cwd.pm │ ├── Data │ │ └── Dumper.pm │ └── Math │ │ └── Combinatorics.pm ├── scorer.bat ├── scorer.pl └── test │ ├── CorefMetricTest.pm │ ├── CorefMetricTestConfig.pm │ ├── DataFiles │ ├── TC-A-1.response │ ├── TC-A-10.response │ ├── TC-A-11.response │ ├── TC-A-12.response │ ├── TC-A-13.response │ ├── TC-A-2.response │ ├── TC-A-3.response │ ├── TC-A-4.response │ ├── TC-A-5.response │ ├── TC-A-6.response │ ├── TC-A-7.response │ ├── TC-A-8.response │ ├── TC-A-9.response │ ├── TC-A.key │ ├── TC-B-1.response │ ├── TC-B.key │ ├── TC-C-1.response │ ├── TC-C.key │ ├── TC-D-1.response │ ├── TC-D.key │ ├── TC-E-1.response │ ├── TC-E.key │ ├── TC-F-1.response │ ├── TC-F.key │ ├── TC-G-1.response │ ├── TC-G.key │ ├── TC-H-1.response │ ├── TC-H.key │ ├── TC-I-1.response │ ├── TC-I.key │ ├── TC-J-1.response │ ├── TC-J.key │ ├── TC-K-1.response │ ├── TC-K.key │ ├── TC-L-1.response │ ├── TC-L.key │ ├── TC-M-1.response │ ├── TC-M-2.response │ ├── TC-M-3.response │ ├── TC-M-4.response │ ├── TC-M-5.response │ ├── TC-M-6.response │ ├── TC-M.key │ ├── TC-N-1.response │ ├── TC-N-2.response │ ├── TC-N-3.response │ ├── TC-N-4.response │ ├── TC-N-5.response │ ├── TC-N-6.response │ └── TC-N.key │ ├── TestCases.README │ └── test.pl ├── requirements.txt ├── runner.py ├── scorer.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | !reference-coreference-scorers-8.01/lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Event Coreference Resolution 2 | 3 | This repo provides the code for the paper [A Context-Dependent Gated Module for Incorporating Symbolic Semantics into Event Coreference Resolution](https://arxiv.org/abs/2104.01697) (NAACL 2021). If you have any questions or suggestions, please open a new [Github issue](https://github.com/laituan245/eventcoref/issues/new). 4 | 5 | 6 | ## Instructions 7 | You can install the dependencies for the project using the following command: 8 | ``` 9 | pip install -r requirements.txt 10 | ``` 11 | 12 | You will also need to make the evaluation script executable: 13 | ``` 14 | chmod +x reference-coreference-scorers-8.01/scorer.pl 15 | ``` 16 | 17 | 18 | To train a basic model, run the following command: 19 | ``` 20 | python runner.py -c basic 21 | ``` 22 | Arguments: 23 | - -c, --config_name: Config name (default value is `basic`. See [configs/basic.conf](https://github.com/laituan245/eventcoref/blob/main/configs/basic.conf) for the list of configs). 24 | 25 | ## Data 26 | Due to licensing restrictions, we cannot publicly share ACE 2005 and KBP 2016. Please download the data from the LDC website.
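Once the data is in place, an end-to-end run looks like the following (an illustrative sketch: `gated_all_features` is one of the configs defined in [configs/basic.conf](https://github.com/laituan245/eventcoref/blob/main/configs/basic.conf), and `<path_to_saved_model>` is a placeholder for wherever `runner.py` saved the trained checkpoint):
```
python runner.py -c gated_all_features
python evaluate_and_visualize.py -c gated_all_features -m <path_to_saved_model> -o visualization.html
```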
27 | -------------------------------------------------------------------------------- /configs/basic.conf: -------------------------------------------------------------------------------- 1 | basic { 2 | # Data-Related Configs 3 | base_dataset_path = resources/ACE05-E 4 | predictions_path = resources/ACE05-E-Preds 5 | use_groundtruth = false 6 | increase_ace_dev_set = false 7 | 8 | # Model-Related Configs 9 | transformer = SpanBERT/spanbert-base-cased 10 | multi_piece_strategy = average 11 | latent_size = 500 12 | ffnn_size = 500 13 | ffnn_depth = 1 14 | feature_size = 50 15 | 16 | # Features-Related Configs 17 | combine_strategy = simple # Supported values are simple and gated 18 | use_typ_features = false # Event Type Features 19 | use_pol_features = false # Polarity Features 20 | use_mod_features = false # Modality Features 21 | use_gen_features = false # Genericty Features 22 | use_ten_features = false # Tense Features 23 | typ_noise_prob = 0.0 24 | pol_noise_prob = 0.0 25 | mod_noise_prob = 0.0 26 | gen_noise_prob = 0.0 27 | ten_noise_prob = 0.0 28 | 29 | # Training/Inference Configs 30 | gradient_checkpointing = false 31 | transformer_learning_rate = 5e-05 32 | task_learning_rate = 0.0005 33 | epochs = 50 34 | batch_size = 8 35 | transformer_dropout_rate = 0.5 36 | dropout_rate = 0.5 37 | max_grad_norm = 1.0 38 | transformer_weight_decay = 0.1 39 | 40 | # Others 41 | no_cuda = false 42 | } 43 | 44 | # Use type feature 45 | simple_type_feature = ${basic} { 46 | combine_strategy = simple 47 | use_typ_features = true 48 | } 49 | 50 | gated_type_feature = ${basic} { 51 | combine_strategy = gated 52 | use_typ_features = true 53 | } 54 | 55 | # Use polarity feature 56 | simple_polarity_feature = ${basic} { 57 | combine_strategy = simple 58 | use_pol_features = true 59 | } 60 | 61 | gated_polarity_feature = ${basic} { 62 | combine_strategy = gated 63 | use_pol_features = true 64 | } 65 | 66 | # Use modality feature 67 | simple_modality_feature = ${basic} { 68 | combine_strategy = simple 69 | use_mod_features = true 70 | } 71 | 72 | gated_modality_feature = ${basic} { 73 | combine_strategy = gated 74 | use_mod_features = true 75 | } 76 | 77 | gated_modality_feature_with_random_noise = ${gated_modality_feature} { 78 | mod_noise_prob = 0.15 79 | } 80 | 81 | # Use genericity feature 82 | simple_genericity_feature = ${basic} { 83 | combine_strategy = simple 84 | use_gen_features = true 85 | } 86 | 87 | gated_genericity_feature = ${basic} { 88 | combine_strategy = gated 89 | use_gen_features = true 90 | } 91 | 92 | gated_genericity_feature_with_random_noise = ${gated_genericity_feature} { 93 | gen_noise_prob = 0.15 94 | } 95 | 96 | # Use tense feature 97 | simple_tense_feature = ${basic} { 98 | combine_strategy = simple 99 | use_ten_features = true 100 | } 101 | 102 | gated_tense_feature = ${basic} { 103 | combine_strategy = gated 104 | use_ten_features = true 105 | } 106 | 107 | gated_tense_feature_with_random_noise = ${gated_tense_feature} { 108 | ten_noise_prob = 0.25 109 | } 110 | 111 | # Use all features 112 | simple_all_features = ${basic} { 113 | combine_strategy = simple # Supported values are simple and gated 114 | use_typ_features = true # Event Type Features 115 | use_pol_features = true # Polarity Features 116 | use_mod_features = true # Modality Features 117 | use_gen_features = true # Genericty Features 118 | use_ten_features = true # Tense Features 119 | } 120 | 121 | simple_all_features_groundtruth = ${simple_all_features} { 122 | use_groundtruth = true 123 | } 124 | 125 | 
simple_all_features_with_random_noise = ${simple_all_features} { 126 | mod_noise_prob = 0.15 127 | gen_noise_prob = 0.15 128 | ten_noise_prob = 0.25 129 | } 130 | 131 | gated_all_features = ${basic} { 132 | combine_strategy = gated # Supported values are simple and gated 133 | use_typ_features = true # Event Type Features 134 | use_pol_features = true # Polarity Features 135 | use_mod_features = true # Modality Features 136 | use_gen_features = true # Genericty Features 137 | use_ten_features = true # Tense Features 138 | } 139 | 140 | gated_all_features_with_random_noise = ${gated_all_features} { 141 | mod_noise_prob = 0.15 142 | gen_noise_prob = 0.15 143 | ten_noise_prob = 0.25 144 | } 145 | -------------------------------------------------------------------------------- /constants.py: -------------------------------------------------------------------------------- 1 | # Model Types 2 | POL_TYPES = ['Negative', 'Positive'] 3 | MOD_TYPES = ['Asserted', 'Other'] 4 | GEN_TYPES = ['Generic', 'Specific'] 5 | TEN_TYPES = ['Unspecified', 'Past', 'Future', 'Present'] 6 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from data.helpers import load_oneie_dataset 2 | -------------------------------------------------------------------------------- /data/base.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from utils import * 3 | 4 | class Document: 5 | def __init__(self, doc_id, sentences, event_mentions, entity_mentions, pred_graphs): 6 | self.doc_id = doc_id 7 | self.sentences = sentences 8 | self.words = flatten(sentences) 9 | self.event_mentions = event_mentions 10 | self.entity_mentions = entity_mentions 11 | self.num_words = len(self.words) 12 | self.pred_graphs = pred_graphs 13 | 14 | # Post-process self.event_mentions 15 | for e in self.event_mentions: 16 | _arguments = [] 17 | for argument in e['arguments']: 18 | for entity_mention in self.entity_mentions: 19 | if entity_mention['id'] == argument['entity_id']: 20 | _arguments.append({ 21 | 'text': argument['text'], 22 | 'role': argument['role'], 23 | 'entity': entity_mention, 24 | }) 25 | assert(len(_arguments) == len(e['arguments'])) 26 | e['arguments'] = _arguments 27 | 28 | # Update self.events 29 | self.events = {} 30 | for event_mention in event_mentions: 31 | mention_id = event_mention['id'] 32 | event_id = mention_id[:mention_id.rfind('-')] 33 | if not event_id in self.events: 34 | self.events[event_id] = [] 35 | self.events[event_id].append(event_mention) 36 | 37 | # Build self.coreferential_pairs 38 | self.coreferential_pairs = set() 39 | for i in range(len(event_mentions)): 40 | for j in range(i+1, len(event_mentions)): 41 | # Find the event id of the first event mention 42 | mention_i = event_mentions[i] 43 | mention_id_i = mention_i['id'] 44 | event_id_i = mention_id_i[:mention_id_i.rfind('-')] 45 | # Find the event id of the second event mention 46 | mention_j = event_mentions[j] 47 | mention_id_j = mention_j['id'] 48 | event_id_j = mention_id_j[:mention_id_j.rfind('-')] 49 | # Check if refer to the same event 50 | if event_id_i == event_id_j: 51 | loc_i = (mention_i['trigger']['start'], mention_i['trigger']['end']) 52 | loc_j = (mention_j['trigger']['start'], mention_j['trigger']['end']) 53 | self.coreferential_pairs.add((loc_i, loc_j)) 54 | self.coreferential_pairs.add((loc_j, loc_i)) 55 | 56 | # Extract pred_triggers, pred_entities, 
pred_relations, pred_event_mentions 57 | assert(len(pred_graphs) == 0 or len(pred_graphs) == len(sentences)) 58 | self.pred_trigges, self.pred_entities = [], [] 59 | self.pred_relations, self.pred_event_mentions = [], [] 60 | for graph in pred_graphs: 61 | if len(graph) > 0: 62 | for trigger in graph['triggers']: 63 | lookedup_attrs = trigger.pop(-1) 64 | self.pred_trigges.append({ 65 | 'tokens': self.words[trigger[0]:trigger[1]], 66 | 'start': trigger[0], 'end': trigger[1], 67 | 'confidence': trigger[3] 68 | }) 69 | self.pred_event_mentions.append({ 70 | 'event_type': trigger[2], 71 | 'trigger': self.pred_trigges[-1], 72 | 'arguments': [], 73 | 'event_polarity': lookedup_attrs['event_polarity'], 74 | 'event_modality': lookedup_attrs['event_modality'], 75 | 'event_genericity': lookedup_attrs['event_genericity'], 76 | 'event_tense': lookedup_attrs['event_tense'] 77 | }) 78 | for entity in graph['entities']: 79 | self.pred_entities.append({ 80 | 'tokens': self.words[entity[0]:entity[1]], 81 | 'start': entity[0], 'end': entity[1], 82 | 'entity_type': entity[2], 'mention_type': entity[3], 83 | 'confidence': entity[4] 84 | }) 85 | for relation in graph['relations']: 86 | arg1 = self.pred_entities[relation[0]] 87 | arg2 = self.pred_entities[relation[1]] 88 | self.pred_relations.append({ 89 | 'arg1': arg1, 'arg2': arg2, 90 | 'relation_type': relation[2], 91 | 'confidence': relation[3] 92 | }) 93 | for role in graph['roles']: 94 | event_mention = self.pred_event_mentions[role[0]] 95 | entity = self.pred_entities[role[1]] 96 | event_mention['arguments'].append({ 97 | 'entity': entity, 98 | 'role': role[2], 99 | 'confidence': role[-1] 100 | }) 101 | 102 | # Add field has_correct_trigger to each event mention 103 | trigger_locs = set() 104 | for e in self.event_mentions: 105 | e['has_correct_trigger'] = True 106 | trigger_locs.add((e['trigger']['start'], e['trigger']['end'])) 107 | for e in self.pred_event_mentions: 108 | trigger_start = e['trigger']['start'] 109 | trigger_end = e['trigger']['end'] 110 | e['has_correct_trigger'] = (trigger_start, trigger_end) in trigger_locs 111 | for e in self.event_mentions: assert(e['has_correct_trigger']) # Sanity test 112 | 113 | 114 | class Dataset: 115 | def __init__(self, data, tokenizer, sliding_window_size = 512): 116 | ''' 117 | data: A list of GroundTruthDocument 118 | tokenizer: A transformer Tokenizer 119 | sliding_window_size: Size of sliding window (for a long document, we split it into overlapping segments) 120 | ''' 121 | self.data = data 122 | 123 | # Tokenize the documents 124 | for doc in self.data: 125 | # Build doc_tokens, doc.word_starts_indexes 126 | doc_tokens, word_starts_indexes, start_index = [], [], 0 127 | for w in doc.words: 128 | word_tokens = tokenizer.tokenize(w) 129 | doc_tokens += word_tokens 130 | word_starts_indexes.append(start_index) 131 | start_index += len(word_tokens) 132 | doc.word_starts_indexes = word_starts_indexes 133 | assert(len(doc.word_starts_indexes) == len(doc.words)) 134 | 135 | # Build token_windows, mask_windows, and input_masks 136 | doc_token_ids = tokenizer.convert_tokens_to_ids(doc_tokens) 137 | doc.token_windows, doc.mask_windows = \ 138 | convert_to_sliding_window(doc_token_ids, sliding_window_size, tokenizer) 139 | doc.input_masks = extract_input_masks_from_mask_windows(doc.mask_windows) 140 | 141 | # Compute the set of event types 142 | self.event_types = set() 143 | for doc in self.data: 144 | for e in doc.event_mentions: 145 | self.event_types.add(e['event_type']) 146 | self.event_types = 
sorted(list(self.event_types)) 147 | 148 | def __len__(self): 149 | return len(self.data) 150 | 151 | def __getitem__(self, item): 152 | return self.data[item] 153 | -------------------------------------------------------------------------------- /data/helpers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | 4 | from os.path import join 5 | from data.base import Dataset, Document 6 | 7 | def load_oneie_dataset( 8 | base_path, tokenizer, 9 | predictions_path=None, remove_doc_with_no_events=True, 10 | increase_ace_dev_set=False 11 | ): 12 | id2split, id2sents = {}, {} 13 | 14 | # Read ground-truth data files 15 | for split in ['train', 'dev', 'test']: 16 | path = join(base_path, '{}.oneie.json'.format(split)) 17 | with open(path, 'r', encoding='utf-8') as r: 18 | for line in r: 19 | sent_inst = json.loads(line) 20 | doc_id = sent_inst['doc_id'] 21 | id2split[doc_id] = split 22 | # Update id2sents 23 | if not doc_id in id2sents: 24 | id2sents[doc_id] = [] 25 | id2sents[doc_id].append(sent_inst) 26 | 27 | # Read prediction files (if available) 28 | predicted_attrs = None 29 | if predictions_path: 30 | sentid2graph = {} 31 | for split in ['train', 'dev', 'test']: 32 | path = join(predictions_path, '{}.json'.format(split)) 33 | with open(path, 'r', encoding='utf-8') as r: 34 | for line in r: 35 | sent_preds = json.loads(line) 36 | sentid2graph[sent_preds['sent_id']] = sent_preds['graph'] 37 | 38 | # Read attributes prediction files 39 | attrs_preds_path = join(predictions_path, 'attrs_preds.json') 40 | predicted_attrs = json.load(open(attrs_preds_path, 'r')) 41 | _predicted_attrs = {} 42 | for key in predicted_attrs: 43 | split_index = key.rfind('.(') 44 | doc_id = key[:split_index] 45 | start, end = key[split_index+2:-1].split('-') 46 | start, end = int(start), int(end) 47 | _predicted_attrs[(doc_id, start, end)] = predicted_attrs[key] 48 | predicted_attrs = _predicted_attrs 49 | 50 | # Parse documents one-by-one 51 | train, dev, test = [], [], [] 52 | for doc_id in id2sents: 53 | words_ctx, pred_trigger_ctx, pred_entities_ctx = 0, 0, 0 54 | sents = id2sents[doc_id] 55 | sentences, event_mentions, entity_mentions, pred_graphs = [], [], [], [] 56 | for sent_index, sent in enumerate(sents): 57 | sentences.append(sent['tokens']) 58 | # Parse entity mentions 59 | for entity_mention in sent['entity_mentions']: 60 | entity_mention['start'] += words_ctx 61 | entity_mention['end'] += words_ctx 62 | entity_mentions.append(entity_mention) 63 | # Parse event mentions 64 | for event_mention in sent['event_mentions']: 65 | event_mention['sent_index'] = sent_index 66 | event_mention['trigger']['start'] += words_ctx 67 | event_mention['trigger']['end'] += words_ctx 68 | event_mentions.append(event_mention) 69 | # Update pred_graphs 70 | if predictions_path: 71 | graph = sentid2graph.get(sent['sent_id'], {}) 72 | if len(graph) > 0: 73 | for entity in graph['entities']: 74 | entity[0] += words_ctx 75 | entity[1] += words_ctx 76 | for trigger in graph['triggers']: 77 | trigger[0] += words_ctx 78 | trigger[1] += words_ctx 79 | # Look up predicted attributes 80 | if predicted_attrs: 81 | lookedup_attrs = predicted_attrs[(doc_id, trigger[0], trigger[1])] 82 | trigger.append(lookedup_attrs) 83 | for relation in graph['relations']: 84 | relation[0] += pred_entities_ctx 85 | relation[1] += pred_entities_ctx 86 | for role in graph['roles']: 87 | role[0] += pred_trigger_ctx 88 | role[1] += pred_entities_ctx 89 | pred_trigger_ctx += 
len(graph['triggers']) 90 | pred_entities_ctx += len(graph['entities']) 91 | pred_graphs.append(graph) 92 | # Update words_ctx 93 | words_ctx += len(sent['tokens']) 94 | doc = Document(doc_id, sentences, event_mentions, entity_mentions, pred_graphs) 95 | split = id2split[doc_id] 96 | if split == 'train': 97 | if not remove_doc_with_no_events or len(event_mentions) > 0: 98 | train.append(doc) 99 | if split == 'dev': dev.append(doc) 100 | if split == 'test': test.append(doc) 101 | 102 | if increase_ace_dev_set: 103 | # Randomly move 12 docs from train set to dev set 104 | random.seed(0) 105 | random.shuffle(train) 106 | dev = train[:12] + dev 107 | train = train[12:] 108 | 109 | # Convert to Document class 110 | train, dev, test = Dataset(train, tokenizer), Dataset(dev, tokenizer), Dataset(test, tokenizer) 111 | 112 | # Verbose 113 | print('Loaded {} train examples'.format(len(train))) 114 | print('Loaded {} dev examples'.format(len(dev))) 115 | print('Loaded {} test examples'.format(len(test))) 116 | 117 | return train, dev, test 118 | -------------------------------------------------------------------------------- /evaluate_and_visualize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import json 4 | import torch 5 | import tqdm 6 | import pyhocon 7 | import random 8 | 9 | from transformers import * 10 | from models import EventCorefModel 11 | from scorer import evaluate 12 | from argparse import ArgumentParser 13 | from data import load_oneie_dataset 14 | from utils import RunningAverage, prepare_configs, flatten 15 | from scorer import get_predicted_antecedents 16 | 17 | def generate_coref_preds(model, data, output_path='predictions.json'): 18 | predictions = {} 19 | for inst in data: 20 | doc_words = inst.words 21 | event_mentions = inst.event_mentions 22 | preds = model(inst, is_training=False)[1] 23 | preds = [x.cpu().data.numpy() for x in preds] 24 | top_antecedents, top_antecedent_scores = preds[2:] 25 | predicted_antecedents = get_predicted_antecedents(top_antecedents, top_antecedent_scores) 26 | 27 | predicted_clusters, m2cluster = [], {} 28 | for ix, e in enumerate(event_mentions): 29 | if predicted_antecedents[ix] < 0: 30 | cluster_id = len(predicted_clusters) 31 | predicted_clusters.append([e]) 32 | else: 33 | antecedent_idx = predicted_antecedents[ix] 34 | p_e = event_mentions[antecedent_idx] 35 | cluster_id = m2cluster[p_e['id']] 36 | predicted_clusters[cluster_id].append(e) 37 | m2cluster[e['id']] = cluster_id 38 | # Update predictions 39 | predictions[inst.doc_id] = {} 40 | predictions[inst.doc_id]['words']= doc_words 41 | predictions[inst.doc_id]['predicted_clusters'] = predicted_clusters 42 | 43 | with open(output_path, 'w+') as outfile: 44 | json.dump(predictions, outfile) 45 | 46 | def generate_visualizations(sample_outputs, output_path='visualization.html'): 47 | with open(sample_outputs) as json_file: 48 | data = json.load(json_file) 49 | 50 | with open(output_path, 'w+') as output_file: 51 | for doc_id in data.keys(): 52 | doc = data[doc_id] 53 | doc_words = doc['words'] 54 | clusters = doc['predicted_clusters'] 55 | event_mentions = flatten(clusters) 56 | output_file.write('Document {}
<br>'.format(doc_id)) 57 | output_file.write('{}<br><br><br>'.format(doc_to_html(doc, event_mentions))) 58 | for ix, cluster in enumerate(doc['predicted_clusters']): 59 | if len(cluster) == 1: continue 60 | output_file.write('Cluster {}<br>'.format(ix+1)) 61 | for em in cluster: 62 | output_file.write('{}<br>'.format(event_mentions_to_html(doc_words, em))) 63 | output_file.write('<br><br>') 64 | output_file.write('<br><br>
') 65 | 66 | def doc_to_html(doc, event_mentions): 67 | doc_words = doc['words'] 68 | doc_words = [str(word) for word in doc_words] 69 | for e in event_mentions: 70 | t_start, t_end = e['trigger']['start'], e['trigger']['end'] - 1 71 | doc_words[t_start] = '' + doc_words[t_start] 72 | doc_words[t_end] = doc_words[t_end] + '' 73 | return ' '.join(doc_words) 74 | 75 | def event_mentions_to_html(doc_words, em): 76 | trigger_start = em['trigger']['start'] 77 | trigger_end = em['trigger']['end'] 78 | context_left = ' '.join(doc_words[trigger_start-10:trigger_start]) 79 | context_right = ' '.join(doc_words[trigger_end:trigger_end+10]) 80 | final_str = context_left + ' ' + em['trigger']['text'] + ' ' + context_right 81 | final_str = 'Event {} (Type {}) | '.format(em['id'], em['event_type']) + final_str 82 | return final_str 83 | 84 | def evaluate_and_visualize(config_name, model_path, output_path): 85 | # Prepare tokenizer, dataset, and model 86 | configs = prepare_configs(config_name, verbose=False) 87 | tokenizer = BertTokenizer.from_pretrained(configs['transformer']) 88 | train_set, dev_set, test_set = load_oneie_dataset(configs['base_dataset_path'], tokenizer) 89 | model = EventCorefModel(configs, train_set.event_types) 90 | 91 | # Reload the model and evaluate 92 | checkpoint = torch.load(model_path) 93 | model.load_state_dict(checkpoint['model_state_dict']) 94 | print('Evaluation on the dev set', flush=True) 95 | evaluate(model, dev_set, configs)['avg'] 96 | print('Evaluation on the test set', flush=True) 97 | evaluate(model, test_set, configs) 98 | 99 | # Generate visualizations (for the test set) 100 | generate_coref_preds(model, test_set, '_predictions.json') 101 | generate_visualizations('_predictions.json', output_path) 102 | os.remove('_predictions.json') 103 | 104 | if __name__ == "__main__": 105 | # Parse argument 106 | parser = ArgumentParser() 107 | parser.add_argument('-c', '--config_name') 108 | parser.add_argument('-m', '--model_path') 109 | parser.add_argument('-o', '--output_path', default='visualization.html') 110 | args = parser.parse_args() 111 | 112 | # Start training 113 | evaluate_and_visualize(args.config_name, args.model_path, args.output_path) 114 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import utils 6 | 7 | from constants import * 8 | from models.base import * 9 | from models.helpers import * 10 | from models.encoder import * 11 | 12 | class EventCorefModel(BaseModel): 13 | def __init__(self, configs, event_types): 14 | BaseModel.__init__(self, configs) 15 | self.event_types = sorted(event_types) 16 | 17 | # Transformer Encoder 18 | self.transformer_encoder = TransformerEncoder(configs) 19 | self.linear = nn.Linear(3 * self.transformer_encoder.hidden_size, configs['latent_size']) 20 | 21 | # Symbolic Features Encoder 22 | self.symbolic_encoder = SymbolicFeaturesEncoder(configs, self.event_types) 23 | 24 | # Feature Fusion Network 25 | self.fusion_network = FeatureFusionNetwork(latent_size=configs['latent_size'], 26 | combine_strategy=configs['combine_strategy'], 27 | nb_modules=len(self.symbolic_encoder.enabled_features)) 28 | 29 | # Pair Scorer 30 | self.dropout = nn.Dropout(configs['dropout_rate']) 31 | self.pair_scorer = FFNNModule(input_size=self.get_pair_size(), 32 | hidden_sizes=[configs['ffnn_size']] * 
configs['ffnn_depth'], 33 | output_size=1, 34 | dropout=configs['dropout_rate']) 35 | 36 | # Move model to device 37 | self.to(self.device) 38 | 39 | def forward(self, inst, is_training): 40 | self.train() if is_training else self.eval() 41 | 42 | # Extract event_mentions and entity_mentions 43 | if self.configs['use_groundtruth']: 44 | entity_mentions = inst.entity_mentions 45 | event_mentions = inst.event_mentions 46 | else: 47 | entity_mentions = inst.pred_entities 48 | event_mentions = inst.pred_event_mentions 49 | 50 | # Convert to Torch Tensor 51 | input_ids = torch.tensor(inst.token_windows).to(self.device) 52 | input_masks = torch.tensor(inst.input_masks).to(self.device) 53 | mask_windows = torch.tensor(inst.mask_windows).to(self.device) 54 | num_windows, window_size = input_ids.size() 55 | 56 | # Apply the Transfomer encoder to get tokens features 57 | tokens_features = self.transformer_encoder(input_ids, input_masks, mask_windows, 58 | num_windows, window_size, is_training).squeeze() 59 | num_tokens = tokens_features.size()[0] 60 | 61 | # Compute word_features (averaging) 62 | word_features = [] 63 | word_starts_indexes = inst.word_starts_indexes 64 | word_ends_indexes = word_starts_indexes[1:] + [num_tokens] 65 | word_features = get_span_emb(tokens_features, word_starts_indexes, word_ends_indexes) 66 | assert(word_features.size()[0] == inst.num_words) 67 | 68 | # Compute entity_features 69 | entity_starts = [m['start'] for m in entity_mentions] 70 | entity_ends = [m['end'] for m in entity_mentions] 71 | entity_features = get_span_emb(word_features, entity_starts, entity_ends) 72 | 73 | # Compute trigger_features 74 | event_starts = [e['trigger']['start'] for e in event_mentions] 75 | event_ends = [e['trigger']['end'] for e in event_mentions] 76 | trigger_features = get_span_emb(word_features, event_starts, event_ends) 77 | 78 | # Compute pair_trigger_features 79 | pair_trigger_features = get_pair_embs(trigger_features) 80 | pair_trigger_features = F.relu(self.linear(pair_trigger_features)) 81 | 82 | # Compute pair_features 83 | if len(self.symbolic_encoder.enabled_features) == 0: 84 | # Not using any additional symbolic features 85 | pair_features = pair_trigger_features 86 | else: 87 | # Use additional symbolic features 88 | pair_symbolic_features = self.symbolic_encoder(event_mentions) 89 | pair_features = self.fusion_network(pair_trigger_features, pair_symbolic_features) 90 | 91 | # Compute pair_scores 92 | pair_features = self.dropout(pair_features) 93 | pair_scores = self.pair_scorer(pair_features) 94 | 95 | # Compute antecedent_scores 96 | k = len(event_mentions) 97 | span_range = torch.arange(0, k).to(self.device) 98 | antecedent_offsets = span_range.view(-1, 1) - span_range.view(1, -1) 99 | antecedents_mask = antecedent_offsets >= 1 # [k, k] 100 | antecedent_scores = pair_scores + torch.log(antecedents_mask.float()) 101 | 102 | # Compute antecedent_labels 103 | candidate_cluster_ids = self.get_cluster_ids(event_mentions, inst.coreferential_pairs) 104 | same_cluster_indicator = candidate_cluster_ids.unsqueeze(0) == candidate_cluster_ids.unsqueeze(1) 105 | same_cluster_indicator = same_cluster_indicator & antecedents_mask 106 | 107 | non_dummy_indicator = (candidate_cluster_ids > -1).unsqueeze(1) 108 | pairwise_labels = same_cluster_indicator & non_dummy_indicator 109 | dummy_labels = ~pairwise_labels.any(1, keepdim=True) 110 | antecedent_labels = torch.cat([dummy_labels, pairwise_labels], 1) 111 | 112 | # Compute loss 113 | dummy_zeros = torch.zeros([k, 
1]).to(self.device) 114 | antecedent_scores = torch.cat([dummy_zeros, antecedent_scores], dim=1) 115 | gold_scores = antecedent_scores + torch.log(antecedent_labels.float()) 116 | log_norm = logsumexp(antecedent_scores, dim = 1) 117 | loss = torch.sum(log_norm - logsumexp(gold_scores, dim=1)) 118 | 119 | # loss and preds 120 | top_antecedents = torch.arange(0, k).to(self.device) 121 | top_antecedents = top_antecedents.unsqueeze(0).repeat(k, 1) 122 | preds = [torch.tensor(event_starts), 123 | torch.tensor(event_ends), 124 | top_antecedents, 125 | antecedent_scores] 126 | 127 | return loss, preds 128 | 129 | def get_cluster_ids(self, event_mentions, coreferential_pairs): 130 | cluster_ids = [-1] * len(event_mentions) 131 | nb_nonsingleton_clusters = 0 132 | for i in range(len(event_mentions)): 133 | mention_i = event_mentions[i] 134 | loc_i = (mention_i['trigger']['start'], mention_i['trigger']['end']) 135 | for j in range(i-1, -1, -1): 136 | mention_j = event_mentions[j] 137 | loc_j = (mention_j['trigger']['start'], mention_j['trigger']['end']) 138 | if ((loc_i, loc_j)) in coreferential_pairs: 139 | if cluster_ids[j] > -1: 140 | cluster_ids[i] = cluster_ids[j] 141 | else: 142 | cluster_ids[i] = cluster_ids[j] = nb_nonsingleton_clusters 143 | nb_nonsingleton_clusters += 1 144 | return torch.tensor(cluster_ids).to(self.device) 145 | 146 | 147 | def get_pair_size(self): 148 | return (1 + len(self.symbolic_encoder.enabled_features)) * self.configs['latent_size'] 149 | -------------------------------------------------------------------------------- /models/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import json 6 | import random 7 | 8 | from transformers import * 9 | from math import ceil, floor 10 | 11 | # Optimizer 12 | class ModelOptimizer(object): 13 | def __init__(self, transformer_optimizer, transformer_scheduler, 14 | task_optimizer, task_init_lr, max_iter): 15 | self.iter = 0 16 | self.transformer_optimizer = transformer_optimizer 17 | self.transformer_scheduler = transformer_scheduler 18 | 19 | self.task_optimizer = task_optimizer 20 | self.task_init_lr = task_init_lr 21 | self.max_iter = max_iter 22 | 23 | def zero_grad(self): 24 | self.transformer_optimizer.zero_grad() 25 | self.task_optimizer.zero_grad() 26 | 27 | def step(self): 28 | self.iter += 1 29 | self.transformer_optimizer.step() 30 | self.task_optimizer.step() 31 | self.transformer_scheduler.step() 32 | self.poly_lr_scheduler(self.task_optimizer, self.task_init_lr, self.iter, self.max_iter) 33 | 34 | @staticmethod 35 | def poly_lr_scheduler(optimizer, init_lr, iter, max_iter, 36 | lr_decay_iter=1, power=1.0): 37 | """Polynomial decay of learning rate 38 | :param init_lr is base learning rate 39 | :param iter is a current iteration 40 | :param max_iter is number of maximum iterations 41 | :param lr_decay_iter how frequently decay occurs, default is 1 42 | :param power is a polymomial power 43 | """ 44 | if iter % lr_decay_iter or iter > max_iter: 45 | return optimizer 46 | 47 | lr = init_lr*(1 - iter/max_iter)**power 48 | for param_group in optimizer.param_groups: 49 | param_group['lr'] = lr 50 | 51 | return lr 52 | 53 | # BaseModel 54 | class BaseModel(nn.Module): 55 | def __init__(self, configs): 56 | super(BaseModel, self).__init__() 57 | self.configs = configs 58 | self.device = torch.device('cuda' if torch.cuda.is_available() and not configs['no_cuda'] else 
'cpu') 59 | 60 | def get_optimizer(self, num_warmup_steps, num_train_steps, start_iter = 0): 61 | # Extract transformer parameters and task-specific parameters 62 | transformer_params, task_params = [], [] 63 | for name, param in self.named_parameters(): 64 | if param.requires_grad: 65 | if "transformer.encoder" in name: 66 | transformer_params.append((name, param)) 67 | else: 68 | task_params.append((name, param)) 69 | 70 | # Prepare transformer_optimizer and transformer_scheduler 71 | no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] 72 | optimizer_grouped_parameters = [ 73 | {'params': [p for n, p in transformer_params if not any(nd in n for nd in no_decay)], 'weight_decay': self.configs['transformer_weight_decay']}, 74 | {'params': [p for n, p in transformer_params if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} 75 | ] 76 | transformer_optimizer = AdamW( 77 | optimizer_grouped_parameters, 78 | lr=self.configs['transformer_learning_rate'], 79 | betas=(0.9, 0.999), 80 | eps=1e-06, 81 | ) 82 | transformer_scheduler = get_linear_schedule_with_warmup(transformer_optimizer, 83 | num_warmup_steps=num_warmup_steps, 84 | num_training_steps=num_train_steps) 85 | 86 | # Prepare the optimizer for task-specific parameters 87 | task_optimizer = optim.Adam([p for n, p in task_params], lr=self.configs['task_learning_rate']) 88 | 89 | # Unify transformer_optimizer and task_optimizer 90 | model_optimizer = ModelOptimizer(transformer_optimizer, transformer_scheduler, 91 | task_optimizer, self.configs['task_learning_rate'], 92 | num_train_steps) 93 | model_optimizer.iter = start_iter 94 | 95 | return model_optimizer 96 | 97 | # FFNN Module 98 | class FFNNModule(nn.Module): 99 | """ Generic FFNN-based Scoring Module 100 | """ 101 | def __init__(self, input_size, hidden_sizes, output_size, dropout = 0.2): 102 | super(FFNNModule, self).__init__() 103 | self.layers = [] 104 | 105 | prev_size = input_size 106 | for hidden_size in hidden_sizes: 107 | self.layers.append(nn.Linear(prev_size, hidden_size)) 108 | self.layers.append(nn.ReLU(True)) 109 | self.layers.append(nn.Dropout(dropout)) 110 | prev_size = hidden_size 111 | 112 | self.layers.append(nn.Linear(prev_size, output_size)) 113 | 114 | self.layer_module = nn.ModuleList(self.layers) 115 | 116 | def forward(self, x): 117 | out = x 118 | for layer in self.layer_module: 119 | out = layer(out) 120 | return out.squeeze() 121 | 122 | # FeatureSelectionModule 123 | class FeatureSelectionModule(nn.Module): 124 | def __init__(self, latent_size, combine_strategy): 125 | super(FeatureSelectionModule, self).__init__() 126 | 127 | self.latent_size = latent_size 128 | self.combine_strategy = combine_strategy 129 | assert(combine_strategy in ['simple', 'gated']) 130 | 131 | if combine_strategy == 'gated': 132 | # Gate Computation Parameters 133 | self.Wu = nn.Linear(2 * latent_size, latent_size) 134 | 135 | def forward(self, x1, x2): 136 | if self.combine_strategy == 'simple': 137 | return x2 138 | if self.combine_strategy == 'gated': 139 | x = torch.cat([x1, x2], dim=-1) 140 | # Orthogonal Decomposition 141 | x1_dot_x2 = torch.sum(x1 * x2, dim=-1, keepdim=True) 142 | x1_dot_x1 = torch.sum(x1 * x1, dim=-1, keepdim=True) 143 | parallel = (x1_dot_x2 / x1_dot_x1) * x1 144 | orthogonal = x2 - parallel 145 | # Gates 146 | ug = torch.sigmoid(self.Wu(x)) 147 | x2_prime = (1 - ug) * parallel + ug * orthogonal 148 | return x2_prime 149 | 150 | # FeatureFusionNetwork 151 | class FeatureFusionNetwork(nn.Module): 152 | def __init__(self, latent_size, 
combine_strategy, nb_modules): 153 | super(FeatureFusionNetwork, self).__init__() 154 | 155 | self.latent_size = latent_size 156 | self.combine_strategy = combine_strategy 157 | self.nb_modules = nb_modules 158 | 159 | modules = [] 160 | for _ in range(nb_modules): 161 | modules.append(FeatureSelectionModule(latent_size, combine_strategy)) 162 | self.fusion_modules = nn.ModuleList(modules) 163 | 164 | def forward(self, c, xs): 165 | features = [c] 166 | for module, x in zip(self.fusion_modules, xs): 167 | features.append(module(c, x)) 168 | return torch.cat(features, dim=-1) 169 | 170 | @property 171 | def output_size(self): 172 | return (self.nb_modules + 1) * self.latent_size 173 | -------------------------------------------------------------------------------- /models/encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import utils 6 | import random 7 | 8 | from constants import * 9 | from transformers import * 10 | from models.helpers import * 11 | 12 | class TransformerEncoder(nn.Module): 13 | def __init__(self, configs): 14 | super(TransformerEncoder, self).__init__() 15 | self.configs = configs 16 | 17 | # Transformer Encoder 18 | self.transformer = AutoModel.from_pretrained(configs['transformer']) 19 | self.transformer_dropout = nn.Dropout(configs['transformer_dropout_rate']) 20 | self.transformer.config.gradient_checkpointing = configs['gradient_checkpointing'] 21 | self.hidden_size = self.transformer.config.hidden_size 22 | 23 | def forward(self, input_ids, input_masks, mask_windows, 24 | num_windows, window_size, is_training, 25 | context_lengths = [0], token_type_ids = None): 26 | self.train() if is_training else self.eval() 27 | num_contexts = len(context_lengths) 28 | 29 | features = self.transformer(input_ids, input_masks, token_type_ids)[0] 30 | features = features.view(num_contexts, num_windows, -1, self.hidden_size) 31 | 32 | flattened_features = [] 33 | for i in range(num_contexts): 34 | _features = features[i, :, :, :] 35 | _features = _features[:, context_lengths[i]:, :] 36 | _features = _features[:, : window_size, :] 37 | flattened_features.append(self.flatten(_features, mask_windows)) 38 | flattened_features = torch.cat(flattened_features) 39 | 40 | return self.transformer_dropout(flattened_features) 41 | 42 | def flatten(self, features, mask_windows): 43 | num_windows, window_size, hidden_size = features.size() 44 | flattened_emb = torch.reshape(features, (num_windows * window_size, hidden_size)) 45 | boolean_mask = mask_windows > 0 46 | boolean_mask = boolean_mask.view([num_windows * window_size]) 47 | return flattened_emb[boolean_mask].unsqueeze(0) 48 | 49 | class SymbolicFeaturesEncoder(nn.Module): 50 | def __init__(self, configs, event_types): 51 | super(SymbolicFeaturesEncoder, self).__init__() 52 | self.configs = configs 53 | self.feature_size = configs['feature_size'] 54 | self.latent_size = configs['latent_size'] 55 | self.event_types = event_types 56 | 57 | # Embeddings and Linear Layers 58 | if configs['use_typ_features']: 59 | self.typ_embed = nn.Embedding(len(event_types), self.feature_size) 60 | self.typ_linear = nn.Linear(3 * self.feature_size, self.latent_size) 61 | if configs['use_pol_features']: 62 | self.pol_embed = nn.Embedding(len(POL_TYPES), self.feature_size) 63 | self.pol_linear = nn.Linear(3 * self.feature_size, self.latent_size) 64 | if configs['use_mod_features']: 65 | self.mod_embed = 
nn.Embedding(len(MOD_TYPES), self.feature_size) 66 | self.mod_linear = nn.Linear(3 * self.feature_size, self.latent_size) 67 | if configs['use_gen_features']: 68 | self.gen_embed = nn.Embedding(len(GEN_TYPES), self.feature_size) 69 | self.gen_linear = nn.Linear(3 * self.feature_size, self.latent_size) 70 | if configs['use_ten_features']: 71 | self.ten_embed = nn.Embedding(len(TEN_TYPES), self.feature_size) 72 | self.ten_linear = nn.Linear(3 * self.feature_size, self.latent_size) 73 | 74 | # Initialize Embeddings 75 | for name, param in self.named_parameters(): 76 | if (not 'transformer' in name.lower()) and 'embedding' in name.lower(): 77 | print('Re-initialize embedding {}'.format(name)) 78 | param.data.uniform_(-0.5, 0.5) 79 | 80 | def forward(self, events): 81 | features = [] 82 | if self.configs['use_typ_features']: features.append(self.get_features(events, 'event_type')) 83 | if self.configs['use_pol_features']: features.append(self.get_features(events, 'event_polarity')) 84 | if self.configs['use_mod_features']: features.append(self.get_features(events, 'event_modality')) 85 | if self.configs['use_gen_features']: features.append(self.get_features(events, 'event_genericity')) 86 | if self.configs['use_ten_features']: features.append(self.get_features(events, 'event_tense')) 87 | return features 88 | 89 | def get_features(self, events, key): 90 | if key == 'event_type': 91 | embed, linear, value_types = self.typ_embed, self.typ_linear, self.event_types 92 | noisy_prob = self.configs['typ_noise_prob'] 93 | if key == 'event_polarity': 94 | embed, linear, value_types = self.pol_embed, self.pol_linear, POL_TYPES 95 | noisy_prob = self.configs['pol_noise_prob'] 96 | if key == 'event_modality': 97 | embed, linear, value_types = self.mod_embed, self.mod_linear, MOD_TYPES 98 | noisy_prob = self.configs['mod_noise_prob'] 99 | if key == 'event_genericity': 100 | embed, linear, value_types = self.gen_embed, self.gen_linear, GEN_TYPES 101 | noisy_prob = self.configs['gen_noise_prob'] 102 | if key == 'event_tense': 103 | embed, linear, value_types = self.ten_embed, self.ten_linear, TEN_TYPES 104 | noisy_prob = self.configs['ten_noise_prob'] 105 | 106 | values = [] 107 | for e in events: 108 | value = e[key] 109 | if self.training and random.uniform(0, 1) < noisy_prob and e['has_correct_trigger']: 110 | value = random.choice(value_types) 111 | values.append(value_types.index(value)) 112 | 113 | values = torch.tensor(values).to(next(self.parameters()).device) 114 | latent_feats = F.relu(linear(get_pair_embs(embed(values)))) 115 | return latent_feats 116 | 117 | @property 118 | def enabled_features(self): 119 | enabled_features = [] 120 | if self.configs['use_typ_features']: enabled_features.append('event_type') 121 | if self.configs['use_pol_features']: enabled_features.append('event_polarity') 122 | if self.configs['use_mod_features']: enabled_features.append('event_modality') 123 | if self.configs['use_gen_features']: enabled_features.append('event_genericity') 124 | if self.configs['use_ten_features']: enabled_features.append('event_tense') 125 | return enabled_features 126 | -------------------------------------------------------------------------------- /models/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def get_span_emb(context_features, span_starts, span_ends): 4 | num_tokens = context_features.size()[0] 5 | 6 | features = [] 7 | for s, e in zip(span_starts, span_ends): 8 | sliced_features = context_features[s:e, :] 9 | 
features.append(torch.mean(sliced_features, dim=0, keepdim=True)) 10 | features = torch.cat(features, dim=0) 11 | return features 12 | 13 | def get_pair_embs(event_features): 14 | n, d = event_features.size() 15 | features_list = [] 16 | 17 | # Compute diff_embs and prod_embs 18 | src_embs = event_features.view(1, n, d).repeat([n, 1, 1]) 19 | target_embs = event_features.view(n, 1, d).repeat([1, n, 1]) 20 | prod_embds = src_embs * target_embs 21 | 22 | # Update features_list 23 | features_list.append(src_embs) 24 | features_list.append(target_embs) 25 | features_list.append(prod_embds) 26 | 27 | # Concatenation 28 | pair_embs = torch.cat(features_list, 2) 29 | 30 | return pair_embs 31 | 32 | def logsumexp(inputs, dim=None, keepdim=False): 33 | """Numerically stable logsumexp. 34 | Args: 35 | inputs: A Variable with any shape. 36 | dim: An integer. 37 | keepdim: A boolean. 38 | Returns: 39 | Equivalent of log(sum(exp(inputs), dim=dim, keepdim=keepdim)). 40 | """ 41 | # For a 1-D array x (any array along a single dimension), 42 | # log sum exp(x) = s + log sum exp(x - s) 43 | # with s = max(x) being a common choice. 44 | if dim is None: 45 | inputs = inputs.view(-1) 46 | dim = 0 47 | s, _ = torch.max(inputs, dim=dim, keepdim=True) 48 | outputs = s + (inputs - s).exp().sum(dim=dim, keepdim=True).log() 49 | if not keepdim: 50 | outputs = outputs.squeeze(dim) 51 | return outputs 52 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/README.txt: -------------------------------------------------------------------------------- 1 | NAME 2 | CorScorer: Perl package for scoring coreference resolution systems 3 | using different metrics. 4 | 5 | 6 | VERSION 7 | v8.01 -- reference implementations of MUC, B-cubed, CEAF and BLANC metrics. 8 | 9 | 10 | CHANGES SINCE v8.0 11 | - fixed a bug that crashed the BLANC scorer when a duplicate singleton 12 | mention was present in the response. 13 | 14 | INSTALLATION 15 | Requirements: 16 | 1. Perl: downloadable from http://perl.org 17 | 2. Algorithm-Munkres: included in this package and downloadable 18 | from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08 19 | 20 | USE 21 | This package is distributed with two scripts to execute the scorer from 22 | the command line. 23 | 24 | Windows (tm): scorer.bat 25 | Linux: scorer.pl 26 | 27 | 28 | SYNOPSIS 29 | use CorScorer; 30 | 31 | $metric = 'ceafm'; 32 | 33 | # Scores the whole dataset 34 | &CorScorer::Score($metric, $keys_file, $response_file); 35 | 36 | # Scores one file 37 | &CorScorer::Score($metric, $keys_file, $response_file, $name); 38 | 39 | 40 | INPUT 41 | metric: the metric desired to score the results: 42 | muc: MUCScorer (Vilain et al, 1995) 43 | bcub: B-Cubed (Bagga and Baldwin, 1998) 44 | ceafm: CEAF (Luo et al., 2005) using mention-based similarity 45 | ceafe: CEAF (Luo et al., 2005) using entity-based similarity 46 | blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions 47 | all: uses all the metrics to score 48 | 49 | keys_file: file with expected coreference chains in CoNLL-2011/2012 format 50 | 51 | response_file: file with output of coreference system (CoNLL-2011/2012 format) 52 | 53 | name: [optional] the name of the document to score. If name is not 54 | given, all the documents in the dataset will be scored. If given 55 | name is "none" then all the documents are scored but only total 56 | results are shown. 
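For instance, a single invocation from inside the reference-coreference-scorers-8.01 directory could look like the line below (an illustrative sketch that simply follows the argument order documented above, using one of the bundled test files from test/DataFiles):

    perl scorer.pl muc test/DataFiles/TC-A.key test/DataFiles/TC-A-1.response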
57 | 58 | 59 | OUTPUT 60 | The score subroutine returns an array with four values in this order: 61 | 1) Recall numerator 62 | 2) Recall denominator 63 | 3) Precision numerator 64 | 4) Precision denominator 65 | 66 | Also recall, precision and F1 are printed in the standard output when variable 67 | $VERBOSE is not null. 68 | 69 | Final scores: 70 | Recall = recall_numerator / recall_denominator 71 | Precision = precision_numerator / precision_denominator 72 | F1 = 2 * Recall * Precision / (Recall + Precision) 73 | 74 | Identification of mentions 75 | An scorer for identification of mentions (recall, precision and F1) is also included. 76 | Mentions from system response are compared with key mentions. This version performs 77 | strict mention matching as was used in the CoNLL-2011 and 2012 shared tasks. 78 | 79 | AUTHORS 80 | Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena lsi.upc.edu 81 | Sameer Pradhan, sameer.pradhan childrens.harvard.edu 82 | Sebastian Martschat, sebastian.martschat h-its.org 83 | Xiaoqiang Luo, xql google.com 84 | 85 | COPYRIGHT AND LICENSE 86 | Copyright (C) 2009-2011, Emili Sapena esapena lsi.upc.edu 87 | 2011-2014, Sameer Pradhan sameer.pradhan childrens.harvard.edu 88 | 89 | This program is free software; you can redistribute it and/or modify it 90 | under the terms of the GNU General Public License as published by the 91 | Free Software Foundation; either version 2 of the License, or (at your 92 | option) any later version. This program is distributed in the hope that 93 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied 94 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 95 | GNU General Public License for more details. 96 | 97 | You should have received a copy of the GNU General Public License along 98 | with this program; if not, write to the Free Software Foundation, Inc., 99 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 100 | 101 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/lib/Algorithm/Munkres.pm: -------------------------------------------------------------------------------- 1 | package Algorithm::Munkres; 2 | 3 | use 5.006; 4 | use strict; 5 | use warnings; 6 | 7 | require Exporter; 8 | 9 | our @ISA = qw(Exporter); 10 | 11 | our @EXPORT = qw( assign ); 12 | 13 | our $VERSION = '0.08'; 14 | 15 | #Variables global to the package 16 | my @mat = (); 17 | my @mask = (); 18 | my @colcov = (); 19 | my @rowcov = (); 20 | my $Z0_row = 0; 21 | my $Z0_col = 0; 22 | my @path = (); 23 | 24 | #The exported subroutine. 25 | #Expected Input: Reference to the input matrix (MxN) 26 | #Output: Mx1 matrix, giving the column number of the value assigned to each row. (For more explaination refer perldoc) 27 | sub assign 28 | { 29 | #reference to the input matrix 30 | my $rmat = shift; 31 | my $rsolution_mat = shift; 32 | my ($row, $row_len) = (0,0); 33 | 34 | # re-initialize that global variables 35 | @mat = (); 36 | @mask = (); 37 | @colcov = (); 38 | @rowcov = (); 39 | $Z0_row = 0; 40 | $Z0_col = 0; 41 | @path = (); 42 | 43 | #variables local to the subroutine 44 | my $step = 0; 45 | my ($i, $j) = (0,0); 46 | 47 | #the input matrix 48 | my @inp_mat = @$rmat; 49 | 50 | #copy the orginal matrix, before applying the algorithm to the matrix 51 | foreach (@inp_mat) 52 | { 53 | push @mat, [ @$_ ]; 54 | } 55 | 56 | #check if the input matrix is well-formed i.e. either square or rectangle. 
57 | $row_len = $#{$mat[0]}; 58 | foreach my $row (@mat) 59 | { 60 | if($row_len != $#$row) 61 | { 62 | die "Please check the input matrix.\nThe input matrix is not a well-formed matrix!\nThe input matrix has to be rectangular or square matrix.\n"; 63 | } 64 | } 65 | 66 | #check if the matrix is a square matrix, 67 | #if not convert it to square matrix by padding zeroes. 68 | if($#mat < $#{$mat[0]}) 69 | { 70 | # Add rows 71 | my $diff = $#{$mat[0]} - $#mat; 72 | for (1 .. $diff) 73 | { 74 | push @mat, [ (0) x @{$mat[0]} ]; 75 | } 76 | } 77 | elsif($#mat > $#{$mat[0]}) 78 | { 79 | # Add columns 80 | my $diff = $#mat - $#{$mat[0]}; 81 | for (0 .. $#mat) 82 | { 83 | push @{$mat[$_]}, (0) x $diff; 84 | } 85 | } 86 | 87 | #initialize mask, column cover and row cover matrices 88 | clear_covers(); 89 | 90 | for($i=0;$i<=$#mat;$i++) 91 | { 92 | push @mask, [ (0) x @mat ]; 93 | } 94 | 95 | #The algorithm can be grouped in 6 steps. 96 | &stepone(); 97 | &steptwo(); 98 | $step = &stepthree(); 99 | while($step == 4) 100 | { 101 | $step = &stepfour(); 102 | while($step == 6) 103 | { 104 | &stepsix(); 105 | $step = &stepfour(); 106 | } 107 | &stepfive(); 108 | $step = &stepthree(); 109 | } 110 | 111 | #create the output matrix 112 | for my $i (0 .. $#mat) 113 | { 114 | for my $j (0 .. $#{$mat[$i]}) 115 | { 116 | if($mask[$i][$j] == 1) 117 | { 118 | $rsolution_mat->[$i] = $j; 119 | } 120 | } 121 | } 122 | 123 | 124 | #Code for tracing------------------ 125 | <<'ee'; 126 | print "\nInput Matrix:\n"; 127 | for($i=0;$i<=$#mat;$i++) 128 | { 129 | for($j=0;$j<=$#mat;$j++) 130 | { 131 | print $mat[$i][$j] . "\t"; 132 | } 133 | print "\n"; 134 | } 135 | 136 | print "\nMask Matrix:\n"; 137 | for($i=0;$i<=$#mat;$i++) 138 | { 139 | for($j=0;$j<=$#mat;$j++) 140 | { 141 | print $mask[$i][$j] . "\t"; 142 | } 143 | print "\n"; 144 | } 145 | 146 | print "\nOutput Matrix:\n"; 147 | print "$_\n" for @$rsolution_mat; 148 | ee 149 | 150 | #---------------------------------- 151 | 152 | } 153 | 154 | #Step 1 - Find minimum value for every row and subtract this min from each element of the row. 155 | sub stepone 156 | { 157 | # print "Step 1 \n"; 158 | 159 | #Find the minimum value for every row 160 | for my $row (@mat) 161 | { 162 | my $min = $row->[0]; 163 | for (@$row) 164 | { 165 | $min = $_ if $min > $_; 166 | } 167 | 168 | #Subtract the minimum value of the row from each element of the row. 169 | @$row = map {$_ - $min} @$row; 170 | } 171 | # print "Step 1 end \n"; 172 | } 173 | 174 | #Step 2 - Star the zeroes, Create the mask and cover matrices. Re-initialize the cover matrices for next steps. 175 | #To star a zero: We search for a zero in the matrix and than cover the column and row in which it occurs. Now this zero is starred. 176 | #A next starred zero can occur only in those columns and rows which have not been previously covered by any other starred zero. 177 | sub steptwo 178 | { 179 | # print "Step 2 \n"; 180 | 181 | my ($i, $j) = (0,0); 182 | 183 | for($i=0;$i<=$#mat;$i++) 184 | { 185 | for($j=0;$j<=$#{$mat[$i]};$j++) 186 | { 187 | if($mat[$i][$j] == 0 && $colcov[$j] == 0 && $rowcov[$i] == 0) 188 | { 189 | $mask[$i][$j] = 1; 190 | $colcov[$j] = 1; 191 | $rowcov[$i] = 1; 192 | } 193 | } 194 | } 195 | #Re-initialize the cover matrices 196 | &clear_covers(); 197 | # print "Step 2 end\n"; 198 | } 199 | 200 | #Step 3 - Check if each column has a starred zero. 
If yes then the problem is solved else proceed to step 4 201 | sub stepthree 202 | { 203 | # print "Step 3 \n"; 204 | 205 | my $cnt = 0; 206 | 207 | for my $i (0 .. $#mat) 208 | { 209 | for my $j (0 .. $#mat) 210 | { 211 | if($mask[$i][$j] == 1) 212 | { 213 | $colcov[$j] = 1; 214 | $cnt++; 215 | } 216 | } 217 | } 218 | if($cnt > $#mat) 219 | { 220 | # print "Step 3 end. Next expected step 7 \n"; 221 | return 7; 222 | } 223 | else 224 | { 225 | # print "Step 3 end. Next expected step 4 \n"; 226 | return 4; 227 | } 228 | 229 | } 230 | 231 | #Step 4 - Try to find a zero which is not starred and whose columns and rows are not yet covered. 232 | #If such a zero found, prime it, try to find a starred zero in its row, 233 | # if not found proceed to step 5 234 | # else continue 235 | #Else proceed to step 6. 236 | sub stepfour 237 | { 238 | # print "Step 4 \n"; 239 | 240 | while(1) 241 | { 242 | my ($row, $col) = &find_a_zero(); 243 | if ($row < 0) 244 | { 245 | # No zeroes 246 | return 6; 247 | } 248 | 249 | $mask[$row][$col] = 2; 250 | my $star_col = &find_star_in_row($row); 251 | if ($star_col >= 0) 252 | { 253 | $col = $star_col; 254 | $rowcov[$row] = 1; 255 | $colcov[$col] = 0; 256 | } 257 | else 258 | { 259 | $Z0_row = $row; 260 | $Z0_col = $col; 261 | return 5; 262 | } 263 | } 264 | } 265 | 266 | #Tries to find yet uncovered zero 267 | sub find_a_zero 268 | { 269 | for my $i (0 .. $#mat) 270 | { 271 | next if $rowcov[$i]; 272 | 273 | for my $j (reverse(0 .. $#mat)) # Prefer large $j 274 | { 275 | next if $colcov[$j]; 276 | return ($i, $j) if $mat[$i][$j] == 0; 277 | } 278 | } 279 | 280 | return (-1, -1); 281 | } 282 | 283 | #Tries to find starred zero in the given row and returns the column number 284 | sub find_star_in_row 285 | { 286 | my $row = shift; 287 | 288 | for my $j (0 .. $#mat) 289 | { 290 | if($mask[$row][$j] == 1) 291 | { 292 | return $j; 293 | } 294 | } 295 | return -1; 296 | } 297 | 298 | #Step 5 - Try to find a starred zero in the column of the uncovered zero found in the step 4. 299 | #If starred zero found, try to find a prime zero in its row. 300 | #Continue finding starred zero in the column and primed zero in the row until, 301 | #we get to a primed zero which does not have a starred zero in its column. 302 | #At this point reduce the non-zero values of mask matrix by 1. i.e. change prime zeros to starred zeroes. 303 | #Clear the cover matrices and clear any primes i.e. values=2 from mask matrix. 304 | sub stepfive 305 | { 306 | # print "Step 5 \n"; 307 | 308 | my $cnt = 0; 309 | my $done = 0; 310 | 311 | $path[$cnt][0] = $Z0_row; 312 | $path[$cnt][1] = $Z0_col; 313 | 314 | while($done == 0) 315 | { 316 | my $row = &find_star_in_col($path[$cnt][1]); 317 | if($row > -1) 318 | { 319 | $cnt++; 320 | $path[$cnt][0] = $row; 321 | $path[$cnt][1] = $path[$cnt - 1][1]; 322 | } 323 | else 324 | { 325 | $done = 1; 326 | } 327 | if($done == 0) 328 | { 329 | my $col = &find_prime_in_row($path[$cnt][0]); 330 | $cnt++; 331 | $path[$cnt][0] = $path[$cnt - 1][0]; 332 | $path[$cnt][1] = $col; 333 | } 334 | } 335 | &convert_path($cnt); 336 | &clear_covers(); 337 | &erase_primes(); 338 | 339 | # print "Step 5 end \n"; 340 | } 341 | 342 | #Tries to find starred zero in the given column and returns the row number 343 | sub find_star_in_col 344 | { 345 | my $col = shift; 346 | 347 | for my $i (0 .. 
$#mat) 348 | { 349 | return $i if $mask[$i][$col] == 1; 350 | } 351 | 352 | return -1; 353 | } 354 | 355 | #Tries to find primed zero in the given row and returns the column number 356 | sub find_prime_in_row 357 | { 358 | my $row = shift; 359 | 360 | for my $j (0 .. $#mat) 361 | { 362 | return $j if $mask[$row][$j] == 2; 363 | } 364 | 365 | return -1; 366 | } 367 | 368 | #Reduces non-zero value in the mask matrix by 1. 369 | #i.e. converts all primes to stars and stars to none. 370 | sub convert_path 371 | { 372 | my $cnt = shift; 373 | 374 | for my $i (0 .. $cnt) 375 | { 376 | for ( $mask[$path[$i][0]][$path[$i][1]] ) { 377 | $_ = ( $_ == 1 ) ? 0 : 1; 378 | } 379 | } 380 | } 381 | 382 | #Clears cover matrices 383 | sub clear_covers 384 | { 385 | @rowcov = @colcov = (0) x @mat; 386 | } 387 | 388 | #Changes all primes i.e. values=2 to 0. 389 | sub erase_primes 390 | { 391 | for my $row (@mask) 392 | { 393 | for my $j (0 .. $#$row) 394 | { 395 | $row->[$j] = 0 if $row->[$j] == 2; 396 | } 397 | } 398 | } 399 | 400 | #Step 6 - Find the minimum value from the rows and columns which are currently not covered. 401 | #Subtract this minimum value from all the elements of the columns which are not covered. 402 | #Add this minimum value to all the elements of the rows which are covered. 403 | #Proceed to step 4. 404 | sub stepsix 405 | { 406 | # print "Step 6 \n"; 407 | my ($i, $j); 408 | my $minval = 0; 409 | 410 | $minval = &find_smallest(); 411 | 412 | for($i=0;$i<=$#mat;$i++) 413 | { 414 | for($j=0;$j<=$#{$mat[$i]};$j++) 415 | { 416 | if($rowcov[$i] == 1) 417 | { 418 | $mat[$i][$j] += $minval; 419 | } 420 | if($colcov[$j] == 0) 421 | { 422 | $mat[$i][$j] -= $minval; 423 | } 424 | } 425 | } 426 | 427 | # print "Step 6 end \n"; 428 | } 429 | 430 | #Finds the minimum value from all the matrix values which are not covered. 431 | sub find_smallest 432 | { 433 | my $minval; 434 | 435 | for my $i (0 .. $#mat) 436 | { 437 | next if $rowcov[$i]; 438 | 439 | for my $j (0 .. $#mat) 440 | { 441 | next if $colcov[$j]; 442 | if( !defined($minval) || $minval > $mat[$i][$j]) 443 | { 444 | $minval = $mat[$i][$j]; 445 | } 446 | } 447 | } 448 | return $minval; 449 | } 450 | 451 | 452 | 1; 453 | __END__ 454 | 455 | =head1 NAME 456 | 457 | Algorithm::Munkres - Perl extension for Munkres' solution to 458 | classical Assignment problem for square and rectangular matrices 459 | This module extends the solution of Assignment problem for square 460 | matrices to rectangular matrices by padding zeros. Thus a rectangular 461 | matrix is converted to square matrix by padding necessary zeros. 462 | 463 | =head1 SYNOPSIS 464 | 465 | use Algorithm::Munkres; 466 | 467 | @mat = ( 468 | [2, 4, 7, 9], 469 | [3, 9, 5, 1], 470 | [8, 2, 9, 7], 471 | ); 472 | 473 | assign(\@mat,\@out_mat); 474 | 475 | Then the @out_mat array will have the output as: (0,3,1,2), 476 | where 477 | 0th element indicates that 0th row is assigned 0th column i.e value=2 478 | 1st element indicates that 1st row is assigned 3rd column i.e.value=1 479 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 480 | 3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0 481 | 482 | 483 | =head1 DESCRIPTION 484 | 485 | Assignment Problem: Given N jobs, N workers and the time taken by 486 | each worker to complete a job then how should the assignment of a 487 | Worker to a Job be done, so as to minimize the time taken. 
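To make this concrete before the worked 3x3 example that follows, here is a minimal usage sketch. It is not part of the distributed module: the `use lib` path is an assumption (it presumes you run the snippet from the repository root so the bundled lib/ directory is on @INC); everything else (assign(), the p,q,r cost matrix and its optimum) comes from this documentation.

    use strict;
    use warnings;
    use lib 'reference-coreference-scorers-8.01/lib';   # assumed: run from the repo root
    use Algorithm::Munkres;                             # exports assign() by default

    # Rows are jobs p, q, r; columns are workers x, y, z (the matrix shown below).
    my @cost = ( [2, 4, 7],
                 [3, 9, 5],
                 [8, 2, 9] );

    my @out;
    assign(\@cost, \@out);        # @out holds the chosen column index for each row

    my $total = 0;
    $total += $cost[$_][ $out[$_] ] for 0 .. $#out;
    print "row $_ -> col $out[$_]\n" for 0 .. $#out;
    print "total = $total\n";     # 9, the optimum in the solution table below

Running this should report the (0, 2, 1) assignment that the OUTPUT section describes, with total cost 9.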
488 | 489 | Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that: 490 | x y z 491 | p 2 4 7 492 | q 3 9 5 493 | r 8 2 9 494 | 495 | where the cell values of the above matrix give the time required 496 | for the worker(given by column name) to complete the job(given by 497 | the row name) 498 | 499 | then possible solutions are: 500 | Total 501 | 1. 2, 9, 9 20 502 | 2. 2, 2, 5 9 503 | 3. 3, 4, 9 16 504 | 4. 3, 2, 7 12 505 | 5. 8, 9, 7 24 506 | 6. 8, 4, 5 17 507 | 508 | Thus (2) is the optimal solution for the above problem. 509 | This kind of brute-force approach of solving Assignment problem 510 | quickly becomes slow and bulky as N grows, because the number of 511 | possible solution are N! and thus the task is to evaluate each 512 | and then find the optimal solution.(If N=10, number of possible 513 | solutions: 3628800 !) 514 | Munkres' gives us a solution to this problem, which is implemented 515 | in this module. 516 | 517 | This module also solves Assignment problem for rectangular matrices 518 | (M x N) by converting them to square matrices by padding zeros. ex: 519 | If input matrix is: 520 | [2, 4, 7, 9], 521 | [3, 9, 5, 1], 522 | [8, 2, 9, 7] 523 | i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 524 | matrix will be: 525 | [2, 4, 7, 9], 526 | [3, 9, 5, 1], 527 | [8, 2, 9, 7], 528 | [0, 0, 0, 0] 529 | 530 | =head1 EXPORT 531 | 532 | "assign" function by default. 533 | 534 | =head1 INPUT 535 | 536 | The input matrix should be in a two dimensional array(array of 537 | array) and the 'assign' subroutine expects a reference to this 538 | array and not the complete array. 539 | eg:assign(\@inp_mat, \@out_mat); 540 | The second argument to the assign subroutine is the reference 541 | to the output array. 542 | 543 | =head1 OUTPUT 544 | 545 | The assign subroutine expects references to two arrays as its 546 | input paramenters. The second parameter is the reference to the 547 | output array. This array is populated by assign subroutine. This 548 | array is single dimensional Nx1 matrix. 549 | For above example the output array returned will be: 550 | (0, 551 | 2, 552 | 1) 553 | 554 | where 555 | 0th element indicates that 0th row is assigned 0th column i.e value=2 556 | 1st element indicates that 1st row is assigned 2nd column i.e.value=5 557 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 558 | 559 | =head1 SEE ALSO 560 | 561 | 1. http://216.249.163.93/bob.pilgrim/445/munkres.html 562 | 563 | 2. Munkres, J. Algorithms for the assignment and transportation 564 | Problems. J. Siam 5 (Mar. 1957), 32-38 565 | 566 | 3. François Bourgeois and Jean-Claude Lassalle. 1971. 567 | An extension of the Munkres algorithm for the assignment 568 | problem to rectangular matrices. 569 | Communication ACM, 14(12):802-804 570 | 571 | =head1 AUTHOR 572 | 573 | Anagha Kulkarni, University of Minnesota Duluth 574 | kulka020 d.umn.edu 575 | 576 | Ted Pedersen, University of Minnesota Duluth 577 | tpederse d.umn.edu 578 | 579 | =head1 COPYRIGHT AND LICENSE 580 | 581 | Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni 582 | 583 | This program is free software; you can redistribute it and/or 584 | modify it under the terms of the GNU General Public License 585 | as published by the Free Software Foundation; either version 2 586 | of the License, or (at your option) any later version. 
587 | This program is distributed in the hope that it will be useful, 588 | but WITHOUT ANY WARRANTY; without even the implied warranty of 589 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 590 | GNU General Public License for more details. 591 | 592 | You should have received a copy of the GNU General Public License 593 | along with this program; if not, write to the Free Software 594 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 595 | 596 | =cut 597 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/lib/Algorithm/README.Munkres: -------------------------------------------------------------------------------- 1 | NAME 2 | Algorithm-Munkres : Perl extension for Munkres' solution to 3 | classical Assignment problem for square and rectangular matrices 4 | This module extends the solution of Assignment problem for square 5 | matrices to rectangular matrices by padding zeros. Thus a rectangular 6 | matrix is converted to square matrix by padding necessary zeros. 7 | 8 | SYNOPSIS 9 | use Algorithm::Munkres; 10 | 11 | @mat = ( 12 | [2, 4, 7, 9], 13 | [3, 9, 5, 1], 14 | [8, 2, 9, 7], 15 | ); 16 | 17 | assign(\@mat,\@out_mat); 18 | 19 | Then the @out_mat array will have the output as: (0,3,1,2), 20 | where 21 | 0th element indicates that 0th row is assigned 0th column i.e value=2 22 | 1st element indicates that 1st row is assigned 3rd column i.e.value=1 23 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 24 | 3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0 25 | 26 | DESCRIPTION 27 | Assignment Problem: Given N jobs, N workers and the time taken by 28 | each worker to complete a job then how should the assignment of a 29 | Worker to a Job be done, so as to minimize the time taken. 30 | 31 | Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that: 32 | x y z 33 | p 2 4 7 34 | q 3 9 5 35 | r 8 2 9 36 | 37 | where the cell values of the above matrix give the time required 38 | for the worker(given by column name) to complete the job(given by 39 | the row name) 40 | 41 | then possible solutions are: 42 | Total 43 | 1. 2, 9, 9 20 44 | 2. 2, 2, 5 9 45 | 3. 3, 4, 9 16 46 | 4. 3, 2, 7 12 47 | 5. 8, 9, 7 24 48 | 6. 8, 4, 5 17 49 | 50 | Thus (2) is the optimal solution for the above problem. 51 | This kind of brute-force approach of solving Assignment problem 52 | quickly becomes slow and bulky as N grows, because the number of 53 | possible solution are N! and thus the task is to evaluate each 54 | and then find the optimal solution.(If N=10, number of possible 55 | solutions: 3628800 !) 56 | Munkres' gives us a solution to this problem, which is implemented 57 | in this module. 58 | 59 | This module also solves Assignment problem for rectangular matrices 60 | (M x N) by converting them to square matrices by padding zeros. ex: 61 | If input matrix is: 62 | [2, 4, 7, 9], 63 | [3, 9, 5, 1], 64 | [8, 2, 9, 7] 65 | i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 66 | matrix will be: 67 | [2, 4, 7, 9], 68 | [3, 9, 5, 1], 69 | [8, 2, 9, 7], 70 | [0, 0, 0, 0] 71 | 72 | EXPORT 73 | "assign" function by default. 74 | 75 | INPUT 76 | The input matrix should be in a two dimensional array(array of 77 | array) and the 'assign' subroutine expects a reference to this 78 | array and not the complete array. 79 | eg:assign(\@inp_mat, \@out_mat); 80 | The second argument to the assign subroutine is the reference 81 | to the output array. 
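As a quick illustration of the padding behaviour described above, the sketch below (again an illustration, not part of the module; the `use lib` path assumes the repository root as working directory) passes the rectangular 3x4 matrix from the SYNOPSIS. Because the matrix is padded to 4x4 internally, the output array gains a fourth, dummy entry for the all-zero row; only the first three entries map real rows to columns.

    use strict;
    use warnings;
    use lib 'reference-coreference-scorers-8.01/lib';   # assumed repo-root working directory
    use Algorithm::Munkres;

    my @inp_mat = ( [2, 4, 7, 9],
                    [3, 9, 5, 1],
                    [8, 2, 9, 7] );
    my @out_mat;
    assign(\@inp_mat, \@out_mat);    # per the SYNOPSIS: (0, 3, 1, 2)

    # $out_mat[3] belongs to the padding row and can be ignored.
    for my $row (0 .. $#inp_mat) {
        printf "row %d -> col %d (cost %d)\n",
               $row, $out_mat[$row], $inp_mat[$row][ $out_mat[$row] ];
    }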
82 | 83 | OUTPUT 84 | The assign subroutine expects references to two arrays as its 85 | input paramenters. The second parameter is the reference to the 86 | output array. This array is populated by assign subroutine. This 87 | array is single dimensional Nx1 matrix. 88 | For above example the output array returned will be: 89 | (0, 90 | 2, 91 | 1) 92 | 93 | where 94 | 0th element indicates that 0th row is assigned 0th column i.e value=2 95 | 1st element indicates that 1st row is assigned 2nd column i.e.value=5 96 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 97 | 98 | SEE ALSO 99 | 1. http://216.249.163.93/bob.pilgrim/445/munkres.html 100 | 101 | 2. Munkres, J. Algorithms for the assignment and transportation 102 | Problems. J. Siam 5 (Mar. 1957), 32-38 103 | 104 | 3. François Bourgeois and Jean-Claude Lassalle. 1971. 105 | An extension of the Munkres algorithm for the assignment 106 | problem to rectangular matrices. 107 | Communication ACM, 14(12):802-804 108 | 109 | AUTHOR 110 | Anagha Kulkarni, University of Minnesota Duluth 111 | kulka020 d.umn.edu 112 | 113 | Ted Pedersen, University of Minnesota Duluth 114 | tpederse d.umn.edu 115 | 116 | COPYRIGHT AND LICENSE 117 | Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni 118 | 119 | This program is free software; you can redistribute it and/or modify it 120 | under the terms of the GNU General Public License as published by the 121 | Free Software Foundation; either version 2 of the License, or (at your 122 | option) any later version. This program is distributed in the hope that 123 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied 124 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 125 | GNU General Public License for more details. 126 | 127 | You should have received a copy of the GNU General Public License along 128 | with this program; if not, write to the Free Software Foundation, Inc., 129 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 130 | 131 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/lib/Cwd.pm: -------------------------------------------------------------------------------- 1 | package Cwd; 2 | 3 | =head1 NAME 4 | 5 | Cwd - get pathname of current working directory 6 | 7 | =head1 SYNOPSIS 8 | 9 | use Cwd; 10 | my $dir = getcwd; 11 | 12 | use Cwd 'abs_path'; 13 | my $abs_path = abs_path($file); 14 | 15 | =head1 DESCRIPTION 16 | 17 | This module provides functions for determining the pathname of the 18 | current working directory. It is recommended that getcwd (or another 19 | *cwd() function) be used in I code to ensure portability. 20 | 21 | By default, it exports the functions cwd(), getcwd(), fastcwd(), and 22 | fastgetcwd() (and, on Win32, getdcwd()) into the caller's namespace. 23 | 24 | 25 | =head2 getcwd and friends 26 | 27 | Each of these functions are called without arguments and return the 28 | absolute path of the current working directory. 29 | 30 | =over 4 31 | 32 | =item getcwd 33 | 34 | my $cwd = getcwd(); 35 | 36 | Returns the current working directory. 37 | 38 | Exposes the POSIX function getcwd(3) or re-implements it if it's not 39 | available. 40 | 41 | =item cwd 42 | 43 | my $cwd = cwd(); 44 | 45 | The cwd() is the most natural form for the current architecture. For 46 | most systems it is identical to `pwd` (but without the trailing line 47 | terminator). 
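A small sketch of that claim, added here for illustration and not part of Cwd itself; it assumes a Unix-like system with a pwd command on the PATH, and simply compares cwd() against the backtick output after stripping the trailing newline.

    use strict;
    use warnings;
    use Cwd;

    my $from_module = cwd();
    chomp(my $from_shell = `pwd`);   # `pwd` output carries a trailing newline
    print "cwd() : $from_module\n";
    print "`pwd` : $from_shell\n";
    print $from_module eq $from_shell ? "identical\n" : "different\n";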
48 | 49 | =item fastcwd 50 | 51 | my $cwd = fastcwd(); 52 | 53 | A more dangerous version of getcwd(), but potentially faster. 54 | 55 | It might conceivably chdir() you out of a directory that it can't 56 | chdir() you back into. If fastcwd encounters a problem it will return 57 | undef but will probably leave you in a different directory. For a 58 | measure of extra security, if everything appears to have worked, the 59 | fastcwd() function will check that it leaves you in the same directory 60 | that it started in. If it has changed it will C with the message 61 | "Unstable directory path, current directory changed 62 | unexpectedly". That should never happen. 63 | 64 | =item fastgetcwd 65 | 66 | my $cwd = fastgetcwd(); 67 | 68 | The fastgetcwd() function is provided as a synonym for cwd(). 69 | 70 | =item getdcwd 71 | 72 | my $cwd = getdcwd(); 73 | my $cwd = getdcwd('C:'); 74 | 75 | The getdcwd() function is also provided on Win32 to get the current working 76 | directory on the specified drive, since Windows maintains a separate current 77 | working directory for each drive. If no drive is specified then the current 78 | drive is assumed. 79 | 80 | This function simply calls the Microsoft C library _getdcwd() function. 81 | 82 | =back 83 | 84 | 85 | =head2 abs_path and friends 86 | 87 | These functions are exported only on request. They each take a single 88 | argument and return the absolute pathname for it. If no argument is 89 | given they'll use the current working directory. 90 | 91 | =over 4 92 | 93 | =item abs_path 94 | 95 | my $abs_path = abs_path($file); 96 | 97 | Uses the same algorithm as getcwd(). Symbolic links and relative-path 98 | components ("." and "..") are resolved to return the canonical 99 | pathname, just like realpath(3). 100 | 101 | =item realpath 102 | 103 | my $abs_path = realpath($file); 104 | 105 | A synonym for abs_path(). 106 | 107 | =item fast_abs_path 108 | 109 | my $abs_path = fast_abs_path($file); 110 | 111 | A more dangerous, but potentially faster version of abs_path. 112 | 113 | =back 114 | 115 | =head2 $ENV{PWD} 116 | 117 | If you ask to override your chdir() built-in function, 118 | 119 | use Cwd qw(chdir); 120 | 121 | then your PWD environment variable will be kept up to date. Note that 122 | it will only be kept up to date if all packages which use chdir import 123 | it from Cwd. 124 | 125 | 126 | =head1 NOTES 127 | 128 | =over 4 129 | 130 | =item * 131 | 132 | Since the path separators are different on some operating systems ('/' 133 | on Unix, ':' on MacPerl, etc...) we recommend you use the File::Spec 134 | modules wherever portability is a concern. 135 | 136 | =item * 137 | 138 | Actually, on Mac OS, the C, C and C 139 | functions are all aliases for the C function, which, on Mac OS, 140 | calls `pwd`. Likewise, the C function is an alias for 141 | C. 142 | 143 | =back 144 | 145 | =head1 AUTHOR 146 | 147 | Originally by the perl5-porters. 148 | 149 | Maintained by Ken Williams 150 | 151 | =head1 COPYRIGHT 152 | 153 | Copyright (c) 2004 by the Perl 5 Porters. All rights reserved. 154 | 155 | This program is free software; you can redistribute it and/or modify 156 | it under the same terms as Perl itself. 157 | 158 | Portions of the C code in this library are copyright (c) 1994 by the 159 | Regents of the University of California. All rights reserved. The 160 | license on this code is compatible with the licensing of the rest of 161 | the distribution - please see the source code in F for the 162 | details. 
163 | 164 | =head1 SEE ALSO 165 | 166 | L 167 | 168 | =cut 169 | 170 | use strict; 171 | use Exporter; 172 | use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); 173 | 174 | $VERSION = '3.39_02'; 175 | my $xs_version = $VERSION; 176 | $VERSION =~ tr/_//; 177 | 178 | @ISA = qw/ Exporter /; 179 | @EXPORT = qw(cwd getcwd fastcwd fastgetcwd); 180 | push @EXPORT, qw(getdcwd) if $^O eq 'MSWin32'; 181 | @EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath); 182 | 183 | # sys_cwd may keep the builtin command 184 | 185 | # All the functionality of this module may provided by builtins, 186 | # there is no sense to process the rest of the file. 187 | # The best choice may be to have this in BEGIN, but how to return from BEGIN? 188 | 189 | if ($^O eq 'os2') { 190 | local $^W = 0; 191 | 192 | *cwd = defined &sys_cwd ? \&sys_cwd : \&_os2_cwd; 193 | *getcwd = \&cwd; 194 | *fastgetcwd = \&cwd; 195 | *fastcwd = \&cwd; 196 | 197 | *fast_abs_path = \&sys_abspath if defined &sys_abspath; 198 | *abs_path = \&fast_abs_path; 199 | *realpath = \&fast_abs_path; 200 | *fast_realpath = \&fast_abs_path; 201 | 202 | return 1; 203 | } 204 | 205 | # Need to look up the feature settings on VMS. The preferred way is to use the 206 | # VMS::Feature module, but that may not be available to dual life modules. 207 | 208 | my $use_vms_feature; 209 | BEGIN { 210 | if ($^O eq 'VMS') { 211 | if (eval { local $SIG{__DIE__}; require VMS::Feature; }) { 212 | $use_vms_feature = 1; 213 | } 214 | } 215 | } 216 | 217 | # Need to look up the UNIX report mode. This may become a dynamic mode 218 | # in the future. 219 | sub _vms_unix_rpt { 220 | my $unix_rpt; 221 | if ($use_vms_feature) { 222 | $unix_rpt = VMS::Feature::current("filename_unix_report"); 223 | } else { 224 | my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || ''; 225 | $unix_rpt = $env_unix_rpt =~ /^[ET1]/i; 226 | } 227 | return $unix_rpt; 228 | } 229 | 230 | # Need to look up the EFS character set mode. This may become a dynamic 231 | # mode in the future. 232 | sub _vms_efs { 233 | my $efs; 234 | if ($use_vms_feature) { 235 | $efs = VMS::Feature::current("efs_charset"); 236 | } else { 237 | my $env_efs = $ENV{'DECC$EFS_CHARSET'} || ''; 238 | $efs = $env_efs =~ /^[ET1]/i; 239 | } 240 | return $efs; 241 | } 242 | 243 | 244 | # If loading the XS stuff doesn't work, we can fall back to pure perl 245 | eval { 246 | if ( $] >= 5.006 ) { 247 | require XSLoader; 248 | XSLoader::load( __PACKAGE__, $xs_version); 249 | } else { 250 | require DynaLoader; 251 | push @ISA, 'DynaLoader'; 252 | __PACKAGE__->bootstrap( $xs_version ); 253 | } 254 | }; 255 | 256 | # Big nasty table of function aliases 257 | my %METHOD_MAP = 258 | ( 259 | VMS => 260 | { 261 | cwd => '_vms_cwd', 262 | getcwd => '_vms_cwd', 263 | fastcwd => '_vms_cwd', 264 | fastgetcwd => '_vms_cwd', 265 | abs_path => '_vms_abs_path', 266 | fast_abs_path => '_vms_abs_path', 267 | }, 268 | 269 | MSWin32 => 270 | { 271 | # We assume that &_NT_cwd is defined as an XSUB or in the core. 272 | cwd => '_NT_cwd', 273 | getcwd => '_NT_cwd', 274 | fastcwd => '_NT_cwd', 275 | fastgetcwd => '_NT_cwd', 276 | abs_path => 'fast_abs_path', 277 | realpath => 'fast_abs_path', 278 | }, 279 | 280 | dos => 281 | { 282 | cwd => '_dos_cwd', 283 | getcwd => '_dos_cwd', 284 | fastgetcwd => '_dos_cwd', 285 | fastcwd => '_dos_cwd', 286 | abs_path => 'fast_abs_path', 287 | }, 288 | 289 | # QNX4. QNX6 has a $os of 'nto'. 
290 | qnx => 291 | { 292 | cwd => '_qnx_cwd', 293 | getcwd => '_qnx_cwd', 294 | fastgetcwd => '_qnx_cwd', 295 | fastcwd => '_qnx_cwd', 296 | abs_path => '_qnx_abs_path', 297 | fast_abs_path => '_qnx_abs_path', 298 | }, 299 | 300 | cygwin => 301 | { 302 | getcwd => 'cwd', 303 | fastgetcwd => 'cwd', 304 | fastcwd => 'cwd', 305 | abs_path => 'fast_abs_path', 306 | realpath => 'fast_abs_path', 307 | }, 308 | 309 | epoc => 310 | { 311 | cwd => '_epoc_cwd', 312 | getcwd => '_epoc_cwd', 313 | fastgetcwd => '_epoc_cwd', 314 | fastcwd => '_epoc_cwd', 315 | abs_path => 'fast_abs_path', 316 | }, 317 | 318 | MacOS => 319 | { 320 | getcwd => 'cwd', 321 | fastgetcwd => 'cwd', 322 | fastcwd => 'cwd', 323 | abs_path => 'fast_abs_path', 324 | }, 325 | ); 326 | 327 | $METHOD_MAP{NT} = $METHOD_MAP{MSWin32}; 328 | 329 | 330 | # Find the pwd command in the expected locations. We assume these 331 | # are safe. This prevents _backtick_pwd() consulting $ENV{PATH} 332 | # so everything works under taint mode. 333 | my $pwd_cmd; 334 | foreach my $try ('/bin/pwd', 335 | '/usr/bin/pwd', 336 | '/QOpenSys/bin/pwd', # OS/400 PASE. 337 | ) { 338 | 339 | if( -x $try ) { 340 | $pwd_cmd = $try; 341 | last; 342 | } 343 | } 344 | my $found_pwd_cmd = defined($pwd_cmd); 345 | unless ($pwd_cmd) { 346 | # Isn't this wrong? _backtick_pwd() will fail if somenone has 347 | # pwd in their path but it is not /bin/pwd or /usr/bin/pwd? 348 | # See [perl #16774]. --jhi 349 | $pwd_cmd = 'pwd'; 350 | } 351 | 352 | # Lazy-load Carp 353 | sub _carp { require Carp; Carp::carp(@_) } 354 | sub _croak { require Carp; Carp::croak(@_) } 355 | 356 | # The 'natural and safe form' for UNIX (pwd may be setuid root) 357 | sub _backtick_pwd { 358 | # Localize %ENV entries in a way that won't create new hash keys 359 | my @localize = grep exists $ENV{$_}, qw(PATH IFS CDPATH ENV BASH_ENV); 360 | local @ENV{@localize}; 361 | 362 | my $cwd = `$pwd_cmd`; 363 | # Belt-and-suspenders in case someone said "undef $/". 364 | local $/ = "\n"; 365 | # `pwd` may fail e.g. if the disk is full 366 | chomp($cwd) if defined $cwd; 367 | $cwd; 368 | } 369 | 370 | # Since some ports may predefine cwd internally (e.g., NT) 371 | # we take care not to override an existing definition for cwd(). 372 | 373 | unless ($METHOD_MAP{$^O}{cwd} or defined &cwd) { 374 | # The pwd command is not available in some chroot(2)'ed environments 375 | my $sep = $Config::Config{path_sep} || ':'; 376 | my $os = $^O; # Protect $^O from tainting 377 | 378 | 379 | # Try again to find a pwd, this time searching the whole PATH. 380 | if (defined $ENV{PATH} and $os ne 'MSWin32') { # no pwd on Windows 381 | my @candidates = split($sep, $ENV{PATH}); 382 | while (!$found_pwd_cmd and @candidates) { 383 | my $candidate = shift @candidates; 384 | $found_pwd_cmd = 1 if -x "$candidate/pwd"; 385 | } 386 | } 387 | 388 | # MacOS has some special magic to make `pwd` work. 389 | if( $os eq 'MacOS' || $found_pwd_cmd ) 390 | { 391 | *cwd = \&_backtick_pwd; 392 | } 393 | else { 394 | *cwd = \&getcwd; 395 | } 396 | } 397 | 398 | if ($^O eq 'cygwin') { 399 | # We need to make sure cwd() is called with no args, because it's 400 | # got an arg-less prototype and will die if args are present. 
401 | local $^W = 0; 402 | my $orig_cwd = \&cwd; 403 | *cwd = sub { &$orig_cwd() } 404 | } 405 | 406 | 407 | # set a reasonable (and very safe) default for fastgetcwd, in case it 408 | # isn't redefined later (20001212 rspier) 409 | *fastgetcwd = \&cwd; 410 | 411 | # A non-XS version of getcwd() - also used to bootstrap the perl build 412 | # process, when miniperl is running and no XS loading happens. 413 | sub _perl_getcwd 414 | { 415 | abs_path('.'); 416 | } 417 | 418 | # By John Bazik 419 | # 420 | # Usage: $cwd = &fastcwd; 421 | # 422 | # This is a faster version of getcwd. It's also more dangerous because 423 | # you might chdir out of a directory that you can't chdir back into. 424 | 425 | sub fastcwd_ { 426 | my($odev, $oino, $cdev, $cino, $tdev, $tino); 427 | my(@path, $path); 428 | local(*DIR); 429 | 430 | my($orig_cdev, $orig_cino) = stat('.'); 431 | ($cdev, $cino) = ($orig_cdev, $orig_cino); 432 | for (;;) { 433 | my $direntry; 434 | ($odev, $oino) = ($cdev, $cino); 435 | CORE::chdir('..') || return undef; 436 | ($cdev, $cino) = stat('.'); 437 | last if $odev == $cdev && $oino == $cino; 438 | opendir(DIR, '.') || return undef; 439 | for (;;) { 440 | $direntry = readdir(DIR); 441 | last unless defined $direntry; 442 | next if $direntry eq '.'; 443 | next if $direntry eq '..'; 444 | 445 | ($tdev, $tino) = lstat($direntry); 446 | last unless $tdev != $odev || $tino != $oino; 447 | } 448 | closedir(DIR); 449 | return undef unless defined $direntry; # should never happen 450 | unshift(@path, $direntry); 451 | } 452 | $path = '/' . join('/', @path); 453 | if ($^O eq 'apollo') { $path = "/".$path; } 454 | # At this point $path may be tainted (if tainting) and chdir would fail. 455 | # Untaint it then check that we landed where we started. 456 | $path =~ /^(.*)\z/s # untaint 457 | && CORE::chdir($1) or return undef; 458 | ($cdev, $cino) = stat('.'); 459 | die "Unstable directory path, current directory changed unexpectedly" 460 | if $cdev != $orig_cdev || $cino != $orig_cino; 461 | $path; 462 | } 463 | if (not defined &fastcwd) { *fastcwd = \&fastcwd_ } 464 | 465 | 466 | # Keeps track of current working directory in PWD environment var 467 | # Usage: 468 | # use Cwd 'chdir'; 469 | # chdir $newdir; 470 | 471 | my $chdir_init = 0; 472 | 473 | sub chdir_init { 474 | if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos' and $^O ne 'MSWin32') { 475 | my($dd,$di) = stat('.'); 476 | my($pd,$pi) = stat($ENV{'PWD'}); 477 | if (!defined $dd or !defined $pd or $di != $pi or $dd != $pd) { 478 | $ENV{'PWD'} = cwd(); 479 | } 480 | } 481 | else { 482 | my $wd = cwd(); 483 | $wd = Win32::GetFullPathName($wd) if $^O eq 'MSWin32'; 484 | $ENV{'PWD'} = $wd; 485 | } 486 | # Strip an automounter prefix (where /tmp_mnt/foo/bar == /foo/bar) 487 | if ($^O ne 'MSWin32' and $ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) { 488 | my($pd,$pi) = stat($2); 489 | my($dd,$di) = stat($1); 490 | if (defined $pd and defined $dd and $di == $pi and $dd == $pd) { 491 | $ENV{'PWD'}="$2$3"; 492 | } 493 | } 494 | $chdir_init = 1; 495 | } 496 | 497 | sub chdir { 498 | my $newdir = @_ ? 
shift : ''; # allow for no arg (chdir to HOME dir) 499 | $newdir =~ s|///*|/|g unless $^O eq 'MSWin32'; 500 | chdir_init() unless $chdir_init; 501 | my $newpwd; 502 | if ($^O eq 'MSWin32') { 503 | # get the full path name *before* the chdir() 504 | $newpwd = Win32::GetFullPathName($newdir); 505 | } 506 | 507 | return 0 unless CORE::chdir $newdir; 508 | 509 | if ($^O eq 'VMS') { 510 | return $ENV{'PWD'} = $ENV{'DEFAULT'} 511 | } 512 | elsif ($^O eq 'MacOS') { 513 | return $ENV{'PWD'} = cwd(); 514 | } 515 | elsif ($^O eq 'MSWin32') { 516 | $ENV{'PWD'} = $newpwd; 517 | return 1; 518 | } 519 | 520 | if (ref $newdir eq 'GLOB') { # in case a file/dir handle is passed in 521 | $ENV{'PWD'} = cwd(); 522 | } elsif ($newdir =~ m#^/#s) { 523 | $ENV{'PWD'} = $newdir; 524 | } else { 525 | my @curdir = split(m#/#,$ENV{'PWD'}); 526 | @curdir = ('') unless @curdir; 527 | my $component; 528 | foreach $component (split(m#/#, $newdir)) { 529 | next if $component eq '.'; 530 | pop(@curdir),next if $component eq '..'; 531 | push(@curdir,$component); 532 | } 533 | $ENV{'PWD'} = join('/',@curdir) || '/'; 534 | } 535 | 1; 536 | } 537 | 538 | 539 | sub _perl_abs_path 540 | { 541 | my $start = @_ ? shift : '.'; 542 | my($dotdots, $cwd, @pst, @cst, $dir, @tst); 543 | 544 | unless (@cst = stat( $start )) 545 | { 546 | _carp("stat($start): $!"); 547 | return ''; 548 | } 549 | 550 | unless (-d _) { 551 | # Make sure we can be invoked on plain files, not just directories. 552 | # NOTE that this routine assumes that '/' is the only directory separator. 553 | 554 | my ($dir, $file) = $start =~ m{^(.*)/(.+)$} 555 | or return cwd() . '/' . $start; 556 | 557 | # Can't use "-l _" here, because the previous stat was a stat(), not an lstat(). 558 | if (-l $start) { 559 | my $link_target = readlink($start); 560 | die "Can't resolve link $start: $!" unless defined $link_target; 561 | 562 | require File::Spec; 563 | $link_target = $dir . '/' . $link_target 564 | unless File::Spec->file_name_is_absolute($link_target); 565 | 566 | return abs_path($link_target); 567 | } 568 | 569 | return $dir ? abs_path($dir) . "/$file" : "/$file"; 570 | } 571 | 572 | $cwd = ''; 573 | $dotdots = $start; 574 | do 575 | { 576 | $dotdots .= '/..'; 577 | @pst = @cst; 578 | local *PARENT; 579 | unless (opendir(PARENT, $dotdots)) 580 | { 581 | # probably a permissions issue. Try the native command. 582 | require File::Spec; 583 | return File::Spec->rel2abs( $start, _backtick_pwd() ); 584 | } 585 | unless (@cst = stat($dotdots)) 586 | { 587 | _carp("stat($dotdots): $!"); 588 | closedir(PARENT); 589 | return ''; 590 | } 591 | if ($pst[0] == $cst[0] && $pst[1] == $cst[1]) 592 | { 593 | $dir = undef; 594 | } 595 | else 596 | { 597 | do 598 | { 599 | unless (defined ($dir = readdir(PARENT))) 600 | { 601 | _carp("readdir($dotdots): $!"); 602 | closedir(PARENT); 603 | return ''; 604 | } 605 | $tst[0] = $pst[0]+1 unless (@tst = lstat("$dotdots/$dir")) 606 | } 607 | while ($dir eq '.' || $dir eq '..' || $tst[0] != $pst[0] || 608 | $tst[1] != $pst[1]); 609 | } 610 | $cwd = (defined $dir ? "$dir" : "" ) . "/$cwd" ; 611 | closedir(PARENT); 612 | } while (defined $dir); 613 | chop($cwd) unless $cwd eq '/'; # drop the trailing / 614 | $cwd; 615 | } 616 | 617 | 618 | my $Curdir; 619 | sub fast_abs_path { 620 | local $ENV{PWD} = $ENV{PWD} || ''; # Guard against clobberage 621 | my $cwd = getcwd(); 622 | require File::Spec; 623 | my $path = @_ ? shift : ($Curdir ||= File::Spec->curdir); 624 | 625 | # Detaint else we'll explode in taint mode. 
This is safe because 626 | # we're not doing anything dangerous with it. 627 | ($path) = $path =~ /(.*)/; 628 | ($cwd) = $cwd =~ /(.*)/; 629 | 630 | unless (-e $path) { 631 | _croak("$path: No such file or directory"); 632 | } 633 | 634 | unless (-d _) { 635 | # Make sure we can be invoked on plain files, not just directories. 636 | 637 | my ($vol, $dir, $file) = File::Spec->splitpath($path); 638 | return File::Spec->catfile($cwd, $path) unless length $dir; 639 | 640 | if (-l $path) { 641 | my $link_target = readlink($path); 642 | die "Can't resolve link $path: $!" unless defined $link_target; 643 | 644 | $link_target = File::Spec->catpath($vol, $dir, $link_target) 645 | unless File::Spec->file_name_is_absolute($link_target); 646 | 647 | return fast_abs_path($link_target); 648 | } 649 | 650 | return $dir eq File::Spec->rootdir 651 | ? File::Spec->catpath($vol, $dir, $file) 652 | : fast_abs_path(File::Spec->catpath($vol, $dir, '')) . '/' . $file; 653 | } 654 | 655 | if (!CORE::chdir($path)) { 656 | _croak("Cannot chdir to $path: $!"); 657 | } 658 | my $realpath = getcwd(); 659 | if (! ((-d $cwd) && (CORE::chdir($cwd)))) { 660 | _croak("Cannot chdir back to $cwd: $!"); 661 | } 662 | $realpath; 663 | } 664 | 665 | # added function alias to follow principle of least surprise 666 | # based on previous aliasing. --tchrist 27-Jan-00 667 | *fast_realpath = \&fast_abs_path; 668 | 669 | 670 | # --- PORTING SECTION --- 671 | 672 | # VMS: $ENV{'DEFAULT'} points to default directory at all times 673 | # 06-Mar-1996 Charles Bailey bailey@newman.upenn.edu 674 | # Note: Use of Cwd::chdir() causes the logical name PWD to be defined 675 | # in the process logical name table as the default device and directory 676 | # seen by Perl. This may not be the same as the default device 677 | # and directory seen by DCL after Perl exits, since the effects 678 | # the CRTL chdir() function persist only until Perl exits. 679 | 680 | sub _vms_cwd { 681 | return $ENV{'DEFAULT'}; 682 | } 683 | 684 | sub _vms_abs_path { 685 | return $ENV{'DEFAULT'} unless @_; 686 | my $path = shift; 687 | 688 | my $efs = _vms_efs; 689 | my $unix_rpt = _vms_unix_rpt; 690 | 691 | if (defined &VMS::Filespec::vmsrealpath) { 692 | my $path_unix = 0; 693 | my $path_vms = 0; 694 | 695 | $path_unix = 1 if ($path =~ m#(?<=\^)/#); 696 | $path_unix = 1 if ($path =~ /^\.\.?$/); 697 | $path_vms = 1 if ($path =~ m#[\[<\]]#); 698 | $path_vms = 1 if ($path =~ /^--?$/); 699 | 700 | my $unix_mode = $path_unix; 701 | if ($efs) { 702 | # In case of a tie, the Unix report mode decides. 703 | if ($path_vms == $path_unix) { 704 | $unix_mode = $unix_rpt; 705 | } else { 706 | $unix_mode = 0 if $path_vms; 707 | } 708 | } 709 | 710 | if ($unix_mode) { 711 | # Unix format 712 | return VMS::Filespec::unixrealpath($path); 713 | } 714 | 715 | # VMS format 716 | 717 | my $new_path = VMS::Filespec::vmsrealpath($path); 718 | 719 | # Perl expects directories to be in directory format 720 | $new_path = VMS::Filespec::pathify($new_path) if -d $path; 721 | return $new_path; 722 | } 723 | 724 | # Fallback to older algorithm if correct ones are not 725 | # available. 726 | 727 | if (-l $path) { 728 | my $link_target = readlink($path); 729 | die "Can't resolve link $path: $!" 
unless defined $link_target; 730 | 731 | return _vms_abs_path($link_target); 732 | } 733 | 734 | # may need to turn foo.dir into [.foo] 735 | my $pathified = VMS::Filespec::pathify($path); 736 | $path = $pathified if defined $pathified; 737 | 738 | return VMS::Filespec::rmsexpand($path); 739 | } 740 | 741 | sub _os2_cwd { 742 | $ENV{'PWD'} = `cmd /c cd`; 743 | chomp $ENV{'PWD'}; 744 | $ENV{'PWD'} =~ s:\\:/:g ; 745 | return $ENV{'PWD'}; 746 | } 747 | 748 | sub _win32_cwd_simple { 749 | $ENV{'PWD'} = `cd`; 750 | chomp $ENV{'PWD'}; 751 | $ENV{'PWD'} =~ s:\\:/:g ; 752 | return $ENV{'PWD'}; 753 | } 754 | 755 | sub _win32_cwd { 756 | # Need to avoid taking any sort of reference to the typeglob or the code in 757 | # the optree, so that this tests the runtime state of things, as the 758 | # ExtUtils::MakeMaker tests for "miniperl" need to be able to fake things at 759 | # runtime by deleting the subroutine. *foo{THING} syntax on a symbol table 760 | # lookup avoids needing a string eval, which has been reported to cause 761 | # problems (for reasons that we haven't been able to get to the bottom of - 762 | # rt.cpan.org #56225) 763 | if (*{$DynaLoader::{boot_DynaLoader}}{CODE}) { 764 | $ENV{'PWD'} = Win32::GetCwd(); 765 | } 766 | else { # miniperl 767 | chomp($ENV{'PWD'} = `cd`); 768 | } 769 | $ENV{'PWD'} =~ s:\\:/:g ; 770 | return $ENV{'PWD'}; 771 | } 772 | 773 | *_NT_cwd = defined &Win32::GetCwd ? \&_win32_cwd : \&_win32_cwd_simple; 774 | 775 | sub _dos_cwd { 776 | if (!defined &Dos::GetCwd) { 777 | $ENV{'PWD'} = `command /c cd`; 778 | chomp $ENV{'PWD'}; 779 | $ENV{'PWD'} =~ s:\\:/:g ; 780 | } else { 781 | $ENV{'PWD'} = Dos::GetCwd(); 782 | } 783 | return $ENV{'PWD'}; 784 | } 785 | 786 | sub _qnx_cwd { 787 | local $ENV{PATH} = ''; 788 | local $ENV{CDPATH} = ''; 789 | local $ENV{ENV} = ''; 790 | $ENV{'PWD'} = `/usr/bin/fullpath -t`; 791 | chomp $ENV{'PWD'}; 792 | return $ENV{'PWD'}; 793 | } 794 | 795 | sub _qnx_abs_path { 796 | local $ENV{PATH} = ''; 797 | local $ENV{CDPATH} = ''; 798 | local $ENV{ENV} = ''; 799 | my $path = @_ ? shift : '.'; 800 | local *REALPATH; 801 | 802 | defined( open(REALPATH, '-|') || exec '/usr/bin/fullpath', '-t', $path ) or 803 | die "Can't open /usr/bin/fullpath: $!"; 804 | my $realpath = ; 805 | close REALPATH; 806 | chomp $realpath; 807 | return $realpath; 808 | } 809 | 810 | sub _epoc_cwd { 811 | $ENV{'PWD'} = EPOC::getcwd(); 812 | return $ENV{'PWD'}; 813 | } 814 | 815 | 816 | # Now that all the base-level functions are set up, alias the 817 | # user-level functions to the right places 818 | 819 | if (exists $METHOD_MAP{$^O}) { 820 | my $map = $METHOD_MAP{$^O}; 821 | foreach my $name (keys %$map) { 822 | local $^W = 0; # assignments trigger 'subroutine redefined' warning 823 | no strict 'refs'; 824 | *{$name} = \&{$map->{$name}}; 825 | } 826 | } 827 | 828 | # In case the XS version doesn't load. 829 | *abs_path = \&_perl_abs_path unless defined &abs_path; 830 | *getcwd = \&_perl_getcwd unless defined &getcwd; 831 | 832 | # added function alias for those of us more 833 | # used to the libc function. 
--tchrist 27-Jan-00 834 | *realpath = \&abs_path; 835 | 836 | 1; 837 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/scorer.bat: -------------------------------------------------------------------------------- 1 | @rem = '--*-Perl-*-- 2 | @echo off 3 | if "%OS%" == "Windows_NT" goto WinNT 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9 5 | goto endofperl 6 | :WinNT 7 | perl -x -S %0 %* 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH. 10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul 11 | goto endofperl 12 | @rem '; 13 | #!perl 14 | #line 15 15 | 16 | BEGIN { 17 | $d = $0; 18 | $d =~ s/\/[^\/][^\/]*$//g; 19 | push(@INC, $d."/lib"); 20 | } 21 | 22 | use strict; 23 | use CorScorer; 24 | 25 | if (@ARGV < 3) { 26 | print q| 27 | use: scorer.bat [name] 28 | 29 | metric: the metric desired to score the results: 30 | muc: MUCScorer (Vilain et al, 1995) 31 | bcub: B-Cubed (Bagga and Baldwin, 1998) 32 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 33 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 34 | all: uses all the metrics to score 35 | 36 | keys_file: file with expected coreference chains in SemEval format 37 | 38 | response_file: file with output of coreference system (SemEval format) 39 | 40 | name: [optional] the name of the document to score. If name is not 41 | given, all the documents in the dataset will be scored. If given 42 | name is "none" then all the documents are scored but only total 43 | results are shown. 44 | 45 | |; 46 | exit; 47 | } 48 | 49 | my $metric = shift (@ARGV); 50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) { 51 | print "Invalid metric\n"; 52 | exit; 53 | } 54 | 55 | 56 | if ($metric eq 'all') { 57 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') { 58 | print "\nMETRIC $m:\n"; 59 | &CorScorer::Score( $m, @ARGV ); 60 | } 61 | } 62 | else { 63 | &CorScorer::Score( $metric, @ARGV ); 64 | } 65 | 66 | __END__ 67 | :endofperl 68 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/scorer.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | 7 | if ($d eq $0) { 8 | unshift(@INC, "lib"); 9 | } 10 | else { 11 | unshift(@INC, $d . "/lib"); 12 | } 13 | } 14 | 15 | use strict; 16 | use CorScorer; 17 | 18 | if (@ARGV < 3) { 19 | print q| 20 | use: scorer.pl [name] 21 | 22 | metric: the metric desired to score the results: 23 | muc: MUCScorer (Vilain et al, 1995) 24 | bcub: B-Cubed (Bagga and Baldwin, 1998) 25 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 26 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 27 | blanc: BLANC 28 | all: uses all the metrics to score 29 | 30 | keys_file: file with expected coreference chains in SemEval format 31 | 32 | response_file: file with output of coreference system (SemEval format) 33 | 34 | name: [optional] the name of the document to score. If name is not 35 | given, all the documents in the dataset will be scored. If given 36 | name is "none" then all the documents are scored but only total 37 | results are shown. 
38 | 39 | |; 40 | exit; 41 | } 42 | 43 | my $metric = shift(@ARGV); 44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) { 45 | print "Invalid metric\n"; 46 | exit; 47 | } 48 | 49 | if ($metric eq 'all') { 50 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') { 51 | print "\nMETRIC $m:\n"; 52 | &CorScorer::Score($m, @ARGV); 53 | } 54 | } 55 | else { 56 | &CorScorer::Score($metric, @ARGV); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/CorefMetricTest.pm: -------------------------------------------------------------------------------- 1 | package CorefMetricTest; 2 | use strict; 3 | use warnings; 4 | use Exporter; 5 | 6 | our @ISA= qw(Exporter); 7 | our @EXPORT = qw(ComputeScoreFromCounts DiffExpectedAndActual); 8 | 9 | ################################################################################ 10 | # Compute recall, precision and F1. 11 | # 12 | # Input: (numerator_counts_for_recall, denominator_counts_for_recall, 13 | # numerator_counts_for_precision, denominator_counts_for_precision) 14 | # Output: (recall, precision, F1) 15 | ################################################################################ 16 | sub ComputeScoreFromCounts { 17 | # The first 4 are also coref link counts when using BLANC. 18 | my ($recall_numerator, $recall_denominator, 19 | $precision_numerator, $precision_denominator, @noncoref_counts) = @_; 20 | # The coref recall, precision, and F1 when using BLANC. 21 | my ($recall, $precision, $F1) = 22 | RPFFromCounts($recall_numerator, $recall_denominator, 23 | $precision_numerator, $precision_denominator); 24 | 25 | # BLANC: @noncoref_counts= 26 | # (noncoref_numerator_recall, noncoref_denominator_recall, 27 | # noncoref_numerator_precision, noncoref_denominator_precision) 28 | if (scalar(@noncoref_counts) == 4) { 29 | ($recall, $precision, $F1) = CorScorer::ComputeBLANCFromCounts( 30 | $recall_numerator, $recall_denominator, $precision_denominator, 31 | $noncoref_counts[0], $noncoref_counts[1], $noncoref_counts[3]); 32 | } 33 | $recall = ($recall < 0) ? 0 : $recall; 34 | $precision = ($precision < 0) ? 0 : $precision; 35 | $F1 = ($F1 < 0) ? 0 : $F1; 36 | return ($recall, $precision, $F1); 37 | } 38 | 39 | sub RPFFromCounts 40 | { 41 | my ($recall_numerator, $recall_denominator, 42 | $precision_numerator, $precision_denominator, @nonCorefCounts) = @_; 43 | my ($recall, $precision, $F1) = (-1, -1, 0); 44 | if ($recall_denominator > 0) { 45 | $recall = $recall_numerator / $recall_denominator; 46 | } 47 | if ($precision_denominator > 0) { 48 | $precision = $precision_numerator / $precision_denominator; 49 | } 50 | 51 | if (($recall + $precision) > 0) { 52 | $F1 = 2 * $recall * $precision / ($recall + $precision); 53 | } 54 | 55 | return ($recall, $precision, $F1); 56 | } 57 | 58 | # deprecated -- see CorScorer::ComputeBLANCFromCounts(). 59 | sub ComputeBLANCRPF 60 | { 61 | my ($coref_recall, $coref_precision, $coref_F1, 62 | $noncoref_recall, $noncoref_precision, $noncoref_F1) = @_; 63 | 64 | my ($recall, $precision, $F1); 65 | 66 | if ($coref_recall < 0 && $noncoref_recall < 0) { 67 | # no key mention. 68 | $recall = $precision = $F1 = 0; 69 | } elsif ($coref_recall < 0) { 70 | # key: all links are non-coref (mentions are all singltons). 71 | $recall = $noncoref_recall; 72 | $precision = ($noncoref_precision < 0) ? 
0 : $noncoref_precision; 73 | $F1 = $noncoref_F1; 74 | } elsif ($noncoref_recall < 0) { 75 | # key: all links are coref (all mentions are in one entity). 76 | $recall = $coref_recall; 77 | $precision = ($coref_precision < 0) ? 0 : $coref_precision; 78 | $F1 = $coref_F1; 79 | } else { 80 | #key contains both coref and non-coref links. 81 | if ($coref_precision < 0 && $noncoref_precision < 0) { 82 | # no response. 83 | $recall = $precision = $F1 = 0; 84 | } else { 85 | if ($coref_precision < 0) { 86 | # response: all links are non-coref, or response mentions are all 87 | # singletons. 88 | $coref_precision = 0; 89 | } elsif ($noncoref_precision < 0) { 90 | # response: all links are coref, or all mentions are in one entity. 91 | $noncoref_precision = 0; 92 | } 93 | $recall = ($coref_recall + $noncoref_recall)/2; 94 | $precision = ($coref_precision + $noncoref_precision)/2; 95 | $F1 = ($coref_F1 + $noncoref_F1)/2; 96 | } 97 | } 98 | 99 | return ($recall, $precision, $F1); 100 | } 101 | 102 | ############################################################################## 103 | # Compute the sum of the duifference between the expected recall, precision, 104 | # F1 and the actual one. 105 | ############################################################################## 106 | sub DiffExpectedAndActual { 107 | my ($expected, $actual) = @_; 108 | if (scalar(@$expected) != scalar(@$actual)) { 109 | print STDERR "Expected and actual have diff dimensions: \n"; 110 | print STDERR " Expected: ", join(" ", @$expected), "\n"; 111 | print STDERR " Actual: ", join(" ", @$actual), "\n"; 112 | return 1.0e5; 113 | } 114 | my $sum = 0.0; 115 | my $i = 0; 116 | foreach my $e (@$expected) { 117 | $sum += abs($e - $actual->[$i]); 118 | ++$i; 119 | } 120 | return $sum; 121 | } 122 | 123 | 1; 124 | 125 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/CorefMetricTestConfig.pm: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # This is the test configuration file. Test cases are stored in an 3 | # array, each element consisting of: 4 | # (1) id: a unique identifier for the test case. 5 | # (2) key_file: the key file to be tested in the CoNLL format. 6 | # (3) response_file: the response file to be tested in the CoNLL format. 7 | # (4) expected_metrics: is a hash label from a metric name (identical to those 8 | # used in the scorer.{pl|bat}) to an array of expected 9 | # metric values. All metrics have 3 expected numbers: 10 | # (recall, precision, F-measure). 11 | ################################################################################ 12 | 13 | package CorefMetricTestConfig; 14 | use strict; 15 | use warnings; 16 | use Exporter; 17 | 18 | our @ISA= qw( Exporter ); 19 | 20 | # these are exported by default. 
21 | our @EXPORT = qw(TestCases); 22 | 23 | # 24 | # Values following metric names are [recall, precision, F1] 25 | # 26 | our @TestCases = ( 27 | { id => "A1", 28 | key_file => "DataFiles/TC-A.key", 29 | response_file => "DataFiles/TC-A-1.response", 30 | expected_metrics => { "muc" => [1, 1, 1], 31 | "bcub" => [6/6, 6/6, 1], 32 | "ceafm" => [1, 1, 1], 33 | "ceafe" => [1, 1, 1], 34 | "blanc" => [1, 1, 1] } 35 | }, 36 | { id => "A2", 37 | key_file => "DataFiles/TC-A.key", 38 | response_file => "DataFiles/TC-A-2.response", 39 | expected_metrics => { "muc" => [1/3, 1/1, 0.5], 40 | "bcub" => [(7/3)/6, 3/3, 14/25], 41 | "ceafm" => [0.5, 1, 0.66667], 42 | "ceafe" => [0.6, 0.9, 0.72], 43 | "blanc" => [0.21591, 1, 0.35385] } 44 | }, 45 | { id => "A3", 46 | key_file => "DataFiles/TC-A.key", 47 | response_file => "DataFiles/TC-A-3.response", 48 | expected_metrics => { "muc" => [3/3, 3/5, 0.75], 49 | "bcub" => [6/6, (4+7/12)/9, 110/163], 50 | "ceafm" => [1, 0.66667, 0.8], 51 | "ceafe" => [0.88571, 0.66429, 0.75918], 52 | "blanc" => [1, 0.42593, 0.59717] } 53 | }, 54 | { id => "A4", 55 | key_file => "DataFiles/TC-A.key", 56 | response_file => "DataFiles/TC-A-4.response", 57 | expected_metrics => { "muc" => [1/3, 1/3, 1/3], 58 | "bcub" => [(3+1/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))], 59 | "ceafm" => [0.66667, 0.57143, 0.61538], 60 | "ceafe" => [0.73333, 0.55, 0.62857], 61 | "blanc" => [0.35227, 0.27206, 0.30357] } 62 | }, 63 | { id => "A5", 64 | key_file => "DataFiles/TC-A.key", 65 | response_file => "DataFiles/TC-A-5.response", 66 | expected_metrics => { "muc" => [1/3, 1/4, 2/7], 67 | "bcub" => [(3+1/3)/6, 2.5/8, 2*(5/9)*(5/16)/((5/9)+(5/16))], 68 | "ceafm" => [0.66667, 0.5, 0.57143], 69 | "ceafe" => [0.68889, 0.51667, 0.59048], 70 | "blanc" => [0.35227, 0.19048, 0.24716] } 71 | }, 72 | { id => "A6", 73 | key_file => "DataFiles/TC-A.key", 74 | response_file => "DataFiles/TC-A-6.response", 75 | expected_metrics => { "muc" => [1/3, 1/4, 2/7], 76 | "bcub" => [(10/3)/6, (1+4/3+1/2)/8, 2*(5/9)*(17/48)/((5/9)+(17/48))], 77 | "ceafm" => [0.66667, 0.5, 0.57143], 78 | "ceafe" => [0.73333, 0.55, 0.62857], 79 | "blanc" => [0.35227, 0.20870, 0.25817] } 80 | }, 81 | { id => "A7", 82 | key_file => "DataFiles/TC-A.key", 83 | response_file => "DataFiles/TC-A-7.response", 84 | expected_metrics => { "muc" => [1/3, 1/3, 1/3], 85 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))], 86 | "ceafm" => [0.66667, 0.57143, 0.61538], 87 | "ceafe" => [0.73333, 0.55, 0.62857], 88 | "blanc" => [0.35227, 0.27206, 0.30357] } 89 | }, 90 | { id => "A8", 91 | key_file => "DataFiles/TC-A.key", 92 | response_file => "DataFiles/TC-A-8.response", 93 | expected_metrics => { "muc" => [1/3, 1/3, 1/3], 94 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))], 95 | "ceafm" => [0.66667, 0.57143, 0.61538], 96 | "ceafe" => [0.73333, 0.55, 0.62857], 97 | "blanc" => [0.35227, 0.27206, 0.30357] } 98 | }, 99 | { id => "A9", 100 | key_file => "DataFiles/TC-A.key", 101 | response_file => "DataFiles/TC-A-9.response", 102 | expected_metrics => { "muc" => [1/3, 1/3, 1/3], 103 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))], 104 | "ceafm" => [0.66667, 0.57143, 0.61538], 105 | "ceafe" => [0.73333, 0.55, 0.62857], 106 | "blanc" => [0.35227, 0.27206, 0.30357] } 107 | }, 108 | { id => "A10", 109 | key_file => "DataFiles/TC-A.key", 110 | response_file => "DataFiles/TC-A-10.response", 111 | expected_metrics => { "muc" => [0, 0, 0], 112 | "bcub" => [3/6, 6/6, 2/3], 113 | #”ceafm" => [1, 1, 
1], 114 | #”ceafe" => [1, 1, 1], 115 | "blanc" => [0.5, 0.36667, 0.42308] } 116 | }, 117 | { id => "A11", 118 | key_file => "DataFiles/TC-A.key", 119 | response_file => "DataFiles/TC-A-11.response", 120 | expected_metrics => { "muc" => [3/3, 3/5, 6/8], 121 | "bcub" => [6/6, (1/6+2*2/6+3*3/6)/6, 14/25], 122 | #”ceafm" => [1, 1, 1], 123 | #”ceafe" => [1, 1, 1], 124 | "blanc" => [0.5, 0.13333, 0.21053] } 125 | }, 126 | { id => "A12", 127 | key_file => "DataFiles/TC-A.key", 128 | response_file => "DataFiles/TC-A-12.response", 129 | expected_metrics => { "muc" => [0, 0, 0], 130 | "bcub" => [(1+1/2+2/3)/6, 4/7, 2*(13/36)*(4/7)/((13/36)+(4/7))], 131 | #”ceafm" => [1, 1, 1], 132 | #”ceafe" => [1, 1, 1], 133 | "blanc" => [0.22727, 0.11905, 0.15625] } 134 | }, 135 | { id => "A13", 136 | key_file => "DataFiles/TC-A.key", 137 | response_file => "DataFiles/TC-A-13.response", 138 | expected_metrics => { "muc" => [1/3, 1/6, 2/9], 139 | "bcub" => [(1+1/2+2*2/3)/6, (1/7+1/7+2*2/7)/7, 2*(17/36)*(6/49)/((17/36)+(6/49))], 140 | #”ceafm" => [1, 1, 1], 141 | #”ceafe" => [1, 1, 1], 142 | "blanc" => [0.125, 0.02381, 0.04] } 143 | }, 144 | { id => "B1", 145 | key_file => "DataFiles/TC-B.key", 146 | response_file => "DataFiles/TC-B-1.response", 147 | expected_metrics => { #"muc" => [1, 1, 1], 148 | #"bcub" => [1, 1, 1], 149 | #”ceafm" => [1, 1, 1], 150 | #”ceafe" => [1, 1, 1], 151 | "blanc" => [1/2 * (1/4 + 1/3), 1/2 * (1/4 + 1/3), 1/2 * (1/4 + 1/3)] } 152 | }, 153 | { id => "C1", 154 | key_file => "DataFiles/TC-C.key", 155 | response_file => "DataFiles/TC-C-1.response", 156 | expected_metrics => { #"muc" => [1, 1, 1], 157 | #"bcub" => [1, 1, 1], 158 | #”ceafm" => [1, 1, 1], 159 | #”ceafe" => [1, 1, 1], 160 | "blanc" => [1/2 * (2/5 + 10/16), 1/2 * (2/5 + 10/16), 1/2 * (2/5 + 10/16)] } 161 | }, 162 | { id => "D1", 163 | key_file => "DataFiles/TC-D.key", 164 | response_file => "DataFiles/TC-D-1.response", 165 | expected_metrics => { "muc" => [9/9, 9/10, 2*(9/9)*(9/10)/(9/9+9/10)], 166 | "bcub" => [12/12, 16/21, 2*(12/12)*(16/21)/(12/12+16/21)], 167 | #"ceafm" => [1, 1, 1], 168 | #"ceafe" => [1, 1, 1], 169 | #"blanc" => [1, 1, 1] 170 | } 171 | }, 172 | { id => "E1", 173 | key_file => "DataFiles/TC-E.key", 174 | response_file => "DataFiles/TC-E-1.response", 175 | expected_metrics => { "muc" => [9/9, 9/10, 2*(9/9)*(9/10)/(9/9+9/10)], 176 | "bcub" => [1, 7/12, 2*1*(7/12)/(1+7/12)], 177 | #"ceafm" => [1, 1, 1], 178 | #"ceafe" => [1, 1, 1], 179 | #"blanc" => [1, 1, 1] 180 | } 181 | }, 182 | { id => "F1", 183 | key_file => "DataFiles/TC-F.key", 184 | response_file => "DataFiles/TC-F-1.response", 185 | expected_metrics => { "muc" => [2/3, 2/2, 2*(2/3)*(2/2)/(2/3+2/2)] , 186 | #"bcub" => , 187 | #"ceafm" => , 188 | #"ceafe" => , 189 | #"blanc" => 190 | } 191 | }, 192 | { id => "G1", 193 | key_file => "DataFiles/TC-G.key", 194 | response_file => "DataFiles/TC-G-1.response", 195 | expected_metrics => { "muc" => [2/2, 2/3, 2*(2/2)*(2/3)/(2/2+2/3)], 196 | #"bcub" => , 197 | #"ceafm" => , 198 | #"ceafe" => , 199 | #"blanc" => 200 | } 201 | }, 202 | { id => "H1", 203 | key_file => "DataFiles/TC-H.key", 204 | response_file => "DataFiles/TC-H-1.response", 205 | expected_metrics => { "muc" => [1, 1, 1], 206 | #"bcub" => , 207 | #"ceafm" => , 208 | #"ceafe" => , 209 | #"blanc" => 210 | } 211 | }, 212 | { id => "I1", 213 | key_file => "DataFiles/TC-I.key", 214 | response_file => "DataFiles/TC-I-1.response", 215 | expected_metrics => { "muc" => [2/3, 2/2, 2*(2/3)*(2/2)/(2/3+2/2)], 216 | #"bcub" => , 217 | #"ceafm" => , 218 | #"ceafe" => 
, 219 | #"blanc" => 220 | } 221 | }, 222 | { id => "J1", 223 | key_file => "DataFiles/TC-J.key", 224 | response_file => "DataFiles/TC-J-1.response", 225 | expected_metrics => { "muc" => [1/2, 1/1, 2*(1/2)*(1/1)/(1/2+1/1)], 226 | #"bcub" => , 227 | #"ceafm" => , 228 | #"ceafe" => , 229 | #"blanc" => 230 | } 231 | }, 232 | { id => "K1", 233 | key_file => "DataFiles/TC-K.key", 234 | response_file => "DataFiles/TC-K-1.response", 235 | expected_metrics => { "muc" => [3/6, 3/6, 3/6], 236 | #"bcub" => , 237 | #"ceafm" => , 238 | #"ceafe" => , 239 | #"blanc" => 240 | } 241 | }, 242 | { id => "L1", 243 | key_file => "DataFiles/TC-L.key", 244 | response_file => "DataFiles/TC-L-1.response", 245 | expected_metrics => { "muc" => [2/5, 2/4, 2*(2/5)*(2/4)/(2/5+2/4)], 246 | #"bcub" => , 247 | #"ceafm" => , 248 | #"ceafe" => , 249 | #"blanc" => 250 | } 251 | }, 252 | { id => "M1", 253 | key_file => "DataFiles/TC-M.key", 254 | response_file => "DataFiles/TC-M-1.response", 255 | expected_metrics => { "muc" => [1, 1, 1], 256 | "bcub" => [1, 1, 1], 257 | "ceafm" => [1, 1, 1], 258 | "ceafe" => [1, 1, 1], 259 | "blanc" => [1, 1, 1] } 260 | }, 261 | { id => "M2", 262 | key_file => "DataFiles/TC-M.key", 263 | response_file => "DataFiles/TC-M-2.response", 264 | expected_metrics => { "muc" => [0, 0, 0], 265 | #"bcub" => , 266 | #"ceafm" => , 267 | #"ceafe" => , 268 | "blanc" => [0, 0, 0] } 269 | }, 270 | { id => "M3", 271 | key_file => "DataFiles/TC-M.key", 272 | response_file => "DataFiles/TC-M-3.response", 273 | expected_metrics => { #"muc" => , 274 | #"bcub" => , 275 | #"ceafm" => , 276 | #"ceafe" => , 277 | "blanc" => [0.26667, 1, 0.42105] } 278 | }, 279 | { id => "M4", 280 | key_file => "DataFiles/TC-M.key", 281 | response_file => "DataFiles/TC-M-4.response", 282 | expected_metrics => { #"muc" => , 283 | #"bcub" => , 284 | #"ceafm" => , 285 | #"ceafe" => , 286 | "blanc" => [0.2, 0.2, 0.2] } 287 | }, 288 | { id => "M5", 289 | key_file => "DataFiles/TC-M.key", 290 | response_file => "DataFiles/TC-M-5.response", 291 | expected_metrics => { "muc" => [0, 0, 0], 292 | #"bcub" => , 293 | #"ceafm" => , 294 | #"ceafe" => , 295 | "blanc" => [0, 0, 0] } 296 | }, 297 | { id => "M6", 298 | key_file => "DataFiles/TC-M.key", 299 | response_file => "DataFiles/TC-M-6.response", 300 | expected_metrics => { #"muc" => , 301 | #"bcub" => , 302 | #"ceafm" => , 303 | #"ceafe" => , 304 | "blanc" => [0.06667, 0.25, 0.10526] } 305 | }, 306 | { id => "N1", 307 | key_file => "DataFiles/TC-N.key", 308 | response_file => "DataFiles/TC-N-1.response", 309 | expected_metrics => { "muc" => [0, 0, 0], 310 | #"bcub" => [1, 1, 1], 311 | #"ceafm" => [1, 1, 1], 312 | #"ceafe" => [1, 1, 1], 313 | "blanc" => [1, 1, 1] } 314 | }, 315 | { id => "N2", 316 | key_file => "DataFiles/TC-N.key", 317 | response_file => "DataFiles/TC-N-2.response", 318 | expected_metrics => { "muc" => [0, 0, 0], 319 | #"bcub" => , 320 | #"ceafm" => , 321 | #"ceafe" => , 322 | "blanc" => [0, 0, 0] } 323 | }, 324 | { id => "N3", 325 | key_file => "DataFiles/TC-N.key", 326 | response_file => "DataFiles/TC-N-3.response", 327 | expected_metrics => { #"muc" => , 328 | #"bcub" => , 329 | #"ceafm" => , 330 | #"ceafe" => , 331 | "blanc" => [0.73333, 1, 0.84615] } 332 | }, 333 | { id => "N4", 334 | key_file => "DataFiles/TC-N.key", 335 | response_file => "DataFiles/TC-N-4.response", 336 | expected_metrics => { "muc" => [0, 0, 0], 337 | #"bcub" => , 338 | #"ceafm" => , 339 | #"ceafe" => , 340 | "blanc" => [0.2, 0.2, 0.2] } 341 | }, 342 | { id => "N5", 343 | key_file => 
"DataFiles/TC-N.key", 344 | response_file => "DataFiles/TC-N-5.response", 345 | expected_metrics => { #"muc" => , 346 | #"bcub" => , 347 | #"ceafm" => , 348 | #"ceafe" => , 349 | "blanc" => [0, 0, 0] } 350 | }, 351 | { id => "N6", 352 | key_file => "DataFiles/TC-N.key", 353 | response_file => "DataFiles/TC-N-6.response", 354 | expected_metrics => { #"muc" => , 355 | #"bcub" => , 356 | #"ceafm" => , 357 | #"ceafe" => , 358 | "blanc" => [0.13333, 0.18182, 0.15385] } 359 | } 360 | 361 | ); 362 | 363 | 1; 364 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-10.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 x - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 z - 17 | test2 0 5 e (4) 18 | test2 0 6 y - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-11.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-12.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 1) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (2) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (3) 13 | test2 0 1 x - 14 | test2 0 2 d1 (4 15 | test2 0 3 d2 4) 16 | test2 0 4 z - 17 | test2 0 5 e (5) 18 | test2 0 6 y - 19 | test2 0 7 f1 (6) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-13.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 0) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (0) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 - 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c - 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 y (2) 17 | test2 0 5 e (2) 18 | test2 0 6 z (3) 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 x (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (1 7 | test1 0 5 b3 1) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (3 7 | test1 0 5 b3 3) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-7.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-8.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A-9.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3(3(3(3(3(3(3(3(3(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)3)3)3)3)3)3)3)3)3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-A.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-B-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-B.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | 
nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-C-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | 
nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-C.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-D-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | 
nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-D.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-E-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (1) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (1) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (1) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (1) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-E.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 
17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-F-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-F.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-G-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 
- 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-G.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-H-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-H.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | 
nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-I-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-I.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-J-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | 
nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-J.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-K-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 (3) 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-K.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (1) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (1) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | 
nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-L-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (3) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-L.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-M-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-M-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-M-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-M-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-M-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-M-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-M.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-N-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-N-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-N-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-N-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-N-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-N-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/DataFiles/TC-N.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/TestCases.README: -------------------------------------------------------------------------------- 1 | TC-A-1 - perfect: 2 | Key/Ref: {a} {bc} {def} 3 | Rsp/Sys: {a} {bc} {def} 4 | Expected: BCUB=1 [recall=6/6, prec=6/6] 5 | Expected: MUC=1 [recall=3/3=1, prec=3/3=1] 6 | Expected: CEAFm=1 [recall=6/6=1, prec=6/6=1] 7 | Expected: CEAFe=1 [recall=3/3=1, prec=3/3=1] 8 | Expected: BLANC=1 [recall_c=4/4=1, prec_c=4/4=1, recall_n=11/11=1, prec_n=11/11=1] 9 | 10 | TC-A-2 -- response with missing mentions/entities 11 | Key/Ref: {a} {bc} {def} 12 | Rsp/Sys: {a} {de} 13 | Expected: BCUB=.5599 [recall=7/18, prec=3/3] 14 | Expected: MUC=0.5 [recall=1/3, prec=1/1] 15 | Expected: CEAFm=6/9=0.67 [common=3, recall=3/6=0.5, Prec=3/3=1] 16 | Expected: CEAFe=3.6/5=0.72 [common=1+4/5=1.8, recall=1.8/3=0.6, Prec=1.8/2=0.9] 17 | Expected: BLANC=0.35 [recall_c=1/4, prec_c=1/1, recall_n=2/11, prec_n=2/2] 18 | 19 | TC-A-3 -- response with false-alarm mentions/entities 20 | Key/Ref: {a} {bc} {def} 21 | Rsp/Sys: {a} {bcx} {defy} {z} 22 | Expected: BCUB=.6748 [recall=6/6, prec=55/108] 23 | Expected: MUC=0.75 [recall=3/3, prec=3/5] 24 | Expected: CEAFm=12/15=0.8 [common=6, recall=6/6=1, prec=6/9=.67] 25 | Expected: CEAFe=3.6/5=0.76 [common=1+4/5+6/7=2.66, recall=2.66/3=0.89, Prec=2.66/4=0.66] 26 | Expected: BLANC=0.60 [recall_c=4/4, prec_c=4/9, recall_n=11/11, prec_n=11/27] 27 | 28 | 29 | TC-A-4 -- response with both missing and false-alarm mentions/entities 30 | Key/Ref: {a} {bc} {def} 31 | Rsp/Sys: {a} {bcx} {dy} {z} 32 | Expected: BCUB=.4683 [recall=5/9, prec=17/42] 33 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3] 34 | Expected: CEAFm=8/13=0.62 [common=4 recall=4/6=0.67 prec=4/7=.57] 35 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, 
Prec=2.2/4=0.55] 36 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17] 37 | 38 | TC-A-5 -- response with both missing and false-alarm mentions/entities, and overlapping mentions (capitalized letter: b and B). Overlapping mention B in the aligned entity. 39 | Key/Ref: {a} {bc} {def} 40 | Rsp/Sys: {a} {bcxB} {dy} {z} 41 | Expected: BCUB=.4 [recall=5/9, prec=5/16] 42 | Expected: MUC=2/7=.28571 [recall=1/3, prec=1/4] 43 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5] 44 | Expected: CEAFe=4.14/7=0.59 [common=1+4/6+2/5=2.07, recall=2.07/3=0.69, Prec=2.07/4=0.52] 45 | Expected: BLANC=0.25 [recall_c=1/4, prec_c=1/7, recall_n=5/11, prec_n=5/21] 46 | 47 | TC-A-6 -- response with both missing and false-alarm mentions/entities, and overlapping mentions (capitalized letter: b and B). Overlapping mention B in an unaligned entity. 48 | Key/Ref: {a} {bc} {def} 49 | Rsp/Sys: {a} {bcx} {dy} {Bz} 50 | Expected: BCUB=.4325 [recall=5/9, prec=17/48] 51 | Expected: MUC=2/7=.28571 [recall=1/3, prec=1/4] 52 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5] 53 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 54 | Expected: BLANC=0.26 [recall_c=1/4, prec_c=1/5, recall_n=5/11, prec_n=5/23] 55 | 56 | TC-A-7 -- response with both missing and false-alarm mentions/entities, and duplicate mentions (capitalized letter: b and B). Duplicate mention B in the same cluster entity (note: this is diff from TC5) -- this tests mention de-duplication. 57 | Key/Ref: {a} {bc} {def} 58 | Rsp/Sys: {a} {bcxB} {dy} {z} 59 | de-dup: {a} {bcx} {dy} {z} 60 | 61 | de-dup: 62 | Expected: BCUB=.4683 [recall=5/9, prec=17/42] 63 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3] 64 | Expected: CEAFm=8/13=0.61538 [common=4, recall=4/6=0.66667, Prec=4/7=0.57143] 65 | Expected: CEAFe=4.14/7=0.62857 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73333, Prec=2.2/4=0.55] 66 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17] 67 | 68 | if No de-dup: 69 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5] 70 | Expected: CEAFe=4.14/7=0.59 [common=1+4/6+2/5=2.07, recall=2.07/3=0.69, Prec=2.07/4=0.52] 71 | 72 | 73 | TC-A-8 -- response with both missing and false-alarm mentions/entities, and duplicate mentions (capitalized letter: b and B). Duplicate mention B in a diff entity from b. 
74 | Key/Ref: {a} {bc} {def} 75 | Rsp/Sys: {a} {bcx} {dy} {Bz} 76 | 77 | De-dup: 78 | Expected: BCUB=.4683 [recall=5/9, prec=17/42] 79 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3] 80 | Expected: CEAFm=8/13=0.61538 [common=4 recall=4/6=0.67 prec=4/7=.57143] 81 | Expected: CEAFe=4.14/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 82 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17] 83 | 84 | If no de-dup: 85 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5] 86 | Expected: CEAFe=4.14/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 87 | 88 | TC-A-9 -- show B3 can be canned: "b" is repeated 10 times so precision approaches 1 89 | Key/Ref: {a} {bc} {def} 90 | Rsp/Sys: {a} {bcx} {dy} {Bx10z} 91 | de-dup Rsp/Sys: {a} {bcx} {dy} {z} 92 | 93 | De-dup: 94 | Expected: BCUB=.4683 [recall=5/9, prec=17/42] 95 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3] 96 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/7=.57143] 97 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55] 98 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17] 99 | 100 | 101 | TC-A-10 - Gold mentions. Only singletons in the response. 102 | Key/Ref: {a} {bc} {def} 103 | Rsp/Sys: {a} {b} {c} {d} {e} {f} 104 | Expected: BCUB=.6667 [recall=3/6, prec=6/6] 105 | Expected: MUC=0 [recall=0, prec=0] 106 | Expected: BLANC=0.42 [recall_c=0/4, prec_c=0/0, f_c=0, recall_n=11/11, prec_n=11/15] 107 | 108 | 109 | TC-A-11 - Gold mentions. All mentions are coreferent in the response. 110 | Key/Ref: {a} {bc} {def} 111 | Rsp/Sys: {abcdef} 112 | 113 | Expected: BCUB=0.5599 [recall=6/6, prec=7/18] 114 | Expected: MUC=6/8=0.75 [recall=3/3, prec=3/5] 115 | Expected: BLANC=0.21 [recall_c=4/4, prec_c=4/15, recall_n=0/11, prec_n=0/0, f_n=0] 116 | 117 | 118 | TC-A-12 - System mentions. Only singletons in the response. 119 | Key/Ref: {a} {bc} {def} 120 | Rsp/Sys: {a} {x} {y} {c} {d} {e} {z} 121 | 122 | Expected: BCUB=0.4425 [recall=13/36, prec=4/7] 123 | Expected: MUC=0 [recall=0, prec=0] 124 | Expected: BLANC=0.16 [recall_c=0/4, prec_c=0/0, f_c=0, recall_n=5/11, prec_n=5/21] 125 | 126 | 127 | TC-A-13 - System mentions. All mentions are coreferent in the response. 128 | Key/Ref: {a} {bc} {def} 129 | Rsp/Sys: {axycdez} 130 | 131 | Expected: BCUB=0.19447 [recall=17/36, prec=6/49] 132 | Expected: MUC=2/9 [recall=1/3, prec=1/6] 133 | Expected: BLANC=0.04 [recall_c=1/4, prec_c=1/21, recall_n=0/11, prec_n=0/0, f_n=0] 134 | 135 | 136 | TC-B-1 -- spurious mention (x) and missing mention (a) in response; link (bc) is a key non-coref link and is an incorrect response coref link. 137 | 138 | Keys: {ab} {cde} 139 | Response: {bcx} {de} 140 | 141 | key coref links: C_k = {(ab), (cd), (de), (ce)} 142 | key non-coref links: N_k = {(ac), (ad), (ae), (bc), (bd), (be)} 143 | 144 | response coref links: C_r = {(bc), (bx), (cx), (de)} 145 | response non-coref links: N_r = {(bd), (be), (cd), (ce), (xd), (xe)} 146 | 147 | (I'll use ^ for set intersection) 148 | C_k ^ C_r = {(de)} => R_c = |C_k^C_r| / |C_k| = 1/4, P_c = 1/|C_r| = 1/4, F_c = 1/4 149 | N_k ^ N_r = {(bd), (be)} => R_n = |N_k^N_r|/|N_k| = 2/6, P_n = 2/|N_r| = 2/6, F_n = 1/3 150 | 151 | BLANC = 1/2 (F_c + F_n) = 7/24. 152 | 153 | 154 | 155 | 156 | TC-C-1 -- same as TC14 plus a new entity and its correct prediction shown. this was for testing the more than two entity case. 
157 | 158 | Keys: {ab} {cde} {fg} 159 | Response: {bcx} {de} {fg} 160 | 161 | key coref links: C_k = {(ab), (cd), (de), (ce), (fg)} 162 | key non-coref links: N_k = {(ac), (ad), (ae), (bc), (bd), (be), (af), (ag), (bf), (bg), (cf), (cg), (df), (dg), (ef), (eg)} 163 | 164 | response coref links: C_r = {(bc), (bx), (cx), (de), (fg)} 165 | response non-coref links: N_r = {(bd), (be), (cd), (ce), (xd), (xe), (bf), (bg), (cf), (cg), (xf), (xg), (df), (dg), (ef), (eg)} 166 | 167 | (I'll use ^ for set intersection) 168 | C_k ^ C_r = {(de), (fg)} => R_c = |C_k^C_r| / |C_k| = 2/5, P_c = 2/|C_r| = 2/5, F_c = 2/5 = 0.40 169 | N_k ^ N_r = {(bd), (be), (bf), (bg), (cf), (cg), (df), (dg), (ef), (eg)} => R_n = |N_k^N_r|/|N_k| = 10/16, P_n = 10/|N_r| = 10/16, F_n = 10/16 = 0.625 170 | 171 | BLANC = 1/2 (F_c + F_n) = 0.5125 172 | 173 | 174 | 175 | # ------------ examples from the B-CUBED paper 176 | 177 | TC-D-1 -- merging one small cluster with a big cluster 178 | 179 | key: {12345} {67} {89ABC} 180 | --- 181 | 182 | 1-2-3-4-5 183 | 184 | 6-7 185 | 186 | 8-9-A-B-C 187 | 188 | 189 | 190 | response: {12345} {6789ABC} 191 | --------- 192 | 193 | 1-2-3-4-5 194 | 195 | 6-7 196 | | 197 | 8-9-A-B-C 198 | 199 | 200 | Expected: BCUB [r=12/12, p=16/21, f=0.864864865] 201 | Expected: MUC [r=9/9, p=9/10, f=0.947368421] 202 | 203 | 204 | 205 | TC-E-1 -- merging two big clusters 206 | 207 | 208 | key: {12345} {67} {89ABC} 209 | --- 210 | 211 | 1-2-3-4-5 212 | 213 | 6-7 214 | 215 | 8-9-A-B-C 216 | 217 | 218 | 219 | response: {1234589ABC} {67} 220 | --------- 221 | 222 | 1-2-3-4-5 223 | | 224 | 6-7 | 225 | | 226 | 8-9-A-B-C 227 | 228 | 229 | Expected: BCUB [r=1, p=7/12, f=0.736842105] 230 | Expected: MUC [r=9/9, p=9/10, f=0.947368421] 231 | 232 | 233 | # ---------- examples from the MUC paper 234 | 235 | TC-F-1 -- 236 | 237 | key: {ABCD} ---- Links: A-B; B-C; C-D 238 | response: {AB} {CD} ---- Links: A-B; C-D 239 | 240 | Expected: MUC [r=2/3, p=2/2, f=2*(2/3)*(2/2)/(2/3+2/2)] 241 | 242 | 243 | 244 | TC-G-1 -- 245 | 246 | key: {AB} {CD} ---- Links: A-B; C-D 247 | response: {ABCD} ---- Links: A-B; B-C; C-D 248 | 249 | Expected: MUC [r=2/2, p=2/3, f=2*(2/2)*(2/3)/(2/2+2/3)] 250 | 251 | 252 | 253 | TC-H-1 -- 254 | 255 | key: {ABCD} ---- Links: A-B; B-C; B-D 256 | response: {ABCD} ---- Links: A-B; B-C; C-D 257 | 258 | Expected: MUC [r=1, p=1, f=1] 259 | 260 | 261 | 262 | TC-I-1 -- 263 | 264 | key: {ABCD} ---- Links: A-B; B-C; B-D 265 | response: {AB} {CD} ---- Links: A-B; C-D 266 | 267 | Expected: MUC [r=2/3, p=2/2, f=2*(2/3)*(2/2)/(2/3+2/2)] 268 | 269 | 270 | 271 | TC-J-1 -- 272 | 273 | key: {ABC} ---- Links: A-B; B-C 274 | response: {AC} ---- Links: A-C 275 | 276 | Expected: MUC [r=1/2, p=1/1, f=2*(1/2)*(1/1)/(1/2+1/1)] 277 | 278 | 279 | 280 | TC-K-1 -- 281 | 282 | key: {BCDEGHJ} ---- Links: B-C; C-D; D-E; E-G; G-H; H-J 283 | response: {ABC} {DEF} {GHI} ---- Links: A-B; B-C; D-E; E-F; G-H; H-I 284 | 285 | Expected: MUC [r=3/6, p=3/6, f=3/6] 286 | 287 | 288 | 289 | TC-L-1 -- 290 | 291 | key: {ABC} {DEFG} ---- Links: A-B; B-C; D-E; E-F; F-G 292 | response: {AB} {CD} {FGH} ---- Links: A-B; C-D; F-G; G-H 293 | 294 | Expected: MUC [r=2/5, p=2/4, f=2*(2/5)*(2/4)/(2/5+2/4)] 295 | 296 | 297 | TC-M-1 - Only coreferent mentions in the key. Gold mentions. Matching response. Since the key contains no non-coreference link, BLANC equals recall_c, prec_c, F_c. 
298 | Key/Ref: {abcdef} 299 | Rsp/Sys: {abcdef} 300 | 301 | Expected: BCUB=1 302 | Expected: MUC=1 303 | Expected: CEAFm=1 304 | Expected: CEAFe=1 305 | Expected: BLANC=1 [recall_c=15/15=1, prec_c=15/15=1] 306 | 307 | 308 | TC-M-2 - Only coreferent mentions in the key. Gold mentions. Response contains only non-coreference links. 309 | Key/Ref: {abcdef} 310 | Rsp/Sys: {a} {b} {c} {d} {e} {f} 311 | 312 | Expected: MUC=0 313 | Expected: BLANC=0 [recall_c=0/15=0, prec_c=0/0=0] 314 | 315 | 316 | TC-M-3 - Only coreferent mentions in the key. Gold mentions. Response contains coreference and non-coreference links. 317 | Key/Ref: {abcdef} 318 | Rsp/Sys: {ab} {cde} {f} 319 | 320 | Expected: BLANC=0.42 [recall_c=4/15, prec_c=4/4=1] 321 | 322 | 323 | TC-M-4 - Only coreferent mentions in the key. System mentions: only coreferent mentions. Since the key contains no non-coreference link, BLANC equals recall_c, prec_c, F_c. 324 | Key/Ref: {abcdef} 325 | Rsp/Sys: {abcxyz} 326 | 327 | Expected: BLANC=0.20 [recall_c=3/15, prec_c=3/15] 328 | 329 | 330 | TC-M-5 - Only coreferent mentions in the key. System mentions: only singletons. 331 | Key/Ref: {abcdef} 332 | Rsp/Sys: {a} {b} {c} {x} {y} {z} 333 | 334 | Expected: MUC=0 335 | Expected: BLANC=0 [recall_c=0/15=0, prec_c=0/0=0] 336 | 337 | 338 | TC-M-6 - Only coreferent mentions in the key. System mentions: coreference and non-coreference links. 339 | Key/Ref: {abcdef} 340 | Rsp/Sys: {ab} {cxy} {z} 341 | 342 | Expected: BLANC=0.11 [recall_c=1/15, prec_c=1/4] 343 | 344 | 345 | TC-N-1 - Only singletons in the key. Gold mentions. Matching response. Since the key contains no coreference link, BLANC equals recall_n, prec_n, F_n. 346 | Key/Ref: {a} {b} {c} {d} {e} {f} 347 | Rsp/Sys: {a} {b} {c} {d} {e} {f} 348 | 349 | Expected: BCUB=1 350 | Expected: MUC=0 351 | Expected: CEAFm=1 352 | Expected: CEAFe=1 353 | Expected: BLANC=1 [recall_n=15/15=1, prec_n=15/15=1] 354 | 355 | 356 | TC-N-2 - Only singletons in the key. Gold mentions. Response contains only coreference links. 357 | Key/Ref: {a} {b} {c} {d} {e} {f} 358 | Rsp/Sys: {abcdef} 359 | 360 | Expected: BLANC=0 [recall_n=0/15=0, prec_n=0/0=0] 361 | 362 | 363 | TC-N-3 - Only singletons in the key. Gold mentions. Response contains coreference and non-coreference links. 364 | Key/Ref: {a} {b} {c} {d} {e} {f} 365 | Rsp/Sys: {ab} {cde} {f} 366 | 367 | Expected: BLANC=0.85 [recall_n=11/15, prec_n=11/11=1] 368 | 369 | 370 | TC-N-4 - Only singletons in the key. System mentions: only singletons. Since the key contains no coreference link, BLANC equals recall_n, prec_n, F_n. 371 | Key/Ref: {a} {b} {c} {d} {e} {f} 372 | Rsp/Sys: {a} {b} {c} {x} {y} {z} 373 | 374 | Expected: MUC=0 375 | Expected: BLANC=0.20 [recall_n=3/15, prec_n=3/15] 376 | 377 | 378 | TC-N-5 - Only singletons in the key. System mentions: only coreference links. 379 | Key/Ref: {a} {b} {c} {d} {e} {f} 380 | Rsp/Sys: {abcxyz} 381 | 382 | Expected: BLANC=0 [recall_n=0/15=0, prec_n=0/0=0] 383 | 384 | 385 | TC-N-6 - Only singletons in the key. System mentions: coreference and non-coreference links. 
386 | Key/Ref: {a} {b} {c} {d} {e} {f} 387 | Rsp/Sys: {ab} {cxy} {z} 388 | 389 | Expected: BLANC=0.15 [recall_n=2/15, prec_n=2/11] 390 | 391 | -------------------------------------------------------------------------------- /reference-coreference-scorers-8.01/test/test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | push(@INC, $d); 7 | push(@INC, $d . "/../lib"); 8 | } 9 | 10 | use strict; 11 | use CorScorer; 12 | use CorefMetricTest; 13 | use CorefMetricTestConfig; 14 | 15 | my $error_tolerance = 1.e-4; 16 | my $script_dir = $0; 17 | $script_dir =~ s/\/[^\/][^\/]*$//g; 18 | 19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) { 20 | my $id = $test_case->{'id'}; 21 | my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 22 | $script_dir . "/" . $test_case->{'response_file'}); 23 | print "\nTesting case ($id): keyFile=", $key_response_files[0], 24 | " responseFile=", $key_response_files[1], "\n"; 25 | my $expected_metrics = $test_case->{'expected_metrics'}; 26 | foreach my $metric_name (sort keys %$expected_metrics) { 27 | my $expected_values = $expected_metrics->{$metric_name}; 28 | *::SAVED_STDOUT = *STDOUT; 29 | *STDOUT = *::SUPRRES_STDOUT; 30 | my @actual_counts = &CorScorer::Score($metric_name, @key_response_files); 31 | # Compute R,P,and F1 from raw counts. 32 | my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts); 33 | *STDOUT = *::SAVED_STDOUT; 34 | my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values); 35 | printf " metric: %+10s", $metric_name; 36 | if ($diff < $error_tolerance) { 37 | print " => PASS\n"; 38 | } else { 39 | print " => FAIL\n"; 40 | print " Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n"; 41 | print " Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n"; 42 | #exit(1); 43 | } 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==3.0.2 2 | pyhocon 3 | boltons 4 | scikit-learn==0.22.2 5 | stanza==1.1.1 6 | -------------------------------------------------------------------------------- /runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import torch 4 | import tqdm 5 | import random 6 | import time 7 | 8 | from transformers import * 9 | from models import EventCorefModel 10 | from utils import RunningAverage, prepare_configs, get_n_params 11 | from scorer import evaluate 12 | from data import load_oneie_dataset 13 | from argparse import ArgumentParser 14 | 15 | def train(config_name): 16 | # Prepare tokenizer, dataset, and model 17 | configs = prepare_configs(config_name) 18 | tokenizer = AutoTokenizer.from_pretrained(configs['transformer']) 19 | predictions_path = None if configs['use_groundtruth'] else configs['predictions_path'] 20 | train_set, dev_set, test_set = \ 21 | load_oneie_dataset(configs['base_dataset_path'], tokenizer, predictions_path, 22 | increase_ace_dev_set=configs['increase_ace_dev_set']) 23 | model = EventCorefModel(configs, train_set.event_types) 24 | print('Initialized tokenizer, dataset, and model') 25 | print('Number of parameters is {}'.format(get_n_params(model))) 26 | 27 | # Initialize the optimizer 28 | num_train_docs = len(train_set) 29 | epoch_steps = 
int(math.ceil(num_train_docs / configs['batch_size'])) 30 | num_train_steps = int(epoch_steps * configs['epochs']) 31 | num_warmup_steps = int(num_train_steps * 0.1) 32 | optimizer = model.get_optimizer(num_warmup_steps, num_train_steps) 33 | print('Initialized optimizer') 34 | 35 | # Main training loop 36 | best_dev_score, iters, batch_loss = 0.0, 0, 0 37 | for epoch in range(configs['epochs']): 38 | #print('Epoch: {}'.format(epoch)) 39 | print('\n') 40 | progress = tqdm.tqdm(total=epoch_steps, ncols=80, 41 | desc='Train {}'.format(epoch)) 42 | accumulated_loss = RunningAverage() 43 | 44 | train_indices = list(range(num_train_docs)) 45 | random.shuffle(train_indices) 46 | start_train = time.time() 47 | for train_idx in train_indices: 48 | iters += 1 49 | inst = train_set[train_idx] 50 | iter_loss = model(inst, is_training=True)[0] 51 | iter_loss /= configs['batch_size'] 52 | iter_loss.backward() 53 | batch_loss += iter_loss.data.item() 54 | if iters % configs['batch_size'] == 0: 55 | accumulated_loss.update(batch_loss) 56 | torch.nn.utils.clip_grad_norm_(model.parameters(), configs['max_grad_norm']) 57 | optimizer.step() 58 | optimizer.zero_grad() 59 | batch_loss = 0 60 | # Update progress bar 61 | progress.update(1) 62 | progress.set_postfix_str('Average Train Loss: {}'.format(accumulated_loss())) 63 | progress.close() 64 | print('One epoch training took {} seconds'.format(time.time() - start_train)) 65 | 66 | # Evaluation after each epoch 67 | print('Evaluation on the dev set', flush=True) 68 | start_dev = time.time() 69 | dev_score = evaluate(model, dev_set, configs)['avg'] 70 | print('Evaluation on dev set took {} seconds'.format(time.time() - start_dev)) 71 | 72 | # Save model if it has better dev score 73 | if dev_score > best_dev_score: 74 | best_dev_score = dev_score 75 | # Save the model 76 | save_path = os.path.join(configs['saved_path'], 'model.pt') 77 | torch.save({'model_state_dict': model.state_dict()}, save_path) 78 | print('Saved the model', flush=True) 79 | # Evaluation on the test set 80 | print('Evaluation on the test set', flush=True) 81 | start_test = time.time() 82 | evaluate(model, test_set, configs) 83 | print('Evaluation on test set took {} seconds'.format(time.time() - start_test)) 84 | 85 | if __name__ == "__main__": 86 | # Parse argument 87 | parser = ArgumentParser() 88 | parser.add_argument('-c', '--config_name', default='basic') 89 | args = parser.parse_args() 90 | 91 | # Start training 92 | train(args.config_name) 93 | -------------------------------------------------------------------------------- /scorer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | import tempfile 5 | import subprocess 6 | import torch 7 | import re 8 | 9 | from boltons.iterutils import pairwise, windowed 10 | from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score 11 | 12 | COREF_RESULTS_REGEX = re.compile(r".*Coreference: Recall: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tPrecision: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tF1: ([0-9.]+)%.*", re.DOTALL) 13 | BLANC_RESULTS_REGEX = re.compile(r".*BLANC: Recall: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tPrecision: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tF1: ([0-9.]+)%.*", re.DOTALL) 14 | 15 | def evaluate(model, eval_set, configs, verbose=True): 16 | return evaluate_coref(model, eval_set, configs, verbose) 17 | 18 | def evaluate_coref(model, eval_set, configs, verbose=True): 19 | predictions = [] 20 | for inst in eval_set: 21 | # Apply the model 
for prediction 22 | with torch.no_grad(): 23 | loss, preds = model(inst, is_training=False) 24 | preds = [x.cpu().data.numpy() for x in preds] 25 | top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores = preds 26 | predicted_antecedents = get_predicted_antecedents(top_antecedents, top_antecedent_scores) 27 | 28 | predicted_clusters, m2cluster = [], {} 29 | for ix, (s, e) in enumerate(zip(top_span_starts, top_span_ends)): 30 | if predicted_antecedents[ix] < 0: 31 | cluster_id = len(predicted_clusters) 32 | predicted_clusters.append([(s, e)]) 33 | else: 34 | antecedent_idx = predicted_antecedents[ix] 35 | p_s, p_e = top_span_starts[antecedent_idx], top_span_ends[antecedent_idx] 36 | cluster_id = m2cluster[(p_s, p_e)] 37 | predicted_clusters[cluster_id].append((s,e)) 38 | m2cluster[(s,e)] = cluster_id 39 | predictions.append(m2cluster) 40 | 41 | 42 | with tempfile.NamedTemporaryFile(delete=False, mode='w') as gold_file: 43 | output_gold_conll(gold_file, eval_set.data) 44 | with tempfile.NamedTemporaryFile(delete=False, mode='w') as prediction_file: 45 | for ix, inst in enumerate(eval_set.data): 46 | doc_id = inst.doc_id 47 | m2cluster = predictions[ix] 48 | cluster_labels = ['-'] * inst.num_words 49 | for (start, end) in m2cluster.keys(): 50 | c_label = m2cluster[(start, end)] 51 | end = end - 1 52 | if start == end: 53 | cluster_labels[start] = '({})'.format(c_label) 54 | else: 55 | cluster_labels[start] = '({}'.format(c_label) 56 | cluster_labels[end] = '{})'.format(c_label) 57 | 58 | # Write the doc info to output file 59 | prediction_file.write('#begin document ({}); part 000\n'.format(doc_id)) 60 | for i in range(inst.num_words): 61 | prediction_file.write('{} {}\n'.format(doc_id, cluster_labels[i])) 62 | prediction_file.write('\n') 63 | prediction_file.write('#end document\n') 64 | 65 | gold_file.flush() 66 | prediction_file.flush() 67 | print("Gold conll file: {}".format(gold_file.name)) 68 | print("Prediction conll file: {}".format(prediction_file.name)) 69 | metrics = ("muc", "bcub", "ceafe", "blanc", "ceafm") 70 | summary = { m: official_conll_eval(gold_file.name, prediction_file.name, m) for m in metrics} 71 | os.remove(gold_file.name) 72 | os.remove(prediction_file.name) 73 | 74 | avg = 0.0 75 | for metric in metrics[:-1]: avg += summary[metric]['f'] # Excluding ceafm when calculating avg 76 | avg /= len(metrics[:-1]) 77 | summary['avg'] = avg 78 | 79 | summary_text = '' 80 | for metric in metrics: 81 | summary_text += '[{}] F1 = {} | '.format(metric, summary[metric]['f']) 82 | summary_text += 'AVG = {}'.format(avg) 83 | print(summary_text) 84 | 85 | return summary 86 | 87 | 88 | def official_conll_eval(gold_path, predicted_path, metric, official_stdout=False): 89 | cmd = ["reference-coreference-scorers-8.01/scorer.pl", metric, gold_path, predicted_path, "none"] 90 | process = subprocess.Popen(cmd, stdout=subprocess.PIPE) 91 | stdout, stderr = process.communicate() 92 | process.wait() 93 | 94 | stdout = stdout.decode("utf-8") 95 | if stderr is not None: 96 | print(stderr) 97 | 98 | if official_stdout: 99 | print("Official result for {}".format(metric)) 100 | print(stdout) 101 | 102 | regexp = COREF_RESULTS_REGEX if metric != 'blanc' else BLANC_RESULTS_REGEX 103 | coref_results_match = re.match(regexp, stdout) 104 | recall = float(coref_results_match.group(1)) 105 | precision = float(coref_results_match.group(2)) 106 | f1 = float(coref_results_match.group(3)) 107 | return { "r": recall, "p": precision, "f": f1 } 108 | 109 | def 
get_predicted_antecedents(antecedents, antecedent_scores): 110 | predicted_antecedents = [] 111 | for i, index in enumerate(np.argmax(antecedent_scores, axis=1) - 1): 112 | if index < 0: predicted_antecedents.append(-1) 113 | else: predicted_antecedents.append(antecedents[i, index]) 114 | return predicted_antecedents 115 | 116 | def output_gold_conll(gold_file, documents): 117 | for doc in documents: 118 | doc_id = doc.doc_id 119 | 120 | # Build cluster_labels 121 | eventid2label = {} 122 | cluster_labels = ['-'] * doc.num_words 123 | for e in doc.event_mentions: 124 | mention_id = e['id'] 125 | event_id = mention_id[:mention_id.rfind('-')] 126 | if not event_id in eventid2label: 127 | eventid2label[event_id] = 1 + len(eventid2label) 128 | start_idx, end_idx = e['trigger']['start'], e['trigger']['end']-1 129 | if start_idx == end_idx: 130 | cluster_labels[start_idx] = '({})'.format(eventid2label[event_id]) 131 | else: 132 | cluster_labels[start_idx] = '({}'.format(eventid2label[event_id]) 133 | cluster_labels[end_idx] = '{})'.format(eventid2label[event_id]) 134 | 135 | # Write the doc info to output file 136 | gold_file.write('#begin document ({}); part 000\n'.format(doc_id)) 137 | for i in range(doc.num_words): 138 | gold_file.write('{} {}\n'.format(doc_id, cluster_labels[i])) 139 | gold_file.write('\n') 140 | gold_file.write('#end document\n') 141 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import torch 4 | import pyhocon 5 | import numpy as np 6 | import tempfile 7 | from boltons.iterutils import pairwise, windowed 8 | 9 | def prepare_configs(config_name, verbose=True): 10 | configs = pyhocon.ConfigFactory.parse_file('configs/basic.conf')[config_name] 11 | configs['saved_path'] = 'trained' 12 | if not os.path.exists(configs['saved_path']): 13 | os.makedirs(configs['saved_path']) 14 | if verbose: print(configs) 15 | return configs 16 | 17 | def flatten(l): 18 | return [item for sublist in l for item in sublist] 19 | 20 | def listRightIndex(alist, value): 21 | return len(alist) - alist[-1::-1].index(value) -1 22 | 23 | def bucket_distance(distances, nb_buckets=15): 24 | """ 25 | Places the given values (designed for distances) into semi-logscale buckets. 26 | For example if nb_buckets = 15 then: 27 | [0, 1, 2, 3, 4, 5-7, 8-15, 16-31, 32-63, 64-127, 128-255, 256-511, 512-1023, 1024-2047, 2048+]. 
28 | """ 29 | logspace_idx = torch.floor(torch.log2(distances.float())).long() + 3 30 | use_identity = (distances <= 4).long() 31 | combined_idx = use_identity * distances + (1 - use_identity) * logspace_idx 32 | return torch.clamp(combined_idx, 0, nb_buckets-1) 33 | 34 | def extract_input_masks_from_mask_windows(mask_windows): 35 | input_masks = [] 36 | for mask_window in mask_windows: 37 | subtoken_count = listRightIndex(mask_window, -3) + 1 38 | input_masks.append([1] * subtoken_count + [0] * (len(mask_window) - subtoken_count)) 39 | input_masks = np.array(input_masks) 40 | return input_masks 41 | 42 | def convert_to_sliding_window(expanded_tokens, sliding_window_size, tokenizer): 43 | """ 44 | construct sliding windows, allocate tokens and masks into each window 45 | :param expanded_tokens: 46 | :param sliding_window_size: 47 | :return: 48 | """ 49 | CLS = tokenizer.convert_tokens_to_ids(['[CLS]']) 50 | SEP = tokenizer.convert_tokens_to_ids(['[SEP]']) 51 | PAD = tokenizer.convert_tokens_to_ids(['[PAD]']) 52 | expanded_masks = [1] * len(expanded_tokens) 53 | sliding_windows = construct_sliding_windows(len(expanded_tokens), sliding_window_size - 2) 54 | token_windows = [] # expanded tokens to sliding window 55 | mask_windows = [] # expanded masks to sliding window 56 | for window_start, window_end, window_mask in sliding_windows: 57 | original_tokens = expanded_tokens[window_start: window_end] 58 | original_masks = expanded_masks[window_start: window_end] 59 | window_masks = [-2 if w == 0 else o for w, o in zip(window_mask, original_masks)] 60 | one_window_token = CLS + original_tokens + SEP + PAD * (sliding_window_size - 2 - len(original_tokens)) 61 | one_window_mask = [-3] + window_masks + [-3] + [-4] * (sliding_window_size - 2 - len(original_tokens)) 62 | assert len(one_window_token) == sliding_window_size 63 | assert len(one_window_mask) == sliding_window_size 64 | token_windows.append(one_window_token) 65 | mask_windows.append(one_window_mask) 66 | return token_windows, mask_windows 67 | 68 | def construct_sliding_windows(sequence_length: int, sliding_window_size: int): 69 | """ 70 | construct sliding windows for BERT processing 71 | :param sequence_length: e.g. 9 72 | :param sliding_window_size: e.g. 
4 73 | :return: [(0, 4, [1, 1, 1, 0]), (2, 6, [0, 1, 1, 0]), (4, 8, [0, 1, 1, 0]), (6, 9, [0, 1, 1])] 74 | """ 75 | sliding_windows = [] 76 | stride = int(sliding_window_size / 2) 77 | start_index = 0 78 | end_index = 0 79 | while end_index < sequence_length: 80 | end_index = min(start_index + sliding_window_size, sequence_length) 81 | left_value = 1 if start_index == 0 else 0 82 | right_value = 1 if end_index == sequence_length else 0 83 | mask = [left_value] * int(sliding_window_size / 4) + [1] * int(sliding_window_size / 2) \ 84 | + [right_value] * (sliding_window_size - int(sliding_window_size / 2) - int(sliding_window_size / 4)) 85 | mask = mask[: end_index - start_index] 86 | sliding_windows.append((start_index, end_index, mask)) 87 | start_index += stride 88 | assert sum([sum(window[2]) for window in sliding_windows]) == sequence_length 89 | return sliding_windows 90 | 91 | # Get total number of parameters in a model 92 | def get_n_params(model): 93 | pp=0 94 | for p in list(model.parameters()): 95 | nn=1 96 | for s in list(p.size()): 97 | nn = nn*s 98 | pp += nn 99 | return pp 100 | 101 | class RunningAverage(): 102 | """A simple class that maintains the running average of a quantity 103 | Example: 104 | ``` 105 | loss_avg = RunningAverage() 106 | loss_avg.update(2) 107 | loss_avg.update(4) 108 | loss_avg() = 3 109 | ``` 110 | """ 111 | def __init__(self): 112 | self.steps = 0 113 | self.total = 0 114 | 115 | def update(self, val): 116 | self.total += val 117 | self.steps += 1 118 | 119 | def __call__(self): 120 | return self.total/float(self.steps) 121 | --------------------------------------------------------------------------------
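
The BLANC walkthroughs for TC-B-1 and TC-C-1 in TestCases.README spell out the link-set arithmetic step by step: build the coreference and non-coreference link sets for the key and the response, compute recall, precision, and F1 for each link type, and average the two F1 scores. The sketch below reproduces only that basic link-set arithmetic from the walkthroughs; it is not part of the repository, the function names are illustrative, and it does not replicate the full BLANC implementation in scorer.pl.

```python
from itertools import combinations

def link_sets(clusters):
    # Coreference links = unordered mention pairs inside a cluster;
    # non-coreference links = all remaining pairs over the same mentions.
    mentions = [m for cluster in clusters for m in cluster]
    coref = {frozenset(p) for cluster in clusters for p in combinations(cluster, 2)}
    all_pairs = {frozenset(p) for p in combinations(mentions, 2)}
    return coref, all_pairs - coref

def f1(common, n_key, n_response):
    recall = common / n_key if n_key else 0.0
    precision = common / n_response if n_response else 0.0
    return 2 * precision * recall / (precision + recall) if precision + recall else 0.0

def blanc(key_clusters, response_clusters):
    c_k, n_k = link_sets(key_clusters)
    c_r, n_r = link_sets(response_clusters)
    f_c = f1(len(c_k & c_r), len(c_k), len(c_r))  # coreference-link F1
    f_n = f1(len(n_k & n_r), len(n_k), len(n_r))  # non-coreference-link F1
    return (f_c + f_n) / 2

# TC-B-1: Keys {ab} {cde}, Response {bcx} {de}  ->  7/24 ~ 0.2917
print(blanc([{'a', 'b'}, {'c', 'd', 'e'}],
            [{'b', 'c', 'x'}, {'d', 'e'}]))
# TC-C-1: Keys {ab} {cde} {fg}, Response {bcx} {de} {fg}  ->  0.5125
print(blanc([{'a', 'b'}, {'c', 'd', 'e'}, {'f', 'g'}],
            [{'b', 'c', 'x'}, {'d', 'e'}, {'f', 'g'}]))
```

The two printed values match the expected BLANC figures listed for those test cases (7/24 for TC-B-1 and 0.5125 for TC-C-1).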