├── .gitignore
├── README.md
├── configs
│   └── basic.conf
├── constants.py
├── data
│   ├── __init__.py
│   ├── base.py
│   └── helpers.py
├── evaluate_and_visualize.py
├── models
│   ├── __init__.py
│   ├── base.py
│   ├── encoder.py
│   └── helpers.py
├── reference-coreference-scorers-8.01
│   ├── README.txt
│   ├── lib
│   │   ├── Algorithm
│   │   │   ├── Munkres.pm
│   │   │   └── README.Munkres
│   │   ├── CorScorer.pm
│   │   ├── Cwd.pm
│   │   ├── Data
│   │   │   └── Dumper.pm
│   │   └── Math
│   │       └── Combinatorics.pm
│   ├── scorer.bat
│   ├── scorer.pl
│   └── test
│       ├── CorefMetricTest.pm
│       ├── CorefMetricTestConfig.pm
│       ├── DataFiles
│       │   ├── TC-A-1.response
│       │   ├── TC-A-10.response
│       │   ├── TC-A-11.response
│       │   ├── TC-A-12.response
│       │   ├── TC-A-13.response
│       │   ├── TC-A-2.response
│       │   ├── TC-A-3.response
│       │   ├── TC-A-4.response
│       │   ├── TC-A-5.response
│       │   ├── TC-A-6.response
│       │   ├── TC-A-7.response
│       │   ├── TC-A-8.response
│       │   ├── TC-A-9.response
│       │   ├── TC-A.key
│       │   ├── TC-B-1.response
│       │   ├── TC-B.key
│       │   ├── TC-C-1.response
│       │   ├── TC-C.key
│       │   ├── TC-D-1.response
│       │   ├── TC-D.key
│       │   ├── TC-E-1.response
│       │   ├── TC-E.key
│       │   ├── TC-F-1.response
│       │   ├── TC-F.key
│       │   ├── TC-G-1.response
│       │   ├── TC-G.key
│       │   ├── TC-H-1.response
│       │   ├── TC-H.key
│       │   ├── TC-I-1.response
│       │   ├── TC-I.key
│       │   ├── TC-J-1.response
│       │   ├── TC-J.key
│       │   ├── TC-K-1.response
│       │   ├── TC-K.key
│       │   ├── TC-L-1.response
│       │   ├── TC-L.key
│       │   ├── TC-M-1.response
│       │   ├── TC-M-2.response
│       │   ├── TC-M-3.response
│       │   ├── TC-M-4.response
│       │   ├── TC-M-5.response
│       │   ├── TC-M-6.response
│       │   ├── TC-M.key
│       │   ├── TC-N-1.response
│       │   ├── TC-N-2.response
│       │   ├── TC-N-3.response
│       │   ├── TC-N-4.response
│       │   ├── TC-N-5.response
│       │   ├── TC-N-6.response
│       │   └── TC-N.key
│       ├── TestCases.README
│       └── test.pl
├── requirements.txt
├── runner.py
├── scorer.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | !reference-coreference-scorers-8.01/lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | pip-wheel-metadata/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Event Coreference Resolution
2 |
3 | This repo provides the code for the paper [A Context-Dependent Gated Module for Incorporating Symbolic Semantics into Event Coreference Resolution](https://arxiv.org/abs/2104.01697) (NAACL 2021). If you have any questions or suggestions, please open a new [GitHub issue](https://github.com/laituan245/eventcoref/issues/new).
4 |
5 |
6 | ## Instructions
7 | You can install the dependencies for the project using the following command:
8 | ```
9 | pip install -r requirements.txt
10 | ```
11 |
12 | You will also need to make the evaluation script executable:
13 | ```
14 | chmod +x reference-coreference-scorers-8.01/scorer.pl
15 | ```
16 |
17 |
18 | To train a basic model, run the following command:
19 | ```
20 | python runner.py -c basic
21 | ```
22 | Arguments:
23 | - `-c`, `--config_name`: Name of the config to use (default: `basic`; see [configs/basic.conf](https://github.com/laituan245/eventcoref/blob/main/configs/basic.conf) for the full list of available configs).
24 |
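For example, to train the variant that combines all five symbolic features with the gated module (one of the configs defined in `configs/basic.conf`), run:
```
python runner.py -c gated_all_features
```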
25 | ## Data
26 | Due to licensing restrictions, we cannot publicly share the ACE 2005 and KBP 2016 datasets. Please download the data from the LDC website.
27 |
--------------------------------------------------------------------------------
/configs/basic.conf:
--------------------------------------------------------------------------------
1 | basic {
2 | # Data-Related Configs
3 | base_dataset_path = resources/ACE05-E
4 | predictions_path = resources/ACE05-E-Preds
5 | use_groundtruth = false
6 | increase_ace_dev_set = false
7 |
8 | # Model-Related Configs
9 | transformer = SpanBERT/spanbert-base-cased
10 | multi_piece_strategy = average
11 | latent_size = 500
12 | ffnn_size = 500
13 | ffnn_depth = 1
14 | feature_size = 50
15 |
16 | # Features-Related Configs
17 | combine_strategy = simple # Supported values are simple and gated
18 | use_typ_features = false # Event Type Features
19 | use_pol_features = false # Polarity Features
20 | use_mod_features = false # Modality Features
21 | use_gen_features = false # Genericity Features
22 | use_ten_features = false # Tense Features
23 | typ_noise_prob = 0.0
24 | pol_noise_prob = 0.0
25 | mod_noise_prob = 0.0
26 | gen_noise_prob = 0.0
27 | ten_noise_prob = 0.0
28 |
29 | # Training/Inference Configs
30 | gradient_checkpointing = false
31 | transformer_learning_rate = 5e-05
32 | task_learning_rate = 0.0005
33 | epochs = 50
34 | batch_size = 8
35 | transformer_dropout_rate = 0.5
36 | dropout_rate = 0.5
37 | max_grad_norm = 1.0
38 | transformer_weight_decay = 0.1
39 |
40 | # Others
41 | no_cuda = false
42 | }
43 |
44 | # Use type feature
45 | simple_type_feature = ${basic} {
46 | combine_strategy = simple
47 | use_typ_features = true
48 | }
49 |
50 | gated_type_feature = ${basic} {
51 | combine_strategy = gated
52 | use_typ_features = true
53 | }
54 |
55 | # Use polarity feature
56 | simple_polarity_feature = ${basic} {
57 | combine_strategy = simple
58 | use_pol_features = true
59 | }
60 |
61 | gated_polarity_feature = ${basic} {
62 | combine_strategy = gated
63 | use_pol_features = true
64 | }
65 |
66 | # Use modality feature
67 | simple_modality_feature = ${basic} {
68 | combine_strategy = simple
69 | use_mod_features = true
70 | }
71 |
72 | gated_modality_feature = ${basic} {
73 | combine_strategy = gated
74 | use_mod_features = true
75 | }
76 |
77 | gated_modality_feature_with_random_noise = ${gated_modality_feature} {
78 | mod_noise_prob = 0.15
79 | }
80 |
81 | # Use genericity feature
82 | simple_genericity_feature = ${basic} {
83 | combine_strategy = simple
84 | use_gen_features = true
85 | }
86 |
87 | gated_genericity_feature = ${basic} {
88 | combine_strategy = gated
89 | use_gen_features = true
90 | }
91 |
92 | gated_genericity_feature_with_random_noise = ${gated_genericity_feature} {
93 | gen_noise_prob = 0.15
94 | }
95 |
96 | # Use tense feature
97 | simple_tense_feature = ${basic} {
98 | combine_strategy = simple
99 | use_ten_features = true
100 | }
101 |
102 | gated_tense_feature = ${basic} {
103 | combine_strategy = gated
104 | use_ten_features = true
105 | }
106 |
107 | gated_tense_feature_with_random_noise = ${gated_tense_feature} {
108 | ten_noise_prob = 0.25
109 | }
110 |
111 | # Use all features
112 | simple_all_features = ${basic} {
113 | combine_strategy = simple # Supported values are simple and gated
114 | use_typ_features = true # Event Type Features
115 | use_pol_features = true # Polarity Features
116 | use_mod_features = true # Modality Features
117 | use_gen_features = true # Genericity Features
118 | use_ten_features = true # Tense Features
119 | }
120 |
121 | simple_all_features_groundtruth = ${simple_all_features} {
122 | use_groundtruth = true
123 | }
124 |
125 | simple_all_features_with_random_noise = ${simple_all_features} {
126 | mod_noise_prob = 0.15
127 | gen_noise_prob = 0.15
128 | ten_noise_prob = 0.25
129 | }
130 |
131 | gated_all_features = ${basic} {
132 | combine_strategy = gated # Supported values are simple and gated
133 | use_typ_features = true # Event Type Features
134 | use_pol_features = true # Polarity Features
135 | use_mod_features = true # Modality Features
136 | use_gen_features = true # Genericity Features
137 | use_ten_features = true # Tense Features
138 | }
139 |
140 | gated_all_features_with_random_noise = ${gated_all_features} {
141 | mod_noise_prob = 0.15
142 | gen_noise_prob = 0.15
143 | ten_noise_prob = 0.25
144 | }
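# Illustrative example (not a config used in the paper): any subset of features
# can be combined in the same way by extending ${basic}, e.g. a gated model that
# uses only the event type and tense features.
gated_type_and_tense_features = ${basic} {
  combine_strategy = gated
  use_typ_features = true
  use_ten_features = true
}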
145 |
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | # Event Attribute Value Types
2 | POL_TYPES = ['Negative', 'Positive']
3 | MOD_TYPES = ['Asserted', 'Other']
4 | GEN_TYPES = ['Generic', 'Specific']
5 | TEN_TYPES = ['Unspecified', 'Past', 'Future', 'Present']
6 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from data.helpers import load_oneie_dataset
2 |
--------------------------------------------------------------------------------
/data/base.py:
--------------------------------------------------------------------------------
1 | import nltk
2 | from utils import *
3 |
4 | class Document:
5 | def __init__(self, doc_id, sentences, event_mentions, entity_mentions, pred_graphs):
6 | self.doc_id = doc_id
7 | self.sentences = sentences
8 | self.words = flatten(sentences)
9 | self.event_mentions = event_mentions
10 | self.entity_mentions = entity_mentions
11 | self.num_words = len(self.words)
12 | self.pred_graphs = pred_graphs
13 |
14 | # Post-process self.event_mentions
15 | for e in self.event_mentions:
16 | _arguments = []
17 | for argument in e['arguments']:
18 | for entity_mention in self.entity_mentions:
19 | if entity_mention['id'] == argument['entity_id']:
20 | _arguments.append({
21 | 'text': argument['text'],
22 | 'role': argument['role'],
23 | 'entity': entity_mention,
24 | })
25 | assert(len(_arguments) == len(e['arguments']))
26 | e['arguments'] = _arguments
27 |
28 | # Update self.events
29 | self.events = {}
30 | for event_mention in event_mentions:
31 | mention_id = event_mention['id']
32 | event_id = mention_id[:mention_id.rfind('-')]
33 | if not event_id in self.events:
34 | self.events[event_id] = []
35 | self.events[event_id].append(event_mention)
36 |
37 | # Build self.coreferential_pairs
38 | self.coreferential_pairs = set()
39 | for i in range(len(event_mentions)):
40 | for j in range(i+1, len(event_mentions)):
41 | # Find the event id of the first event mention
42 | mention_i = event_mentions[i]
43 | mention_id_i = mention_i['id']
44 | event_id_i = mention_id_i[:mention_id_i.rfind('-')]
45 | # Find the event id of the second event mention
46 | mention_j = event_mentions[j]
47 | mention_id_j = mention_j['id']
48 | event_id_j = mention_id_j[:mention_id_j.rfind('-')]
49 | # Check if refer to the same event
50 | if event_id_i == event_id_j:
51 | loc_i = (mention_i['trigger']['start'], mention_i['trigger']['end'])
52 | loc_j = (mention_j['trigger']['start'], mention_j['trigger']['end'])
53 | self.coreferential_pairs.add((loc_i, loc_j))
54 | self.coreferential_pairs.add((loc_j, loc_i))
55 |
56 | # Extract pred_triggers, pred_entities, pred_relations, pred_event_mentions
57 | assert(len(pred_graphs) == 0 or len(pred_graphs) == len(sentences))
58 | self.pred_triggers, self.pred_entities = [], []
59 | self.pred_relations, self.pred_event_mentions = [], []
60 | for graph in pred_graphs:
61 | if len(graph) > 0:
62 | for trigger in graph['triggers']:
63 | lookedup_attrs = trigger.pop(-1)
64 | self.pred_triggers.append({
65 | 'tokens': self.words[trigger[0]:trigger[1]],
66 | 'start': trigger[0], 'end': trigger[1],
67 | 'confidence': trigger[3]
68 | })
69 | self.pred_event_mentions.append({
70 | 'event_type': trigger[2],
71 | 'trigger': self.pred_triggers[-1],
72 | 'arguments': [],
73 | 'event_polarity': lookedup_attrs['event_polarity'],
74 | 'event_modality': lookedup_attrs['event_modality'],
75 | 'event_genericity': lookedup_attrs['event_genericity'],
76 | 'event_tense': lookedup_attrs['event_tense']
77 | })
78 | for entity in graph['entities']:
79 | self.pred_entities.append({
80 | 'tokens': self.words[entity[0]:entity[1]],
81 | 'start': entity[0], 'end': entity[1],
82 | 'entity_type': entity[2], 'mention_type': entity[3],
83 | 'confidence': entity[4]
84 | })
85 | for relation in graph['relations']:
86 | arg1 = self.pred_entities[relation[0]]
87 | arg2 = self.pred_entities[relation[1]]
88 | self.pred_relations.append({
89 | 'arg1': arg1, 'arg2': arg2,
90 | 'relation_type': relation[2],
91 | 'confidence': relation[3]
92 | })
93 | for role in graph['roles']:
94 | event_mention = self.pred_event_mentions[role[0]]
95 | entity = self.pred_entities[role[1]]
96 | event_mention['arguments'].append({
97 | 'entity': entity,
98 | 'role': role[2],
99 | 'confidence': role[-1]
100 | })
101 |
102 | # Add field has_correct_trigger to each event mention
103 | trigger_locs = set()
104 | for e in self.event_mentions:
105 | e['has_correct_trigger'] = True
106 | trigger_locs.add((e['trigger']['start'], e['trigger']['end']))
107 | for e in self.pred_event_mentions:
108 | trigger_start = e['trigger']['start']
109 | trigger_end = e['trigger']['end']
110 | e['has_correct_trigger'] = (trigger_start, trigger_end) in trigger_locs
111 | for e in self.event_mentions: assert(e['has_correct_trigger']) # Sanity test
112 |
113 |
114 | class Dataset:
115 | def __init__(self, data, tokenizer, sliding_window_size = 512):
116 | '''
117 | data: A list of Document objects
118 | tokenizer: A transformer Tokenizer
119 | sliding_window_size: Size of sliding window (for a long document, we split it into overlapping segments)
120 | '''
121 | self.data = data
122 |
123 | # Tokenize the documents
124 | for doc in self.data:
125 | # Build doc_tokens, doc.word_starts_indexes
126 | doc_tokens, word_starts_indexes, start_index = [], [], 0
127 | for w in doc.words:
128 | word_tokens = tokenizer.tokenize(w)
129 | doc_tokens += word_tokens
130 | word_starts_indexes.append(start_index)
131 | start_index += len(word_tokens)
132 | doc.word_starts_indexes = word_starts_indexes
133 | assert(len(doc.word_starts_indexes) == len(doc.words))
134 |
135 | # Build token_windows, mask_windows, and input_masks
136 | doc_token_ids = tokenizer.convert_tokens_to_ids(doc_tokens)
137 | doc.token_windows, doc.mask_windows = \
138 | convert_to_sliding_window(doc_token_ids, sliding_window_size, tokenizer)
139 | doc.input_masks = extract_input_masks_from_mask_windows(doc.mask_windows)
140 |
141 | # Compute the set of event types
142 | self.event_types = set()
143 | for doc in self.data:
144 | for e in doc.event_mentions:
145 | self.event_types.add(e['event_type'])
146 | self.event_types = sorted(list(self.event_types))
147 |
148 | def __len__(self):
149 | return len(self.data)
150 |
151 | def __getitem__(self, item):
152 | return self.data[item]
153 |
--------------------------------------------------------------------------------
/data/helpers.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 |
4 | from os.path import join
5 | from data.base import Dataset, Document
6 |
7 | def load_oneie_dataset(
8 | base_path, tokenizer,
9 | predictions_path=None, remove_doc_with_no_events=True,
10 | increase_ace_dev_set=False
11 | ):
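    '''
    Load a dataset in OneIE JSON format. Based on the code below, this expects
    {base_path}/{train,dev,test}.oneie.json and, when predictions_path is given,
    {predictions_path}/{train,dev,test}.json plus {predictions_path}/attrs_preds.json
    (predicted event attributes). Returns (train, dev, test) as Dataset objects.
    '''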
12 | id2split, id2sents = {}, {}
13 |
14 | # Read ground-truth data files
15 | for split in ['train', 'dev', 'test']:
16 | path = join(base_path, '{}.oneie.json'.format(split))
17 | with open(path, 'r', encoding='utf-8') as r:
18 | for line in r:
19 | sent_inst = json.loads(line)
20 | doc_id = sent_inst['doc_id']
21 | id2split[doc_id] = split
22 | # Update id2sents
23 | if not doc_id in id2sents:
24 | id2sents[doc_id] = []
25 | id2sents[doc_id].append(sent_inst)
26 |
27 | # Read prediction files (if available)
28 | predicted_attrs = None
29 | if predictions_path:
30 | sentid2graph = {}
31 | for split in ['train', 'dev', 'test']:
32 | path = join(predictions_path, '{}.json'.format(split))
33 | with open(path, 'r', encoding='utf-8') as r:
34 | for line in r:
35 | sent_preds = json.loads(line)
36 | sentid2graph[sent_preds['sent_id']] = sent_preds['graph']
37 |
38 | # Read attributes prediction files
39 | attrs_preds_path = join(predictions_path, 'attrs_preds.json')
40 | predicted_attrs = json.load(open(attrs_preds_path, 'r'))
41 | _predicted_attrs = {}
42 | for key in predicted_attrs:
43 | split_index = key.rfind('.(')
44 | doc_id = key[:split_index]
45 | start, end = key[split_index+2:-1].split('-')
46 | start, end = int(start), int(end)
47 | _predicted_attrs[(doc_id, start, end)] = predicted_attrs[key]
48 | predicted_attrs = _predicted_attrs
49 |
50 | # Parse documents one-by-one
51 | train, dev, test = [], [], []
52 | for doc_id in id2sents:
53 | words_ctx, pred_trigger_ctx, pred_entities_ctx = 0, 0, 0
54 | sents = id2sents[doc_id]
55 | sentences, event_mentions, entity_mentions, pred_graphs = [], [], [], []
56 | for sent_index, sent in enumerate(sents):
57 | sentences.append(sent['tokens'])
58 | # Parse entity mentions
59 | for entity_mention in sent['entity_mentions']:
60 | entity_mention['start'] += words_ctx
61 | entity_mention['end'] += words_ctx
62 | entity_mentions.append(entity_mention)
63 | # Parse event mentions
64 | for event_mention in sent['event_mentions']:
65 | event_mention['sent_index'] = sent_index
66 | event_mention['trigger']['start'] += words_ctx
67 | event_mention['trigger']['end'] += words_ctx
68 | event_mentions.append(event_mention)
69 | # Update pred_graphs
70 | if predictions_path:
71 | graph = sentid2graph.get(sent['sent_id'], {})
72 | if len(graph) > 0:
73 | for entity in graph['entities']:
74 | entity[0] += words_ctx
75 | entity[1] += words_ctx
76 | for trigger in graph['triggers']:
77 | trigger[0] += words_ctx
78 | trigger[1] += words_ctx
79 | # Look up predicted attributes
80 | if predicted_attrs:
81 | lookedup_attrs = predicted_attrs[(doc_id, trigger[0], trigger[1])]
82 | trigger.append(lookedup_attrs)
83 | for relation in graph['relations']:
84 | relation[0] += pred_entities_ctx
85 | relation[1] += pred_entities_ctx
86 | for role in graph['roles']:
87 | role[0] += pred_trigger_ctx
88 | role[1] += pred_entities_ctx
89 | pred_trigger_ctx += len(graph['triggers'])
90 | pred_entities_ctx += len(graph['entities'])
91 | pred_graphs.append(graph)
92 | # Update words_ctx
93 | words_ctx += len(sent['tokens'])
94 | doc = Document(doc_id, sentences, event_mentions, entity_mentions, pred_graphs)
95 | split = id2split[doc_id]
96 | if split == 'train':
97 | if not remove_doc_with_no_events or len(event_mentions) > 0:
98 | train.append(doc)
99 | if split == 'dev': dev.append(doc)
100 | if split == 'test': test.append(doc)
101 |
102 | if increase_ace_dev_set:
103 | # Randomly move 12 docs from train set to dev set
104 | random.seed(0)
105 | random.shuffle(train)
106 | dev = train[:12] + dev
107 | train = train[12:]
108 |
109 | # Wrap each split in a Dataset object
110 | train, dev, test = Dataset(train, tokenizer), Dataset(dev, tokenizer), Dataset(test, tokenizer)
111 |
112 | # Verbose
113 | print('Loaded {} train examples'.format(len(train)))
114 | print('Loaded {} dev examples'.format(len(dev)))
115 | print('Loaded {} test examples'.format(len(test)))
116 |
117 | return train, dev, test
118 |
--------------------------------------------------------------------------------
/evaluate_and_visualize.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import json
4 | import torch
5 | import tqdm
6 | import pyhocon
7 | import random
8 |
9 | from transformers import *
10 | from models import EventCorefModel
11 | from scorer import evaluate
12 | from argparse import ArgumentParser
13 | from data import load_oneie_dataset
14 | from utils import RunningAverage, prepare_configs, flatten
15 | from scorer import get_predicted_antecedents
16 |
17 | def generate_coref_preds(model, data, output_path='predictions.json'):
18 | predictions = {}
19 | for inst in data:
20 | doc_words = inst.words
21 | event_mentions = inst.event_mentions
22 | preds = model(inst, is_training=False)[1]
23 | preds = [x.cpu().data.numpy() for x in preds]
24 | top_antecedents, top_antecedent_scores = preds[2:]
25 | predicted_antecedents = get_predicted_antecedents(top_antecedents, top_antecedent_scores)
26 |
27 | predicted_clusters, m2cluster = [], {}
28 | for ix, e in enumerate(event_mentions):
29 | if predicted_antecedents[ix] < 0:
30 | cluster_id = len(predicted_clusters)
31 | predicted_clusters.append([e])
32 | else:
33 | antecedent_idx = predicted_antecedents[ix]
34 | p_e = event_mentions[antecedent_idx]
35 | cluster_id = m2cluster[p_e['id']]
36 | predicted_clusters[cluster_id].append(e)
37 | m2cluster[e['id']] = cluster_id
38 | # Update predictions
39 | predictions[inst.doc_id] = {}
40 | predictions[inst.doc_id]['words']= doc_words
41 | predictions[inst.doc_id]['predicted_clusters'] = predicted_clusters
42 |
43 | with open(output_path, 'w+') as outfile:
44 | json.dump(predictions, outfile)
45 |
46 | def generate_visualizations(sample_outputs, output_path='visualization.html'):
47 | with open(sample_outputs) as json_file:
48 | data = json.load(json_file)
49 |
50 | with open(output_path, 'w+') as output_file:
51 | for doc_id in data.keys():
52 | doc = data[doc_id]
53 | doc_words = doc['words']
54 | clusters = doc['predicted_clusters']
55 | event_mentions = flatten(clusters)
56 |             output_file.write('<b>Document {}</b><br>'.format(doc_id))
57 |             output_file.write('{}<br>'.format(doc_to_html(doc, event_mentions)))
58 |             for ix, cluster in enumerate(doc['predicted_clusters']):
59 |                 if len(cluster) == 1: continue
60 |                 output_file.write('<b>Cluster {}</b>'.format(ix+1))
61 |                 for em in cluster:
62 |                     output_file.write('{}<br>'.format(event_mentions_to_html(doc_words, em)))
63 |                 output_file.write('<br>')
64 |             output_file.write('<br>')
65 |
66 | def doc_to_html(doc, event_mentions):
67 | doc_words = doc['words']
68 | doc_words = [str(word) for word in doc_words]
69 | for e in event_mentions:
70 | t_start, t_end = e['trigger']['start'], e['trigger']['end'] - 1
71 |         doc_words[t_start] = '<b>' + doc_words[t_start]
72 |         doc_words[t_end] = doc_words[t_end] + '</b>'
73 | return ' '.join(doc_words)
74 |
75 | def event_mentions_to_html(doc_words, em):
76 | trigger_start = em['trigger']['start']
77 | trigger_end = em['trigger']['end']
78 | context_left = ' '.join(doc_words[trigger_start-10:trigger_start])
79 | context_right = ' '.join(doc_words[trigger_end:trigger_end+10])
80 | final_str = context_left + ' ' + em['trigger']['text'] + ' ' + context_right
81 | final_str = 'Event {} (Type {}) | '.format(em['id'], em['event_type']) + final_str
82 | return final_str
83 |
84 | def evaluate_and_visualize(config_name, model_path, output_path):
85 | # Prepare tokenizer, dataset, and model
86 | configs = prepare_configs(config_name, verbose=False)
87 | tokenizer = BertTokenizer.from_pretrained(configs['transformer'])
88 | train_set, dev_set, test_set = load_oneie_dataset(configs['base_dataset_path'], tokenizer)
89 | model = EventCorefModel(configs, train_set.event_types)
90 |
91 | # Reload the model and evaluate
92 | checkpoint = torch.load(model_path)
93 | model.load_state_dict(checkpoint['model_state_dict'])
94 | print('Evaluation on the dev set', flush=True)
95 | evaluate(model, dev_set, configs)['avg']
96 | print('Evaluation on the test set', flush=True)
97 | evaluate(model, test_set, configs)
98 |
99 | # Generate visualizations (for the test set)
100 | generate_coref_preds(model, test_set, '_predictions.json')
101 | generate_visualizations('_predictions.json', output_path)
102 | os.remove('_predictions.json')
103 |
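# Example usage (illustrative; pass whatever checkpoint path was saved during training):
#   python evaluate_and_visualize.py -c basic -m <saved_model_checkpoint> -o visualization.html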
104 | if __name__ == "__main__":
105 | # Parse argument
106 | parser = ArgumentParser()
107 | parser.add_argument('-c', '--config_name')
108 | parser.add_argument('-m', '--model_path')
109 | parser.add_argument('-o', '--output_path', default='visualization.html')
110 | args = parser.parse_args()
111 |
112 | # Evaluate the model and generate visualizations
113 | evaluate_and_visualize(args.config_name, args.model_path, args.output_path)
114 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | import utils
6 |
7 | from constants import *
8 | from models.base import *
9 | from models.helpers import *
10 | from models.encoder import *
11 |
12 | class EventCorefModel(BaseModel):
13 | def __init__(self, configs, event_types):
14 | BaseModel.__init__(self, configs)
15 | self.event_types = sorted(event_types)
16 |
17 | # Transformer Encoder
18 | self.transformer_encoder = TransformerEncoder(configs)
19 | self.linear = nn.Linear(3 * self.transformer_encoder.hidden_size, configs['latent_size'])
20 |
21 | # Symbolic Features Encoder
22 | self.symbolic_encoder = SymbolicFeaturesEncoder(configs, self.event_types)
23 |
24 | # Feature Fusion Network
25 | self.fusion_network = FeatureFusionNetwork(latent_size=configs['latent_size'],
26 | combine_strategy=configs['combine_strategy'],
27 | nb_modules=len(self.symbolic_encoder.enabled_features))
28 |
29 | # Pair Scorer
30 | self.dropout = nn.Dropout(configs['dropout_rate'])
31 | self.pair_scorer = FFNNModule(input_size=self.get_pair_size(),
32 | hidden_sizes=[configs['ffnn_size']] * configs['ffnn_depth'],
33 | output_size=1,
34 | dropout=configs['dropout_rate'])
35 |
36 | # Move model to device
37 | self.to(self.device)
38 |
39 | def forward(self, inst, is_training):
40 | self.train() if is_training else self.eval()
41 |
42 | # Extract event_mentions and entity_mentions
43 | if self.configs['use_groundtruth']:
44 | entity_mentions = inst.entity_mentions
45 | event_mentions = inst.event_mentions
46 | else:
47 | entity_mentions = inst.pred_entities
48 | event_mentions = inst.pred_event_mentions
49 |
50 | # Convert to Torch Tensor
51 | input_ids = torch.tensor(inst.token_windows).to(self.device)
52 | input_masks = torch.tensor(inst.input_masks).to(self.device)
53 | mask_windows = torch.tensor(inst.mask_windows).to(self.device)
54 | num_windows, window_size = input_ids.size()
55 |
56 | # Apply the Transformer encoder to get token features
57 | tokens_features = self.transformer_encoder(input_ids, input_masks, mask_windows,
58 | num_windows, window_size, is_training).squeeze()
59 | num_tokens = tokens_features.size()[0]
60 |
61 | # Compute word_features (averaging)
62 | word_features = []
63 | word_starts_indexes = inst.word_starts_indexes
64 | word_ends_indexes = word_starts_indexes[1:] + [num_tokens]
65 | word_features = get_span_emb(tokens_features, word_starts_indexes, word_ends_indexes)
66 | assert(word_features.size()[0] == inst.num_words)
67 |
68 | # Compute entity_features
69 | entity_starts = [m['start'] for m in entity_mentions]
70 | entity_ends = [m['end'] for m in entity_mentions]
71 | entity_features = get_span_emb(word_features, entity_starts, entity_ends)
72 |
73 | # Compute trigger_features
74 | event_starts = [e['trigger']['start'] for e in event_mentions]
75 | event_ends = [e['trigger']['end'] for e in event_mentions]
76 | trigger_features = get_span_emb(word_features, event_starts, event_ends)
77 |
78 | # Compute pair_trigger_features
79 | pair_trigger_features = get_pair_embs(trigger_features)
80 | pair_trigger_features = F.relu(self.linear(pair_trigger_features))
81 |
82 | # Compute pair_features
83 | if len(self.symbolic_encoder.enabled_features) == 0:
84 | # Not using any additional symbolic features
85 | pair_features = pair_trigger_features
86 | else:
87 | # Use additional symbolic features
88 | pair_symbolic_features = self.symbolic_encoder(event_mentions)
89 | pair_features = self.fusion_network(pair_trigger_features, pair_symbolic_features)
90 |
91 | # Compute pair_scores
92 | pair_features = self.dropout(pair_features)
93 | pair_scores = self.pair_scorer(pair_features)
94 |
95 | # Compute antecedent_scores
96 | k = len(event_mentions)
97 | span_range = torch.arange(0, k).to(self.device)
98 | antecedent_offsets = span_range.view(-1, 1) - span_range.view(1, -1)
99 | antecedents_mask = antecedent_offsets >= 1 # [k, k]
100 | antecedent_scores = pair_scores + torch.log(antecedents_mask.float())
101 |
102 | # Compute antecedent_labels
103 | candidate_cluster_ids = self.get_cluster_ids(event_mentions, inst.coreferential_pairs)
104 | same_cluster_indicator = candidate_cluster_ids.unsqueeze(0) == candidate_cluster_ids.unsqueeze(1)
105 | same_cluster_indicator = same_cluster_indicator & antecedents_mask
106 |
107 | non_dummy_indicator = (candidate_cluster_ids > -1).unsqueeze(1)
108 | pairwise_labels = same_cluster_indicator & non_dummy_indicator
109 | dummy_labels = ~pairwise_labels.any(1, keepdim=True)
110 | antecedent_labels = torch.cat([dummy_labels, pairwise_labels], 1)
111 |
112 | # Compute loss
113 | dummy_zeros = torch.zeros([k, 1]).to(self.device)
114 | antecedent_scores = torch.cat([dummy_zeros, antecedent_scores], dim=1)
115 | gold_scores = antecedent_scores + torch.log(antecedent_labels.float())
116 | log_norm = logsumexp(antecedent_scores, dim = 1)
117 | loss = torch.sum(log_norm - logsumexp(gold_scores, dim=1))
118 |
119 | # loss and preds
120 | top_antecedents = torch.arange(0, k).to(self.device)
121 | top_antecedents = top_antecedents.unsqueeze(0).repeat(k, 1)
122 | preds = [torch.tensor(event_starts),
123 | torch.tensor(event_ends),
124 | top_antecedents,
125 | antecedent_scores]
126 |
127 | return loss, preds
128 |
129 | def get_cluster_ids(self, event_mentions, coreferential_pairs):
130 | cluster_ids = [-1] * len(event_mentions)
131 | nb_nonsingleton_clusters = 0
132 | for i in range(len(event_mentions)):
133 | mention_i = event_mentions[i]
134 | loc_i = (mention_i['trigger']['start'], mention_i['trigger']['end'])
135 | for j in range(i-1, -1, -1):
136 | mention_j = event_mentions[j]
137 | loc_j = (mention_j['trigger']['start'], mention_j['trigger']['end'])
138 | if ((loc_i, loc_j)) in coreferential_pairs:
139 | if cluster_ids[j] > -1:
140 | cluster_ids[i] = cluster_ids[j]
141 | else:
142 | cluster_ids[i] = cluster_ids[j] = nb_nonsingleton_clusters
143 | nb_nonsingleton_clusters += 1
144 | return torch.tensor(cluster_ids).to(self.device)
145 |
146 |
147 | def get_pair_size(self):
148 | return (1 + len(self.symbolic_encoder.enabled_features)) * self.configs['latent_size']
149 |
--------------------------------------------------------------------------------
/models/base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | import json
6 | import random
7 |
8 | from transformers import *
9 | from math import ceil, floor
10 |
11 | # Optimizer
12 | class ModelOptimizer(object):
13 | def __init__(self, transformer_optimizer, transformer_scheduler,
14 | task_optimizer, task_init_lr, max_iter):
15 | self.iter = 0
16 | self.transformer_optimizer = transformer_optimizer
17 | self.transformer_scheduler = transformer_scheduler
18 |
19 | self.task_optimizer = task_optimizer
20 | self.task_init_lr = task_init_lr
21 | self.max_iter = max_iter
22 |
23 | def zero_grad(self):
24 | self.transformer_optimizer.zero_grad()
25 | self.task_optimizer.zero_grad()
26 |
27 | def step(self):
28 | self.iter += 1
29 | self.transformer_optimizer.step()
30 | self.task_optimizer.step()
31 | self.transformer_scheduler.step()
32 | self.poly_lr_scheduler(self.task_optimizer, self.task_init_lr, self.iter, self.max_iter)
33 |
34 | @staticmethod
35 | def poly_lr_scheduler(optimizer, init_lr, iter, max_iter,
36 | lr_decay_iter=1, power=1.0):
37 | """Polynomial decay of learning rate
38 | :param init_lr is base learning rate
39 | :param iter is a current iteration
40 | :param max_iter is number of maximum iterations
41 | :param lr_decay_iter how frequently decay occurs, default is 1
42 | :param power is a polynomial power
43 | """
44 | if iter % lr_decay_iter or iter > max_iter:
45 | return optimizer
46 |
47 | lr = init_lr*(1 - iter/max_iter)**power
48 | for param_group in optimizer.param_groups:
49 | param_group['lr'] = lr
50 |
51 | return lr
52 |
53 | # BaseModel
54 | class BaseModel(nn.Module):
55 | def __init__(self, configs):
56 | super(BaseModel, self).__init__()
57 | self.configs = configs
58 | self.device = torch.device('cuda' if torch.cuda.is_available() and not configs['no_cuda'] else 'cpu')
59 |
60 | def get_optimizer(self, num_warmup_steps, num_train_steps, start_iter = 0):
61 | # Extract transformer parameters and task-specific parameters
62 | transformer_params, task_params = [], []
63 | for name, param in self.named_parameters():
64 | if param.requires_grad:
65 | if "transformer.encoder" in name:
66 | transformer_params.append((name, param))
67 | else:
68 | task_params.append((name, param))
69 |
70 | # Prepare transformer_optimizer and transformer_scheduler
71 | no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
72 | optimizer_grouped_parameters = [
73 | {'params': [p for n, p in transformer_params if not any(nd in n for nd in no_decay)], 'weight_decay': self.configs['transformer_weight_decay']},
74 | {'params': [p for n, p in transformer_params if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
75 | ]
76 | transformer_optimizer = AdamW(
77 | optimizer_grouped_parameters,
78 | lr=self.configs['transformer_learning_rate'],
79 | betas=(0.9, 0.999),
80 | eps=1e-06,
81 | )
82 | transformer_scheduler = get_linear_schedule_with_warmup(transformer_optimizer,
83 | num_warmup_steps=num_warmup_steps,
84 | num_training_steps=num_train_steps)
85 |
86 | # Prepare the optimizer for task-specific parameters
87 | task_optimizer = optim.Adam([p for n, p in task_params], lr=self.configs['task_learning_rate'])
88 |
89 | # Unify transformer_optimizer and task_optimizer
90 | model_optimizer = ModelOptimizer(transformer_optimizer, transformer_scheduler,
91 | task_optimizer, self.configs['task_learning_rate'],
92 | num_train_steps)
93 | model_optimizer.iter = start_iter
94 |
95 | return model_optimizer
96 |
97 | # FFNN Module
98 | class FFNNModule(nn.Module):
99 | """ Generic FFNN-based Scoring Module
100 | """
101 | def __init__(self, input_size, hidden_sizes, output_size, dropout = 0.2):
102 | super(FFNNModule, self).__init__()
103 | self.layers = []
104 |
105 | prev_size = input_size
106 | for hidden_size in hidden_sizes:
107 | self.layers.append(nn.Linear(prev_size, hidden_size))
108 | self.layers.append(nn.ReLU(True))
109 | self.layers.append(nn.Dropout(dropout))
110 | prev_size = hidden_size
111 |
112 | self.layers.append(nn.Linear(prev_size, output_size))
113 |
114 | self.layer_module = nn.ModuleList(self.layers)
115 |
116 | def forward(self, x):
117 | out = x
118 | for layer in self.layer_module:
119 | out = layer(out)
120 | return out.squeeze()
121 |
122 | # FeatureSelectionModule
123 | class FeatureSelectionModule(nn.Module):
124 | def __init__(self, latent_size, combine_strategy):
125 | super(FeatureSelectionModule, self).__init__()
126 |
127 | self.latent_size = latent_size
128 | self.combine_strategy = combine_strategy
129 | assert(combine_strategy in ['simple', 'gated'])
130 |
131 | if combine_strategy == 'gated':
132 | # Gate Computation Parameters
133 | self.Wu = nn.Linear(2 * latent_size, latent_size)
134 |
135 | def forward(self, x1, x2):
136 | if self.combine_strategy == 'simple':
137 | return x2
138 | if self.combine_strategy == 'gated':
139 | x = torch.cat([x1, x2], dim=-1)
140 | # Orthogonal Decomposition
141 | x1_dot_x2 = torch.sum(x1 * x2, dim=-1, keepdim=True)
142 | x1_dot_x1 = torch.sum(x1 * x1, dim=-1, keepdim=True)
143 | parallel = (x1_dot_x2 / x1_dot_x1) * x1
144 | orthogonal = x2 - parallel
145 | # Gates
146 | ug = torch.sigmoid(self.Wu(x))
147 | x2_prime = (1 - ug) * parallel + ug * orthogonal
148 | return x2_prime
149 |
150 | # FeatureFusionNetwork
151 | class FeatureFusionNetwork(nn.Module):
152 | def __init__(self, latent_size, combine_strategy, nb_modules):
153 | super(FeatureFusionNetwork, self).__init__()
154 |
155 | self.latent_size = latent_size
156 | self.combine_strategy = combine_strategy
157 | self.nb_modules = nb_modules
158 |
159 | modules = []
160 | for _ in range(nb_modules):
161 | modules.append(FeatureSelectionModule(latent_size, combine_strategy))
162 | self.fusion_modules = nn.ModuleList(modules)
163 |
164 | def forward(self, c, xs):
165 | features = [c]
166 | for module, x in zip(self.fusion_modules, xs):
167 | features.append(module(c, x))
168 | return torch.cat(features, dim=-1)
169 |
170 | @property
171 | def output_size(self):
172 | return (self.nb_modules + 1) * self.latent_size
173 |
--------------------------------------------------------------------------------
/models/encoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | import utils
6 | import random
7 |
8 | from constants import *
9 | from transformers import *
10 | from models.helpers import *
11 |
12 | class TransformerEncoder(nn.Module):
13 | def __init__(self, configs):
14 | super(TransformerEncoder, self).__init__()
15 | self.configs = configs
16 |
17 | # Transformer Encoder
18 | self.transformer = AutoModel.from_pretrained(configs['transformer'])
19 | self.transformer_dropout = nn.Dropout(configs['transformer_dropout_rate'])
20 | self.transformer.config.gradient_checkpointing = configs['gradient_checkpointing']
21 | self.hidden_size = self.transformer.config.hidden_size
22 |
23 | def forward(self, input_ids, input_masks, mask_windows,
24 | num_windows, window_size, is_training,
25 | context_lengths = [0], token_type_ids = None):
26 | self.train() if is_training else self.eval()
27 | num_contexts = len(context_lengths)
28 |
29 | features = self.transformer(input_ids, input_masks, token_type_ids)[0]
30 | features = features.view(num_contexts, num_windows, -1, self.hidden_size)
31 |
32 | flattened_features = []
33 | for i in range(num_contexts):
34 | _features = features[i, :, :, :]
35 | _features = _features[:, context_lengths[i]:, :]
36 | _features = _features[:, : window_size, :]
37 | flattened_features.append(self.flatten(_features, mask_windows))
38 | flattened_features = torch.cat(flattened_features)
39 |
40 | return self.transformer_dropout(flattened_features)
41 |
42 | def flatten(self, features, mask_windows):
43 | num_windows, window_size, hidden_size = features.size()
44 | flattened_emb = torch.reshape(features, (num_windows * window_size, hidden_size))
45 | boolean_mask = mask_windows > 0
46 | boolean_mask = boolean_mask.view([num_windows * window_size])
47 | return flattened_emb[boolean_mask].unsqueeze(0)
48 |
49 | class SymbolicFeaturesEncoder(nn.Module):
50 | def __init__(self, configs, event_types):
51 | super(SymbolicFeaturesEncoder, self).__init__()
52 | self.configs = configs
53 | self.feature_size = configs['feature_size']
54 | self.latent_size = configs['latent_size']
55 | self.event_types = event_types
56 |
57 | # Embeddings and Linear Layers
58 | if configs['use_typ_features']:
59 | self.typ_embed = nn.Embedding(len(event_types), self.feature_size)
60 | self.typ_linear = nn.Linear(3 * self.feature_size, self.latent_size)
61 | if configs['use_pol_features']:
62 | self.pol_embed = nn.Embedding(len(POL_TYPES), self.feature_size)
63 | self.pol_linear = nn.Linear(3 * self.feature_size, self.latent_size)
64 | if configs['use_mod_features']:
65 | self.mod_embed = nn.Embedding(len(MOD_TYPES), self.feature_size)
66 | self.mod_linear = nn.Linear(3 * self.feature_size, self.latent_size)
67 | if configs['use_gen_features']:
68 | self.gen_embed = nn.Embedding(len(GEN_TYPES), self.feature_size)
69 | self.gen_linear = nn.Linear(3 * self.feature_size, self.latent_size)
70 | if configs['use_ten_features']:
71 | self.ten_embed = nn.Embedding(len(TEN_TYPES), self.feature_size)
72 | self.ten_linear = nn.Linear(3 * self.feature_size, self.latent_size)
73 |
74 | # Initialize Embeddings
75 | for name, param in self.named_parameters():
76 | if ('transformer' not in name.lower()) and 'embed' in name.lower():
77 | print('Re-initialize embedding {}'.format(name))
78 | param.data.uniform_(-0.5, 0.5)
79 |
80 | def forward(self, events):
81 | features = []
82 | if self.configs['use_typ_features']: features.append(self.get_features(events, 'event_type'))
83 | if self.configs['use_pol_features']: features.append(self.get_features(events, 'event_polarity'))
84 | if self.configs['use_mod_features']: features.append(self.get_features(events, 'event_modality'))
85 | if self.configs['use_gen_features']: features.append(self.get_features(events, 'event_genericity'))
86 | if self.configs['use_ten_features']: features.append(self.get_features(events, 'event_tense'))
87 | return features
88 |
89 | def get_features(self, events, key):
90 | if key == 'event_type':
91 | embed, linear, value_types = self.typ_embed, self.typ_linear, self.event_types
92 | noisy_prob = self.configs['typ_noise_prob']
93 | if key == 'event_polarity':
94 | embed, linear, value_types = self.pol_embed, self.pol_linear, POL_TYPES
95 | noisy_prob = self.configs['pol_noise_prob']
96 | if key == 'event_modality':
97 | embed, linear, value_types = self.mod_embed, self.mod_linear, MOD_TYPES
98 | noisy_prob = self.configs['mod_noise_prob']
99 | if key == 'event_genericity':
100 | embed, linear, value_types = self.gen_embed, self.gen_linear, GEN_TYPES
101 | noisy_prob = self.configs['gen_noise_prob']
102 | if key == 'event_tense':
103 | embed, linear, value_types = self.ten_embed, self.ten_linear, TEN_TYPES
104 | noisy_prob = self.configs['ten_noise_prob']
105 |
106 | values = []
107 | for e in events:
108 | value = e[key]
109 | if self.training and random.uniform(0, 1) < noisy_prob and e['has_correct_trigger']:
110 | value = random.choice(value_types)
111 | values.append(value_types.index(value))
112 |
113 | values = torch.tensor(values).to(next(self.parameters()).device)
114 | latent_feats = F.relu(linear(get_pair_embs(embed(values))))
115 | return latent_feats
116 |
117 | @property
118 | def enabled_features(self):
119 | enabled_features = []
120 | if self.configs['use_typ_features']: enabled_features.append('event_type')
121 | if self.configs['use_pol_features']: enabled_features.append('event_polarity')
122 | if self.configs['use_mod_features']: enabled_features.append('event_modality')
123 | if self.configs['use_gen_features']: enabled_features.append('event_genericity')
124 | if self.configs['use_ten_features']: enabled_features.append('event_tense')
125 | return enabled_features
126 |
--------------------------------------------------------------------------------
/models/helpers.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def get_span_emb(context_features, span_starts, span_ends):
4 | num_tokens = context_features.size()[0]
5 |
6 | features = []
7 | for s, e in zip(span_starts, span_ends):
8 | sliced_features = context_features[s:e, :]
9 | features.append(torch.mean(sliced_features, dim=0, keepdim=True))
10 | features = torch.cat(features, dim=0)
11 | return features
12 |
13 | def get_pair_embs(event_features):
14 | n, d = event_features.size()
15 | features_list = []
16 |
17 | # Compute src_embs, target_embs and their element-wise product (prod_embs)
18 | src_embs = event_features.view(1, n, d).repeat([n, 1, 1])
19 | target_embs = event_features.view(n, 1, d).repeat([1, n, 1])
20 | prod_embs = src_embs * target_embs
21 |
22 | # Update features_list
23 | features_list.append(src_embs)
24 | features_list.append(target_embs)
25 | features_list.append(prod_embs)
26 |
27 | # Concatenation
28 | pair_embs = torch.cat(features_list, 2)
29 |
30 | return pair_embs
31 |
32 | def logsumexp(inputs, dim=None, keepdim=False):
33 | """Numerically stable logsumexp.
34 | Args:
35 | inputs: A Variable with any shape.
36 | dim: An integer.
37 | keepdim: A boolean.
38 | Returns:
39 | Equivalent of log(sum(exp(inputs), dim=dim, keepdim=keepdim)).
40 | """
41 | # For a 1-D array x (any array along a single dimension),
42 | # log sum exp(x) = s + log sum exp(x - s)
43 | # with s = max(x) being a common choice.
44 | if dim is None:
45 | inputs = inputs.view(-1)
46 | dim = 0
47 | s, _ = torch.max(inputs, dim=dim, keepdim=True)
48 | outputs = s + (inputs - s).exp().sum(dim=dim, keepdim=True).log()
49 | if not keepdim:
50 | outputs = outputs.squeeze(dim)
51 | return outputs
52 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/README.txt:
--------------------------------------------------------------------------------
1 | NAME
2 | CorScorer: Perl package for scoring coreference resolution systems
3 | using different metrics.
4 |
5 |
6 | VERSION
7 | v8.01 -- reference implementations of MUC, B-cubed, CEAF and BLANC metrics.
8 |
9 |
10 | CHANGES SINCE v8.0
11 | - fixed a bug that crashed the BLANC scorer when a duplicate singleton
12 | mention was present in the response.
13 |
14 | INSTALLATION
15 | Requirements:
16 | 1. Perl: downloadable from http://perl.org
17 | 2. Algorithm-Munkres: included in this package and downloadable
18 | from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08
19 |
20 | USE
21 | This package is distributed with two scripts to execute the scorer from
22 | the command line.
23 |
24 | Windows (tm): scorer.bat
25 | Linux: scorer.pl
26 |
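For example, a typical command-line invocation (with the arguments described
under INPUT below) looks like:

   perl scorer.pl muc keys_file response_file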
27 |
28 | SYNOPSIS
29 | use CorScorer;
30 |
31 | $metric = 'ceafm';
32 |
33 | # Scores the whole dataset
34 | &CorScorer::Score($metric, $keys_file, $response_file);
35 |
36 | # Scores one file
37 | &CorScorer::Score($metric, $keys_file, $response_file, $name);
38 |
39 |
40 | INPUT
41 | metric: the metric desired to score the results:
42 | muc: MUCScorer (Vilain et al, 1995)
43 | bcub: B-Cubed (Bagga and Baldwin, 1998)
44 | ceafm: CEAF (Luo et al., 2005) using mention-based similarity
45 | ceafe: CEAF (Luo et al., 2005) using entity-based similarity
46 | blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions
47 | all: uses all the metrics to score
48 |
49 | keys_file: file with expected coreference chains in CoNLL-2011/2012 format
50 |
51 | response_file: file with output of coreference system (CoNLL-2011/2012 format)
52 |
53 | name: [optional] the name of the document to score. If name is not
54 | given, all the documents in the dataset will be scored. If given
55 | name is "none" then all the documents are scored but only total
56 | results are shown.
57 |
58 |
59 | OUTPUT
60 | The score subroutine returns an array with four values in this order:
61 | 1) Recall numerator
62 | 2) Recall denominator
63 | 3) Precision numerator
64 | 4) Precision denominator
65 |
66 | Also recall, precision and F1 are printed in the standard output when variable
67 | $VERBOSE is not null.
68 |
69 | Final scores:
70 | Recall = recall_numerator / recall_denominator
71 | Precision = precision_numerator / precision_denominator
72 | F1 = 2 * Recall * Precision / (Recall + Precision)
73 |
74 | Identification of mentions
75 | A scorer for identification of mentions (recall, precision and F1) is also included.
76 | Mentions from system response are compared with key mentions. This version performs
77 | strict mention matching as was used in the CoNLL-2011 and 2012 shared tasks.
78 |
79 | AUTHORS
80 | Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena lsi.upc.edu
81 | Sameer Pradhan, sameer.pradhan childrens.harvard.edu
82 | Sebastian Martschat, sebastian.martschat h-its.org
83 | Xiaoqiang Luo, xql google.com
84 |
85 | COPYRIGHT AND LICENSE
86 | Copyright (C) 2009-2011, Emili Sapena esapena lsi.upc.edu
87 | 2011-2014, Sameer Pradhan sameer.pradhan childrens.harvard.edu
88 |
89 | This program is free software; you can redistribute it and/or modify it
90 | under the terms of the GNU General Public License as published by the
91 | Free Software Foundation; either version 2 of the License, or (at your
92 | option) any later version. This program is distributed in the hope that
93 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied
94 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
95 | GNU General Public License for more details.
96 |
97 | You should have received a copy of the GNU General Public License along
98 | with this program; if not, write to the Free Software Foundation, Inc.,
99 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
100 |
101 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/lib/Algorithm/Munkres.pm:
--------------------------------------------------------------------------------
1 | package Algorithm::Munkres;
2 |
3 | use 5.006;
4 | use strict;
5 | use warnings;
6 |
7 | require Exporter;
8 |
9 | our @ISA = qw(Exporter);
10 |
11 | our @EXPORT = qw( assign );
12 |
13 | our $VERSION = '0.08';
14 |
15 | #Variables global to the package
16 | my @mat = ();
17 | my @mask = ();
18 | my @colcov = ();
19 | my @rowcov = ();
20 | my $Z0_row = 0;
21 | my $Z0_col = 0;
22 | my @path = ();
23 |
24 | #The exported subroutine.
25 | #Expected Input: Reference to the input matrix (MxN)
26 | #Output: Mx1 matrix, giving the column number of the value assigned to each row. (For more explanation, refer to the perldoc.)
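#Example call (illustrative), following the interface described above:
#   my @mat = ( [2, 4, 7],
#               [3, 9, 5],
#               [8, 2, 9] );
#   my @assignment;
#   assign(\@mat, \@assignment);
#   # $assignment[$row] now holds the column index assigned to row $row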
27 | sub assign
28 | {
29 | #reference to the input matrix
30 | my $rmat = shift;
31 | my $rsolution_mat = shift;
32 | my ($row, $row_len) = (0,0);
33 |
34 | # re-initialize that global variables
35 | @mat = ();
36 | @mask = ();
37 | @colcov = ();
38 | @rowcov = ();
39 | $Z0_row = 0;
40 | $Z0_col = 0;
41 | @path = ();
42 |
43 | #variables local to the subroutine
44 | my $step = 0;
45 | my ($i, $j) = (0,0);
46 |
47 | #the input matrix
48 | my @inp_mat = @$rmat;
49 |
50 | #copy the original matrix before applying the algorithm to it
51 | foreach (@inp_mat)
52 | {
53 | push @mat, [ @$_ ];
54 | }
55 |
56 | #check if the input matrix is well-formed i.e. either square or rectangle.
57 | $row_len = $#{$mat[0]};
58 | foreach my $row (@mat)
59 | {
60 | if($row_len != $#$row)
61 | {
62 | die "Please check the input matrix.\nThe input matrix is not a well-formed matrix!\nThe input matrix has to be rectangular or square matrix.\n";
63 | }
64 | }
65 |
66 | #check if the matrix is a square matrix,
67 | #if not convert it to square matrix by padding zeroes.
68 | if($#mat < $#{$mat[0]})
69 | {
70 | # Add rows
71 | my $diff = $#{$mat[0]} - $#mat;
72 | for (1 .. $diff)
73 | {
74 | push @mat, [ (0) x @{$mat[0]} ];
75 | }
76 | }
77 | elsif($#mat > $#{$mat[0]})
78 | {
79 | # Add columns
80 | my $diff = $#mat - $#{$mat[0]};
81 | for (0 .. $#mat)
82 | {
83 | push @{$mat[$_]}, (0) x $diff;
84 | }
85 | }
86 |
87 | #initialize mask, column cover and row cover matrices
88 | clear_covers();
89 |
90 | for($i=0;$i<=$#mat;$i++)
91 | {
92 | push @mask, [ (0) x @mat ];
93 | }
94 |
95 | #The algorithm can be grouped in 6 steps.
96 | &stepone();
97 | &steptwo();
98 | $step = &stepthree();
99 | while($step == 4)
100 | {
101 | $step = &stepfour();
102 | while($step == 6)
103 | {
104 | &stepsix();
105 | $step = &stepfour();
106 | }
107 | &stepfive();
108 | $step = &stepthree();
109 | }
110 |
111 | #create the output matrix
112 | for my $i (0 .. $#mat)
113 | {
114 | for my $j (0 .. $#{$mat[$i]})
115 | {
116 | if($mask[$i][$j] == 1)
117 | {
118 | $rsolution_mat->[$i] = $j;
119 | }
120 | }
121 | }
122 |
123 |
124 | #Code for tracing------------------
125 | <<'ee';
126 | print "\nInput Matrix:\n";
127 | for($i=0;$i<=$#mat;$i++)
128 | {
129 | for($j=0;$j<=$#mat;$j++)
130 | {
131 | print $mat[$i][$j] . "\t";
132 | }
133 | print "\n";
134 | }
135 |
136 | print "\nMask Matrix:\n";
137 | for($i=0;$i<=$#mat;$i++)
138 | {
139 | for($j=0;$j<=$#mat;$j++)
140 | {
141 | print $mask[$i][$j] . "\t";
142 | }
143 | print "\n";
144 | }
145 |
146 | print "\nOutput Matrix:\n";
147 | print "$_\n" for @$rsolution_mat;
148 | ee
149 |
150 | #----------------------------------
151 |
152 | }
153 |
154 | #Step 1 - Find minimum value for every row and subtract this min from each element of the row.
155 | sub stepone
156 | {
157 | # print "Step 1 \n";
158 |
159 | #Find the minimum value for every row
160 | for my $row (@mat)
161 | {
162 | my $min = $row->[0];
163 | for (@$row)
164 | {
165 | $min = $_ if $min > $_;
166 | }
167 |
168 | #Subtract the minimum value of the row from each element of the row.
169 | @$row = map {$_ - $min} @$row;
170 | }
171 | # print "Step 1 end \n";
172 | }
173 |
174 | #Step 2 - Star the zeroes, Create the mask and cover matrices. Re-initialize the cover matrices for next steps.
175 | #To star a zero: We search for a zero in the matrix and then cover the column and row in which it occurs. Now this zero is starred.
176 | #The next starred zero can occur only in those columns and rows which have not been previously covered by any other starred zero.
177 | sub steptwo
178 | {
179 | # print "Step 2 \n";
180 |
181 | my ($i, $j) = (0,0);
182 |
183 | for($i=0;$i<=$#mat;$i++)
184 | {
185 | for($j=0;$j<=$#{$mat[$i]};$j++)
186 | {
187 | if($mat[$i][$j] == 0 && $colcov[$j] == 0 && $rowcov[$i] == 0)
188 | {
189 | $mask[$i][$j] = 1;
190 | $colcov[$j] = 1;
191 | $rowcov[$i] = 1;
192 | }
193 | }
194 | }
195 | #Re-initialize the cover matrices
196 | &clear_covers();
197 | # print "Step 2 end\n";
198 | }
199 |
200 | #Step 3 - Check if each column has a starred zero. If yes then the problem is solved else proceed to step 4
201 | sub stepthree
202 | {
203 | # print "Step 3 \n";
204 |
205 | my $cnt = 0;
206 |
207 | for my $i (0 .. $#mat)
208 | {
209 | for my $j (0 .. $#mat)
210 | {
211 | if($mask[$i][$j] == 1)
212 | {
213 | $colcov[$j] = 1;
214 | $cnt++;
215 | }
216 | }
217 | }
218 | if($cnt > $#mat)
219 | {
220 | # print "Step 3 end. Next expected step 7 \n";
221 | return 7;
222 | }
223 | else
224 | {
225 | # print "Step 3 end. Next expected step 4 \n";
226 | return 4;
227 | }
228 |
229 | }
230 |
231 | #Step 4 - Try to find a zero which is not starred and whose columns and rows are not yet covered.
232 | #If such a zero is found, prime it and try to find a starred zero in its row,
233 | # if not found proceed to step 5
234 | # else continue
235 | #Else proceed to step 6.
236 | sub stepfour
237 | {
238 | # print "Step 4 \n";
239 |
240 | while(1)
241 | {
242 | my ($row, $col) = &find_a_zero();
243 | if ($row < 0)
244 | {
245 | # No zeroes
246 | return 6;
247 | }
248 |
249 | $mask[$row][$col] = 2;
250 | my $star_col = &find_star_in_row($row);
251 | if ($star_col >= 0)
252 | {
253 | $col = $star_col;
254 | $rowcov[$row] = 1;
255 | $colcov[$col] = 0;
256 | }
257 | else
258 | {
259 | $Z0_row = $row;
260 | $Z0_col = $col;
261 | return 5;
262 | }
263 | }
264 | }
265 |
266 | #Tries to find yet uncovered zero
267 | sub find_a_zero
268 | {
269 | for my $i (0 .. $#mat)
270 | {
271 | next if $rowcov[$i];
272 |
273 | for my $j (reverse(0 .. $#mat)) # Prefer large $j
274 | {
275 | next if $colcov[$j];
276 | return ($i, $j) if $mat[$i][$j] == 0;
277 | }
278 | }
279 |
280 | return (-1, -1);
281 | }
282 |
283 | #Tries to find starred zero in the given row and returns the column number
284 | sub find_star_in_row
285 | {
286 | my $row = shift;
287 |
288 | for my $j (0 .. $#mat)
289 | {
290 | if($mask[$row][$j] == 1)
291 | {
292 | return $j;
293 | }
294 | }
295 | return -1;
296 | }
297 |
298 | #Step 5 - Try to find a starred zero in the column of the uncovered zero found in the step 4.
299 | #If starred zero found, try to find a prime zero in its row.
300 | #Continue finding starred zero in the column and primed zero in the row until,
301 | #we get to a primed zero which does not have a starred zero in its column.
302 | #At this point reduce the non-zero values of mask matrix by 1. i.e. change prime zeros to starred zeroes.
303 | #Clear the cover matrices and clear any primes i.e. values=2 from mask matrix.
304 | sub stepfive
305 | {
306 | # print "Step 5 \n";
307 |
308 | my $cnt = 0;
309 | my $done = 0;
310 |
311 | $path[$cnt][0] = $Z0_row;
312 | $path[$cnt][1] = $Z0_col;
313 |
314 | while($done == 0)
315 | {
316 | my $row = &find_star_in_col($path[$cnt][1]);
317 | if($row > -1)
318 | {
319 | $cnt++;
320 | $path[$cnt][0] = $row;
321 | $path[$cnt][1] = $path[$cnt - 1][1];
322 | }
323 | else
324 | {
325 | $done = 1;
326 | }
327 | if($done == 0)
328 | {
329 | my $col = &find_prime_in_row($path[$cnt][0]);
330 | $cnt++;
331 | $path[$cnt][0] = $path[$cnt - 1][0];
332 | $path[$cnt][1] = $col;
333 | }
334 | }
335 | &convert_path($cnt);
336 | &clear_covers();
337 | &erase_primes();
338 |
339 | # print "Step 5 end \n";
340 | }
341 |
342 | #Tries to find starred zero in the given column and returns the row number
343 | sub find_star_in_col
344 | {
345 | my $col = shift;
346 |
347 | for my $i (0 .. $#mat)
348 | {
349 | return $i if $mask[$i][$col] == 1;
350 | }
351 |
352 | return -1;
353 | }
354 |
355 | #Tries to find primed zero in the given row and returns the column number
356 | sub find_prime_in_row
357 | {
358 | my $row = shift;
359 |
360 | for my $j (0 .. $#mat)
361 | {
362 | return $j if $mask[$row][$j] == 2;
363 | }
364 |
365 | return -1;
366 | }
367 |
368 | #Reduces non-zero value in the mask matrix by 1.
369 | #i.e. converts all primes to stars and stars to none.
370 | sub convert_path
371 | {
372 | my $cnt = shift;
373 |
374 | for my $i (0 .. $cnt)
375 | {
376 | for ( $mask[$path[$i][0]][$path[$i][1]] ) {
377 | $_ = ( $_ == 1 ) ? 0 : 1;
378 | }
379 | }
380 | }
381 |
382 | #Clears cover matrices
383 | sub clear_covers
384 | {
385 | @rowcov = @colcov = (0) x @mat;
386 | }
387 |
388 | #Changes all primes i.e. values=2 to 0.
389 | sub erase_primes
390 | {
391 | for my $row (@mask)
392 | {
393 | for my $j (0 .. $#$row)
394 | {
395 | $row->[$j] = 0 if $row->[$j] == 2;
396 | }
397 | }
398 | }
399 |
400 | #Step 6 - Find the minimum value from the rows and columns which are currently not covered.
401 | #Subtract this minimum value from all the elements of the columns which are not covered.
402 | #Add this minimum value to all the elements of the rows which are covered.
403 | #Proceed to step 4.
404 | sub stepsix
405 | {
406 | # print "Step 6 \n";
407 | my ($i, $j);
408 | my $minval = 0;
409 |
410 | $minval = &find_smallest();
411 |
412 | for($i=0;$i<=$#mat;$i++)
413 | {
414 | for($j=0;$j<=$#{$mat[$i]};$j++)
415 | {
416 | if($rowcov[$i] == 1)
417 | {
418 | $mat[$i][$j] += $minval;
419 | }
420 | if($colcov[$j] == 0)
421 | {
422 | $mat[$i][$j] -= $minval;
423 | }
424 | }
425 | }
426 |
427 | # print "Step 6 end \n";
428 | }
429 |
430 | #Finds the minimum value from all the matrix values which are not covered.
431 | sub find_smallest
432 | {
433 | my $minval;
434 |
435 | for my $i (0 .. $#mat)
436 | {
437 | next if $rowcov[$i];
438 |
439 | for my $j (0 .. $#mat)
440 | {
441 | next if $colcov[$j];
442 | if( !defined($minval) || $minval > $mat[$i][$j])
443 | {
444 | $minval = $mat[$i][$j];
445 | }
446 | }
447 | }
448 | return $minval;
449 | }
450 |
451 |
452 | 1;
453 | __END__
454 |
455 | =head1 NAME
456 |
457 | Algorithm::Munkres - Perl extension for Munkres' solution to
458 | classical Assignment problem for square and rectangular matrices.
459 | This module extends the solution of Assignment problem for square
460 | matrices to rectangular matrices by padding zeros. Thus a rectangular
461 | matrix is converted to square matrix by padding necessary zeros.
462 |
463 | =head1 SYNOPSIS
464 |
465 | use Algorithm::Munkres;
466 |
467 | @mat = (
468 | [2, 4, 7, 9],
469 | [3, 9, 5, 1],
470 | [8, 2, 9, 7],
471 | );
472 |
473 | assign(\@mat,\@out_mat);
474 |
475 | Then the @out_mat array will have the output as: (0,3,1,2),
476 | where
477 | 0th element indicates that 0th row is assigned 0th column i.e value=2
478 | 1st element indicates that 1st row is assigned 3rd column i.e.value=1
479 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
480 | 3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0
481 |
482 |
483 | =head1 DESCRIPTION
484 |
485 | Assignment Problem: Given N jobs, N workers and the time taken by
486 | each worker to complete a job then how should the assignment of a
487 | Worker to a Job be done, so as to minimize the time taken.
488 |
489 | Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that:
490 | x y z
491 | p 2 4 7
492 | q 3 9 5
493 | r 8 2 9
494 |
495 | where the cell values of the above matrix give the time required
496 | for the worker(given by column name) to complete the job(given by
497 | the row name)
498 |
499 | then possible solutions are:
500 | Total
501 | 1. 2, 9, 9 20
502 | 2. 2, 2, 5 9
503 | 3. 3, 4, 9 16
504 | 4. 3, 2, 7 12
505 | 5. 8, 9, 7 24
506 | 6. 8, 4, 5 17
507 |
508 | Thus (2) is the optimal solution for the above problem.
509 | This kind of brute-force approach to solving the Assignment problem
510 | quickly becomes slow and bulky as N grows, because the number of
511 | possible solutions is N! and thus the task is to evaluate each one
512 | and then find the optimal solution. (If N=10, the number of possible
513 | solutions is 3628800!)
514 | Munkres' gives us a solution to this problem, which is implemented
515 | in this module.
516 |
517 | This module also solves Assignment problem for rectangular matrices
518 | (M x N) by converting them to square matrices by padding zeros. ex:
519 | If input matrix is:
520 | [2, 4, 7, 9],
521 | [3, 9, 5, 1],
522 | [8, 2, 9, 7]
523 | i.e 3 x 4 then we will convert it to 4 x 4 and the modified input
524 | matrix will be:
525 | [2, 4, 7, 9],
526 | [3, 9, 5, 1],
527 | [8, 2, 9, 7],
528 | [0, 0, 0, 0]
529 |
530 | =head1 EXPORT
531 |
532 | "assign" function by default.
533 |
534 | =head1 INPUT
535 |
536 | The input matrix should be in a two dimensional array(array of
537 | array) and the 'assign' subroutine expects a reference to this
538 | array and not the complete array.
539 | eg:assign(\@inp_mat, \@out_mat);
540 | The second argument to the assign subroutine is the reference
541 | to the output array.
542 |
543 | =head1 OUTPUT
544 |
545 | The assign subroutine expects references to two arrays as its
546 | input parameters. The second parameter is the reference to the
547 | output array. This array is populated by the assign subroutine. This
548 | array is a single-dimensional Nx1 matrix.
549 | For above example the output array returned will be:
550 | (0,
551 | 2,
552 | 1)
553 |
554 | where
555 | 0th element indicates that 0th row is assigned 0th column i.e value=2
556 | 1st element indicates that 1st row is assigned 2nd column i.e.value=5
557 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
558 |
559 | =head1 SEE ALSO
560 |
561 | 1. http://216.249.163.93/bob.pilgrim/445/munkres.html
562 |
563 | 2. Munkres, J. Algorithms for the assignment and transportation
564 | Problems. J. Siam 5 (Mar. 1957), 32-38
565 |
566 | 3. François Bourgeois and Jean-Claude Lassalle. 1971.
567 | An extension of the Munkres algorithm for the assignment
568 | problem to rectangular matrices.
569 | Communication ACM, 14(12):802-804
570 |
571 | =head1 AUTHOR
572 |
573 | Anagha Kulkarni, University of Minnesota Duluth
574 | kulka020 d.umn.edu
575 |
576 | Ted Pedersen, University of Minnesota Duluth
577 | tpederse d.umn.edu
578 |
579 | =head1 COPYRIGHT AND LICENSE
580 |
581 | Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni
582 |
583 | This program is free software; you can redistribute it and/or
584 | modify it under the terms of the GNU General Public License
585 | as published by the Free Software Foundation; either version 2
586 | of the License, or (at your option) any later version.
587 | This program is distributed in the hope that it will be useful,
588 | but WITHOUT ANY WARRANTY; without even the implied warranty of
589 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
590 | GNU General Public License for more details.
591 |
592 | You should have received a copy of the GNU General Public License
593 | along with this program; if not, write to the Free Software
594 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
595 |
596 | =cut
597 |
--------------------------------------------------------------------------------
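A minimal usage sketch for the vendored Algorithm::Munkres module above, using the 3x3 cost matrix from its DESCRIPTION section. The `use lib` path is an assumption (script run from the repository root); the expected output (0, 2, 1) is the assignment documented in the OUTPUT section.

    #!/usr/bin/perl
    use strict;
    use warnings;
    use lib 'reference-coreference-scorers-8.01/lib';   # assumed repo-root relative path
    use Algorithm::Munkres;                              # exports assign() by default

    # Cost matrix from the worked example: rows are jobs p,q,r; columns are workers x,y,z.
    my @mat = (
        [2, 4, 7],
        [3, 9, 5],
        [8, 2, 9],
    );

    my @out_mat;
    assign(\@mat, \@out_mat);    # $out_mat[$i] is the column assigned to row $i

    # Prints "0 2 1": row 0 -> col 0 (2), row 1 -> col 2 (5), row 2 -> col 1 (2),
    # i.e. the minimum total cost 2 + 5 + 2 = 9 from the example above.
    print "@out_mat\n";
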
/reference-coreference-scorers-8.01/lib/Algorithm/README.Munkres:
--------------------------------------------------------------------------------
1 | NAME
2 | Algorithm-Munkres : Perl extension for Munkres' solution to
3 | classical Assignment problem for square and rectangular matrices.
4 | This module extends the solution of Assignment problem for square
5 | matrices to rectangular matrices by padding zeros. Thus a rectangular
6 | matrix is converted to square matrix by padding necessary zeros.
7 |
8 | SYNOPSIS
9 | use Algorithm::Munkres;
10 |
11 | @mat = (
12 | [2, 4, 7, 9],
13 | [3, 9, 5, 1],
14 | [8, 2, 9, 7],
15 | );
16 |
17 | assign(\@mat,\@out_mat);
18 |
19 | Then the @out_mat array will have the output as: (0,3,1,2),
20 | where
21 | 0th element indicates that 0th row is assigned 0th column i.e value=2
22 | 1st element indicates that 1st row is assigned 3rd column i.e.value=1
23 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
24 | 3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0
25 |
26 | DESCRIPTION
27 | Assignment Problem: Given N jobs, N workers and the time taken by
28 | each worker to complete a job then how should the assignment of a
29 | Worker to a Job be done, so as to minimize the time taken.
30 |
31 | Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that:
32 | x y z
33 | p 2 4 7
34 | q 3 9 5
35 | r 8 2 9
36 |
37 | where the cell values of the above matrix give the time required
38 | for the worker(given by column name) to complete the job(given by
39 | the row name)
40 |
41 | then possible solutions are:
42 | Total
43 | 1. 2, 9, 9 20
44 | 2. 2, 2, 5 9
45 | 3. 3, 4, 9 16
46 | 4. 3, 2, 7 12
47 | 5. 8, 9, 7 24
48 | 6. 8, 4, 5 17
49 |
50 | Thus (2) is the optimal solution for the above problem.
51 | This kind of brute-force approach to solving the Assignment problem
52 | quickly becomes slow and bulky as N grows, because the number of
53 | possible solutions is N! and thus the task is to evaluate each one
54 | and then find the optimal solution. (If N=10, the number of possible
55 | solutions is 3628800!)
56 | Munkres' gives us a solution to this problem, which is implemented
57 | in this module.
58 |
59 | This module also solves Assignment problem for rectangular matrices
60 | (M x N) by converting them to square matrices by padding zeros. ex:
61 | If input matrix is:
62 | [2, 4, 7, 9],
63 | [3, 9, 5, 1],
64 | [8, 2, 9, 7]
65 | i.e 3 x 4 then we will convert it to 4 x 4 and the modified input
66 | matrix will be:
67 | [2, 4, 7, 9],
68 | [3, 9, 5, 1],
69 | [8, 2, 9, 7],
70 | [0, 0, 0, 0]
71 |
72 | EXPORT
73 | "assign" function by default.
74 |
75 | INPUT
76 | The input matrix should be in a two dimensional array(array of
77 | array) and the 'assign' subroutine expects a reference to this
78 | array and not the complete array.
79 | eg:assign(\@inp_mat, \@out_mat);
80 | The second argument to the assign subroutine is the reference
81 | to the output array.
82 |
83 | OUTPUT
84 | The assign subroutine expects references to two arrays as its
85 | input parameters. The second parameter is the reference to the
86 | output array. This array is populated by the assign subroutine. This
87 | array is a single-dimensional Nx1 matrix.
88 | For above example the output array returned will be:
89 | (0,
90 | 2,
91 | 1)
92 |
93 | where
94 | 0th element indicates that 0th row is assigned 0th column i.e value=2
95 | 1st element indicates that 1st row is assigned 2nd column i.e.value=5
96 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
97 |
98 | SEE ALSO
99 | 1. http://216.249.163.93/bob.pilgrim/445/munkres.html
100 |
101 | 2. Munkres, J. Algorithms for the assignment and transportation
102 | Problems. J. Siam 5 (Mar. 1957), 32-38
103 |
104 | 3. François Bourgeois and Jean-Claude Lassalle. 1971.
105 | An extension of the Munkres algorithm for the assignment
106 | problem to rectangular matrices.
107 | Communication ACM, 14(12):802-804
108 |
109 | AUTHOR
110 | Anagha Kulkarni, University of Minnesota Duluth
111 | kulka020 d.umn.edu
112 |
113 | Ted Pedersen, University of Minnesota Duluth
114 | tpederse d.umn.edu
115 |
116 | COPYRIGHT AND LICENSE
117 | Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni
118 |
119 | This program is free software; you can redistribute it and/or modify it
120 | under the terms of the GNU General Public License as published by the
121 | Free Software Foundation; either version 2 of the License, or (at your
122 | option) any later version. This program is distributed in the hope that
123 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied
124 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
125 | GNU General Public License for more details.
126 |
127 | You should have received a copy of the GNU General Public License along
128 | with this program; if not, write to the Free Software Foundation, Inc.,
129 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
130 |
131 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/lib/Cwd.pm:
--------------------------------------------------------------------------------
1 | package Cwd;
2 |
3 | =head1 NAME
4 |
5 | Cwd - get pathname of current working directory
6 |
7 | =head1 SYNOPSIS
8 |
9 | use Cwd;
10 | my $dir = getcwd;
11 |
12 | use Cwd 'abs_path';
13 | my $abs_path = abs_path($file);
14 |
15 | =head1 DESCRIPTION
16 |
17 | This module provides functions for determining the pathname of the
18 | current working directory. It is recommended that getcwd (or another
19 | *cwd() function) be used in I<all> code to ensure portability.
20 |
21 | By default, it exports the functions cwd(), getcwd(), fastcwd(), and
22 | fastgetcwd() (and, on Win32, getdcwd()) into the caller's namespace.
23 |
24 |
25 | =head2 getcwd and friends
26 |
27 | Each of these functions is called without arguments and returns the
28 | absolute path of the current working directory.
29 |
30 | =over 4
31 |
32 | =item getcwd
33 |
34 | my $cwd = getcwd();
35 |
36 | Returns the current working directory.
37 |
38 | Exposes the POSIX function getcwd(3) or re-implements it if it's not
39 | available.
40 |
41 | =item cwd
42 |
43 | my $cwd = cwd();
44 |
45 | The cwd() is the most natural form for the current architecture. For
46 | most systems it is identical to `pwd` (but without the trailing line
47 | terminator).
48 |
49 | =item fastcwd
50 |
51 | my $cwd = fastcwd();
52 |
53 | A more dangerous version of getcwd(), but potentially faster.
54 |
55 | It might conceivably chdir() you out of a directory that it can't
56 | chdir() you back into. If fastcwd encounters a problem it will return
57 | undef but will probably leave you in a different directory. For a
58 | measure of extra security, if everything appears to have worked, the
59 | fastcwd() function will check that it leaves you in the same directory
60 | that it started in. If it has changed it will C<die> with the message
61 | "Unstable directory path, current directory changed
62 | unexpectedly". That should never happen.
63 |
64 | =item fastgetcwd
65 |
66 | my $cwd = fastgetcwd();
67 |
68 | The fastgetcwd() function is provided as a synonym for cwd().
69 |
70 | =item getdcwd
71 |
72 | my $cwd = getdcwd();
73 | my $cwd = getdcwd('C:');
74 |
75 | The getdcwd() function is also provided on Win32 to get the current working
76 | directory on the specified drive, since Windows maintains a separate current
77 | working directory for each drive. If no drive is specified then the current
78 | drive is assumed.
79 |
80 | This function simply calls the Microsoft C library _getdcwd() function.
81 |
82 | =back
83 |
84 |
85 | =head2 abs_path and friends
86 |
87 | These functions are exported only on request. They each take a single
88 | argument and return the absolute pathname for it. If no argument is
89 | given they'll use the current working directory.
90 |
91 | =over 4
92 |
93 | =item abs_path
94 |
95 | my $abs_path = abs_path($file);
96 |
97 | Uses the same algorithm as getcwd(). Symbolic links and relative-path
98 | components ("." and "..") are resolved to return the canonical
99 | pathname, just like realpath(3).
100 |
101 | =item realpath
102 |
103 | my $abs_path = realpath($file);
104 |
105 | A synonym for abs_path().
106 |
107 | =item fast_abs_path
108 |
109 | my $abs_path = fast_abs_path($file);
110 |
111 | A more dangerous, but potentially faster version of abs_path.
112 |
113 | =back
114 |
115 | =head2 $ENV{PWD}
116 |
117 | If you ask to override your chdir() built-in function,
118 |
119 | use Cwd qw(chdir);
120 |
121 | then your PWD environment variable will be kept up to date. Note that
122 | it will only be kept up to date if all packages which use chdir import
123 | it from Cwd.
124 |
125 |
126 | =head1 NOTES
127 |
128 | =over 4
129 |
130 | =item *
131 |
132 | Since the path separators are different on some operating systems ('/'
133 | on Unix, ':' on MacPerl, etc...) we recommend you use the File::Spec
134 | modules wherever portability is a concern.
135 |
136 | =item *
137 |
138 | Actually, on Mac OS, the C<getcwd()>, C<fastgetcwd()> and C<fastcwd()>
139 | functions are all aliases for the C<cwd()> function, which, on Mac OS,
140 | calls `pwd`. Likewise, the C<abs_path()> function is an alias for
141 | C<fast_abs_path()>.
142 |
143 | =back
144 |
145 | =head1 AUTHOR
146 |
147 | Originally by the perl5-porters.
148 |
149 | Maintained by Ken Williams
150 |
151 | =head1 COPYRIGHT
152 |
153 | Copyright (c) 2004 by the Perl 5 Porters. All rights reserved.
154 |
155 | This program is free software; you can redistribute it and/or modify
156 | it under the same terms as Perl itself.
157 |
158 | Portions of the C code in this library are copyright (c) 1994 by the
159 | Regents of the University of California. All rights reserved. The
160 | license on this code is compatible with the licensing of the rest of
161 | the distribution - please see the source code in F<Cwd.xs> for the
162 | details.
163 |
164 | =head1 SEE ALSO
165 |
166 | L<File::Spec>
167 |
168 | =cut
169 |
170 | use strict;
171 | use Exporter;
172 | use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
173 |
174 | $VERSION = '3.39_02';
175 | my $xs_version = $VERSION;
176 | $VERSION =~ tr/_//;
177 |
178 | @ISA = qw/ Exporter /;
179 | @EXPORT = qw(cwd getcwd fastcwd fastgetcwd);
180 | push @EXPORT, qw(getdcwd) if $^O eq 'MSWin32';
181 | @EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath);
182 |
183 | # sys_cwd may keep the builtin command
184 |
185 | # All the functionality of this module may be provided by builtins,
186 | # in which case there is no sense in processing the rest of the file.
187 | # The best choice may be to have this in BEGIN, but how to return from BEGIN?
188 |
189 | if ($^O eq 'os2') {
190 | local $^W = 0;
191 |
192 | *cwd = defined &sys_cwd ? \&sys_cwd : \&_os2_cwd;
193 | *getcwd = \&cwd;
194 | *fastgetcwd = \&cwd;
195 | *fastcwd = \&cwd;
196 |
197 | *fast_abs_path = \&sys_abspath if defined &sys_abspath;
198 | *abs_path = \&fast_abs_path;
199 | *realpath = \&fast_abs_path;
200 | *fast_realpath = \&fast_abs_path;
201 |
202 | return 1;
203 | }
204 |
205 | # Need to look up the feature settings on VMS. The preferred way is to use the
206 | # VMS::Feature module, but that may not be available to dual life modules.
207 |
208 | my $use_vms_feature;
209 | BEGIN {
210 | if ($^O eq 'VMS') {
211 | if (eval { local $SIG{__DIE__}; require VMS::Feature; }) {
212 | $use_vms_feature = 1;
213 | }
214 | }
215 | }
216 |
217 | # Need to look up the UNIX report mode. This may become a dynamic mode
218 | # in the future.
219 | sub _vms_unix_rpt {
220 | my $unix_rpt;
221 | if ($use_vms_feature) {
222 | $unix_rpt = VMS::Feature::current("filename_unix_report");
223 | } else {
224 | my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || '';
225 | $unix_rpt = $env_unix_rpt =~ /^[ET1]/i;
226 | }
227 | return $unix_rpt;
228 | }
229 |
230 | # Need to look up the EFS character set mode. This may become a dynamic
231 | # mode in the future.
232 | sub _vms_efs {
233 | my $efs;
234 | if ($use_vms_feature) {
235 | $efs = VMS::Feature::current("efs_charset");
236 | } else {
237 | my $env_efs = $ENV{'DECC$EFS_CHARSET'} || '';
238 | $efs = $env_efs =~ /^[ET1]/i;
239 | }
240 | return $efs;
241 | }
242 |
243 |
244 | # If loading the XS stuff doesn't work, we can fall back to pure perl
245 | eval {
246 | if ( $] >= 5.006 ) {
247 | require XSLoader;
248 | XSLoader::load( __PACKAGE__, $xs_version);
249 | } else {
250 | require DynaLoader;
251 | push @ISA, 'DynaLoader';
252 | __PACKAGE__->bootstrap( $xs_version );
253 | }
254 | };
255 |
256 | # Big nasty table of function aliases
257 | my %METHOD_MAP =
258 | (
259 | VMS =>
260 | {
261 | cwd => '_vms_cwd',
262 | getcwd => '_vms_cwd',
263 | fastcwd => '_vms_cwd',
264 | fastgetcwd => '_vms_cwd',
265 | abs_path => '_vms_abs_path',
266 | fast_abs_path => '_vms_abs_path',
267 | },
268 |
269 | MSWin32 =>
270 | {
271 | # We assume that &_NT_cwd is defined as an XSUB or in the core.
272 | cwd => '_NT_cwd',
273 | getcwd => '_NT_cwd',
274 | fastcwd => '_NT_cwd',
275 | fastgetcwd => '_NT_cwd',
276 | abs_path => 'fast_abs_path',
277 | realpath => 'fast_abs_path',
278 | },
279 |
280 | dos =>
281 | {
282 | cwd => '_dos_cwd',
283 | getcwd => '_dos_cwd',
284 | fastgetcwd => '_dos_cwd',
285 | fastcwd => '_dos_cwd',
286 | abs_path => 'fast_abs_path',
287 | },
288 |
289 | # QNX4. QNX6 has a $os of 'nto'.
290 | qnx =>
291 | {
292 | cwd => '_qnx_cwd',
293 | getcwd => '_qnx_cwd',
294 | fastgetcwd => '_qnx_cwd',
295 | fastcwd => '_qnx_cwd',
296 | abs_path => '_qnx_abs_path',
297 | fast_abs_path => '_qnx_abs_path',
298 | },
299 |
300 | cygwin =>
301 | {
302 | getcwd => 'cwd',
303 | fastgetcwd => 'cwd',
304 | fastcwd => 'cwd',
305 | abs_path => 'fast_abs_path',
306 | realpath => 'fast_abs_path',
307 | },
308 |
309 | epoc =>
310 | {
311 | cwd => '_epoc_cwd',
312 | getcwd => '_epoc_cwd',
313 | fastgetcwd => '_epoc_cwd',
314 | fastcwd => '_epoc_cwd',
315 | abs_path => 'fast_abs_path',
316 | },
317 |
318 | MacOS =>
319 | {
320 | getcwd => 'cwd',
321 | fastgetcwd => 'cwd',
322 | fastcwd => 'cwd',
323 | abs_path => 'fast_abs_path',
324 | },
325 | );
326 |
327 | $METHOD_MAP{NT} = $METHOD_MAP{MSWin32};
328 |
329 |
330 | # Find the pwd command in the expected locations. We assume these
331 | # are safe. This prevents _backtick_pwd() consulting $ENV{PATH}
332 | # so everything works under taint mode.
333 | my $pwd_cmd;
334 | foreach my $try ('/bin/pwd',
335 | '/usr/bin/pwd',
336 | '/QOpenSys/bin/pwd', # OS/400 PASE.
337 | ) {
338 |
339 | if( -x $try ) {
340 | $pwd_cmd = $try;
341 | last;
342 | }
343 | }
344 | my $found_pwd_cmd = defined($pwd_cmd);
345 | unless ($pwd_cmd) {
346 | # Isn't this wrong? _backtick_pwd() will fail if someone has
347 | # pwd in their path but it is not /bin/pwd or /usr/bin/pwd?
348 | # See [perl #16774]. --jhi
349 | $pwd_cmd = 'pwd';
350 | }
351 |
352 | # Lazy-load Carp
353 | sub _carp { require Carp; Carp::carp(@_) }
354 | sub _croak { require Carp; Carp::croak(@_) }
355 |
356 | # The 'natural and safe form' for UNIX (pwd may be setuid root)
357 | sub _backtick_pwd {
358 | # Localize %ENV entries in a way that won't create new hash keys
359 | my @localize = grep exists $ENV{$_}, qw(PATH IFS CDPATH ENV BASH_ENV);
360 | local @ENV{@localize};
361 |
362 | my $cwd = `$pwd_cmd`;
363 | # Belt-and-suspenders in case someone said "undef $/".
364 | local $/ = "\n";
365 | # `pwd` may fail e.g. if the disk is full
366 | chomp($cwd) if defined $cwd;
367 | $cwd;
368 | }
369 |
370 | # Since some ports may predefine cwd internally (e.g., NT)
371 | # we take care not to override an existing definition for cwd().
372 |
373 | unless ($METHOD_MAP{$^O}{cwd} or defined &cwd) {
374 | # The pwd command is not available in some chroot(2)'ed environments
375 | my $sep = $Config::Config{path_sep} || ':';
376 | my $os = $^O; # Protect $^O from tainting
377 |
378 |
379 | # Try again to find a pwd, this time searching the whole PATH.
380 | if (defined $ENV{PATH} and $os ne 'MSWin32') { # no pwd on Windows
381 | my @candidates = split($sep, $ENV{PATH});
382 | while (!$found_pwd_cmd and @candidates) {
383 | my $candidate = shift @candidates;
384 | $found_pwd_cmd = 1 if -x "$candidate/pwd";
385 | }
386 | }
387 |
388 | # MacOS has some special magic to make `pwd` work.
389 | if( $os eq 'MacOS' || $found_pwd_cmd )
390 | {
391 | *cwd = \&_backtick_pwd;
392 | }
393 | else {
394 | *cwd = \&getcwd;
395 | }
396 | }
397 |
398 | if ($^O eq 'cygwin') {
399 | # We need to make sure cwd() is called with no args, because it's
400 | # got an arg-less prototype and will die if args are present.
401 | local $^W = 0;
402 | my $orig_cwd = \&cwd;
403 | *cwd = sub { &$orig_cwd() }
404 | }
405 |
406 |
407 | # set a reasonable (and very safe) default for fastgetcwd, in case it
408 | # isn't redefined later (20001212 rspier)
409 | *fastgetcwd = \&cwd;
410 |
411 | # A non-XS version of getcwd() - also used to bootstrap the perl build
412 | # process, when miniperl is running and no XS loading happens.
413 | sub _perl_getcwd
414 | {
415 | abs_path('.');
416 | }
417 |
418 | # By John Bazik
419 | #
420 | # Usage: $cwd = &fastcwd;
421 | #
422 | # This is a faster version of getcwd. It's also more dangerous because
423 | # you might chdir out of a directory that you can't chdir back into.
424 |
425 | sub fastcwd_ {
426 | my($odev, $oino, $cdev, $cino, $tdev, $tino);
427 | my(@path, $path);
428 | local(*DIR);
429 |
430 | my($orig_cdev, $orig_cino) = stat('.');
431 | ($cdev, $cino) = ($orig_cdev, $orig_cino);
432 | for (;;) {
433 | my $direntry;
434 | ($odev, $oino) = ($cdev, $cino);
435 | CORE::chdir('..') || return undef;
436 | ($cdev, $cino) = stat('.');
437 | last if $odev == $cdev && $oino == $cino;
438 | opendir(DIR, '.') || return undef;
439 | for (;;) {
440 | $direntry = readdir(DIR);
441 | last unless defined $direntry;
442 | next if $direntry eq '.';
443 | next if $direntry eq '..';
444 |
445 | ($tdev, $tino) = lstat($direntry);
446 | last unless $tdev != $odev || $tino != $oino;
447 | }
448 | closedir(DIR);
449 | return undef unless defined $direntry; # should never happen
450 | unshift(@path, $direntry);
451 | }
452 | $path = '/' . join('/', @path);
453 | if ($^O eq 'apollo') { $path = "/".$path; }
454 | # At this point $path may be tainted (if tainting) and chdir would fail.
455 | # Untaint it then check that we landed where we started.
456 | $path =~ /^(.*)\z/s # untaint
457 | && CORE::chdir($1) or return undef;
458 | ($cdev, $cino) = stat('.');
459 | die "Unstable directory path, current directory changed unexpectedly"
460 | if $cdev != $orig_cdev || $cino != $orig_cino;
461 | $path;
462 | }
463 | if (not defined &fastcwd) { *fastcwd = \&fastcwd_ }
464 |
465 |
466 | # Keeps track of current working directory in PWD environment var
467 | # Usage:
468 | # use Cwd 'chdir';
469 | # chdir $newdir;
470 |
471 | my $chdir_init = 0;
472 |
473 | sub chdir_init {
474 | if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos' and $^O ne 'MSWin32') {
475 | my($dd,$di) = stat('.');
476 | my($pd,$pi) = stat($ENV{'PWD'});
477 | if (!defined $dd or !defined $pd or $di != $pi or $dd != $pd) {
478 | $ENV{'PWD'} = cwd();
479 | }
480 | }
481 | else {
482 | my $wd = cwd();
483 | $wd = Win32::GetFullPathName($wd) if $^O eq 'MSWin32';
484 | $ENV{'PWD'} = $wd;
485 | }
486 | # Strip an automounter prefix (where /tmp_mnt/foo/bar == /foo/bar)
487 | if ($^O ne 'MSWin32' and $ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) {
488 | my($pd,$pi) = stat($2);
489 | my($dd,$di) = stat($1);
490 | if (defined $pd and defined $dd and $di == $pi and $dd == $pd) {
491 | $ENV{'PWD'}="$2$3";
492 | }
493 | }
494 | $chdir_init = 1;
495 | }
496 |
497 | sub chdir {
498 | my $newdir = @_ ? shift : ''; # allow for no arg (chdir to HOME dir)
499 | $newdir =~ s|///*|/|g unless $^O eq 'MSWin32';
500 | chdir_init() unless $chdir_init;
501 | my $newpwd;
502 | if ($^O eq 'MSWin32') {
503 | # get the full path name *before* the chdir()
504 | $newpwd = Win32::GetFullPathName($newdir);
505 | }
506 |
507 | return 0 unless CORE::chdir $newdir;
508 |
509 | if ($^O eq 'VMS') {
510 | return $ENV{'PWD'} = $ENV{'DEFAULT'}
511 | }
512 | elsif ($^O eq 'MacOS') {
513 | return $ENV{'PWD'} = cwd();
514 | }
515 | elsif ($^O eq 'MSWin32') {
516 | $ENV{'PWD'} = $newpwd;
517 | return 1;
518 | }
519 |
520 | if (ref $newdir eq 'GLOB') { # in case a file/dir handle is passed in
521 | $ENV{'PWD'} = cwd();
522 | } elsif ($newdir =~ m#^/#s) {
523 | $ENV{'PWD'} = $newdir;
524 | } else {
525 | my @curdir = split(m#/#,$ENV{'PWD'});
526 | @curdir = ('') unless @curdir;
527 | my $component;
528 | foreach $component (split(m#/#, $newdir)) {
529 | next if $component eq '.';
530 | pop(@curdir),next if $component eq '..';
531 | push(@curdir,$component);
532 | }
533 | $ENV{'PWD'} = join('/',@curdir) || '/';
534 | }
535 | 1;
536 | }
537 |
538 |
539 | sub _perl_abs_path
540 | {
541 | my $start = @_ ? shift : '.';
542 | my($dotdots, $cwd, @pst, @cst, $dir, @tst);
543 |
544 | unless (@cst = stat( $start ))
545 | {
546 | _carp("stat($start): $!");
547 | return '';
548 | }
549 |
550 | unless (-d _) {
551 | # Make sure we can be invoked on plain files, not just directories.
552 | # NOTE that this routine assumes that '/' is the only directory separator.
553 |
554 | my ($dir, $file) = $start =~ m{^(.*)/(.+)$}
555 | or return cwd() . '/' . $start;
556 |
557 | # Can't use "-l _" here, because the previous stat was a stat(), not an lstat().
558 | if (-l $start) {
559 | my $link_target = readlink($start);
560 | die "Can't resolve link $start: $!" unless defined $link_target;
561 |
562 | require File::Spec;
563 | $link_target = $dir . '/' . $link_target
564 | unless File::Spec->file_name_is_absolute($link_target);
565 |
566 | return abs_path($link_target);
567 | }
568 |
569 | return $dir ? abs_path($dir) . "/$file" : "/$file";
570 | }
571 |
572 | $cwd = '';
573 | $dotdots = $start;
574 | do
575 | {
576 | $dotdots .= '/..';
577 | @pst = @cst;
578 | local *PARENT;
579 | unless (opendir(PARENT, $dotdots))
580 | {
581 | # probably a permissions issue. Try the native command.
582 | require File::Spec;
583 | return File::Spec->rel2abs( $start, _backtick_pwd() );
584 | }
585 | unless (@cst = stat($dotdots))
586 | {
587 | _carp("stat($dotdots): $!");
588 | closedir(PARENT);
589 | return '';
590 | }
591 | if ($pst[0] == $cst[0] && $pst[1] == $cst[1])
592 | {
593 | $dir = undef;
594 | }
595 | else
596 | {
597 | do
598 | {
599 | unless (defined ($dir = readdir(PARENT)))
600 | {
601 | _carp("readdir($dotdots): $!");
602 | closedir(PARENT);
603 | return '';
604 | }
605 | $tst[0] = $pst[0]+1 unless (@tst = lstat("$dotdots/$dir"))
606 | }
607 | while ($dir eq '.' || $dir eq '..' || $tst[0] != $pst[0] ||
608 | $tst[1] != $pst[1]);
609 | }
610 | $cwd = (defined $dir ? "$dir" : "" ) . "/$cwd" ;
611 | closedir(PARENT);
612 | } while (defined $dir);
613 | chop($cwd) unless $cwd eq '/'; # drop the trailing /
614 | $cwd;
615 | }
616 |
617 |
618 | my $Curdir;
619 | sub fast_abs_path {
620 | local $ENV{PWD} = $ENV{PWD} || ''; # Guard against clobberage
621 | my $cwd = getcwd();
622 | require File::Spec;
623 | my $path = @_ ? shift : ($Curdir ||= File::Spec->curdir);
624 |
625 | # Detaint else we'll explode in taint mode. This is safe because
626 | # we're not doing anything dangerous with it.
627 | ($path) = $path =~ /(.*)/;
628 | ($cwd) = $cwd =~ /(.*)/;
629 |
630 | unless (-e $path) {
631 | _croak("$path: No such file or directory");
632 | }
633 |
634 | unless (-d _) {
635 | # Make sure we can be invoked on plain files, not just directories.
636 |
637 | my ($vol, $dir, $file) = File::Spec->splitpath($path);
638 | return File::Spec->catfile($cwd, $path) unless length $dir;
639 |
640 | if (-l $path) {
641 | my $link_target = readlink($path);
642 | die "Can't resolve link $path: $!" unless defined $link_target;
643 |
644 | $link_target = File::Spec->catpath($vol, $dir, $link_target)
645 | unless File::Spec->file_name_is_absolute($link_target);
646 |
647 | return fast_abs_path($link_target);
648 | }
649 |
650 | return $dir eq File::Spec->rootdir
651 | ? File::Spec->catpath($vol, $dir, $file)
652 | : fast_abs_path(File::Spec->catpath($vol, $dir, '')) . '/' . $file;
653 | }
654 |
655 | if (!CORE::chdir($path)) {
656 | _croak("Cannot chdir to $path: $!");
657 | }
658 | my $realpath = getcwd();
659 | if (! ((-d $cwd) && (CORE::chdir($cwd)))) {
660 | _croak("Cannot chdir back to $cwd: $!");
661 | }
662 | $realpath;
663 | }
664 |
665 | # added function alias to follow principle of least surprise
666 | # based on previous aliasing. --tchrist 27-Jan-00
667 | *fast_realpath = \&fast_abs_path;
668 |
669 |
670 | # --- PORTING SECTION ---
671 |
672 | # VMS: $ENV{'DEFAULT'} points to default directory at all times
673 | # 06-Mar-1996 Charles Bailey bailey@newman.upenn.edu
674 | # Note: Use of Cwd::chdir() causes the logical name PWD to be defined
675 | # in the process logical name table as the default device and directory
676 | # seen by Perl. This may not be the same as the default device
677 | # and directory seen by DCL after Perl exits, since the effects
678 | # the CRTL chdir() function persist only until Perl exits.
679 |
680 | sub _vms_cwd {
681 | return $ENV{'DEFAULT'};
682 | }
683 |
684 | sub _vms_abs_path {
685 | return $ENV{'DEFAULT'} unless @_;
686 | my $path = shift;
687 |
688 | my $efs = _vms_efs;
689 | my $unix_rpt = _vms_unix_rpt;
690 |
691 | if (defined &VMS::Filespec::vmsrealpath) {
692 | my $path_unix = 0;
693 | my $path_vms = 0;
694 |
695 | $path_unix = 1 if ($path =~ m#(?<=\^)/#);
696 | $path_unix = 1 if ($path =~ /^\.\.?$/);
697 | $path_vms = 1 if ($path =~ m#[\[<\]]#);
698 | $path_vms = 1 if ($path =~ /^--?$/);
699 |
700 | my $unix_mode = $path_unix;
701 | if ($efs) {
702 | # In case of a tie, the Unix report mode decides.
703 | if ($path_vms == $path_unix) {
704 | $unix_mode = $unix_rpt;
705 | } else {
706 | $unix_mode = 0 if $path_vms;
707 | }
708 | }
709 |
710 | if ($unix_mode) {
711 | # Unix format
712 | return VMS::Filespec::unixrealpath($path);
713 | }
714 |
715 | # VMS format
716 |
717 | my $new_path = VMS::Filespec::vmsrealpath($path);
718 |
719 | # Perl expects directories to be in directory format
720 | $new_path = VMS::Filespec::pathify($new_path) if -d $path;
721 | return $new_path;
722 | }
723 |
724 | # Fallback to older algorithm if correct ones are not
725 | # available.
726 |
727 | if (-l $path) {
728 | my $link_target = readlink($path);
729 | die "Can't resolve link $path: $!" unless defined $link_target;
730 |
731 | return _vms_abs_path($link_target);
732 | }
733 |
734 | # may need to turn foo.dir into [.foo]
735 | my $pathified = VMS::Filespec::pathify($path);
736 | $path = $pathified if defined $pathified;
737 |
738 | return VMS::Filespec::rmsexpand($path);
739 | }
740 |
741 | sub _os2_cwd {
742 | $ENV{'PWD'} = `cmd /c cd`;
743 | chomp $ENV{'PWD'};
744 | $ENV{'PWD'} =~ s:\\:/:g ;
745 | return $ENV{'PWD'};
746 | }
747 |
748 | sub _win32_cwd_simple {
749 | $ENV{'PWD'} = `cd`;
750 | chomp $ENV{'PWD'};
751 | $ENV{'PWD'} =~ s:\\:/:g ;
752 | return $ENV{'PWD'};
753 | }
754 |
755 | sub _win32_cwd {
756 | # Need to avoid taking any sort of reference to the typeglob or the code in
757 | # the optree, so that this tests the runtime state of things, as the
758 | # ExtUtils::MakeMaker tests for "miniperl" need to be able to fake things at
759 | # runtime by deleting the subroutine. *foo{THING} syntax on a symbol table
760 | # lookup avoids needing a string eval, which has been reported to cause
761 | # problems (for reasons that we haven't been able to get to the bottom of -
762 | # rt.cpan.org #56225)
763 | if (*{$DynaLoader::{boot_DynaLoader}}{CODE}) {
764 | $ENV{'PWD'} = Win32::GetCwd();
765 | }
766 | else { # miniperl
767 | chomp($ENV{'PWD'} = `cd`);
768 | }
769 | $ENV{'PWD'} =~ s:\\:/:g ;
770 | return $ENV{'PWD'};
771 | }
772 |
773 | *_NT_cwd = defined &Win32::GetCwd ? \&_win32_cwd : \&_win32_cwd_simple;
774 |
775 | sub _dos_cwd {
776 | if (!defined &Dos::GetCwd) {
777 | $ENV{'PWD'} = `command /c cd`;
778 | chomp $ENV{'PWD'};
779 | $ENV{'PWD'} =~ s:\\:/:g ;
780 | } else {
781 | $ENV{'PWD'} = Dos::GetCwd();
782 | }
783 | return $ENV{'PWD'};
784 | }
785 |
786 | sub _qnx_cwd {
787 | local $ENV{PATH} = '';
788 | local $ENV{CDPATH} = '';
789 | local $ENV{ENV} = '';
790 | $ENV{'PWD'} = `/usr/bin/fullpath -t`;
791 | chomp $ENV{'PWD'};
792 | return $ENV{'PWD'};
793 | }
794 |
795 | sub _qnx_abs_path {
796 | local $ENV{PATH} = '';
797 | local $ENV{CDPATH} = '';
798 | local $ENV{ENV} = '';
799 | my $path = @_ ? shift : '.';
800 | local *REALPATH;
801 |
802 | defined( open(REALPATH, '-|') || exec '/usr/bin/fullpath', '-t', $path ) or
803 | die "Can't open /usr/bin/fullpath: $!";
804 | my $realpath = <REALPATH>;
805 | close REALPATH;
806 | chomp $realpath;
807 | return $realpath;
808 | }
809 |
810 | sub _epoc_cwd {
811 | $ENV{'PWD'} = EPOC::getcwd();
812 | return $ENV{'PWD'};
813 | }
814 |
815 |
816 | # Now that all the base-level functions are set up, alias the
817 | # user-level functions to the right places
818 |
819 | if (exists $METHOD_MAP{$^O}) {
820 | my $map = $METHOD_MAP{$^O};
821 | foreach my $name (keys %$map) {
822 | local $^W = 0; # assignments trigger 'subroutine redefined' warning
823 | no strict 'refs';
824 | *{$name} = \&{$map->{$name}};
825 | }
826 | }
827 |
828 | # In case the XS version doesn't load.
829 | *abs_path = \&_perl_abs_path unless defined &abs_path;
830 | *getcwd = \&_perl_getcwd unless defined &getcwd;
831 |
832 | # added function alias for those of us more
833 | # used to the libc function. --tchrist 27-Jan-00
834 | *realpath = \&abs_path;
835 |
836 | 1;
837 |
--------------------------------------------------------------------------------
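A short sketch of the Cwd calls documented in the POD above: getcwd() takes no arguments, and abs_path() resolves "." / ".." components and symlinks like realpath(3). The file path passed to abs_path() is only an example and is assumed to exist relative to the current directory.

    #!/usr/bin/perl
    use strict;
    use warnings;
    use Cwd qw(getcwd abs_path);

    my $dir = getcwd();          # absolute path of the current working directory
    print "cwd: $dir\n";

    # Resolve a relative path to its canonical absolute form (false/empty on failure).
    my $abs = abs_path('reference-coreference-scorers-8.01/scorer.pl');
    print "scorer: $abs\n" if $abs;
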
/reference-coreference-scorers-8.01/scorer.bat:
--------------------------------------------------------------------------------
1 | @rem = '--*-Perl-*--
2 | @echo off
3 | if "%OS%" == "Windows_NT" goto WinNT
4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
5 | goto endofperl
6 | :WinNT
7 | perl -x -S %0 %*
8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH.
10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul
11 | goto endofperl
12 | @rem ';
13 | #!perl
14 | #line 15
15 |
16 | BEGIN {
17 | $d = $0;
18 | $d =~ s/\/[^\/][^\/]*$//g;
19 | push(@INC, $d."/lib");
20 | }
21 |
22 | use strict;
23 | use CorScorer;
24 |
25 | if (@ARGV < 3) {
26 | print q|
27 | use: scorer.bat <metric> <keys_file> <response_file> [name]
28 |
29 | metric: the metric desired to score the results:
30 | muc: MUCScorer (Vilain et al, 1995)
31 | bcub: B-Cubed (Bagga and Baldwin, 1998)
32 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity
33 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity
34 | all: uses all the metrics to score
35 |
36 | keys_file: file with expected coreference chains in SemEval format
37 |
38 | response_file: file with output of coreference system (SemEval format)
39 |
40 | name: [optional] the name of the document to score. If name is not
41 | given, all the documents in the dataset will be scored. If given
42 | name is "none" then all the documents are scored but only total
43 | results are shown.
44 |
45 | |;
46 | exit;
47 | }
48 |
49 | my $metric = shift (@ARGV);
50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) {
51 | print "Invalid metric\n";
52 | exit;
53 | }
54 |
55 |
56 | if ($metric eq 'all') {
57 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') {
58 | print "\nMETRIC $m:\n";
59 | &CorScorer::Score( $m, @ARGV );
60 | }
61 | }
62 | else {
63 | &CorScorer::Score( $metric, @ARGV );
64 | }
65 |
66 | __END__
67 | :endofperl
68 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/scorer.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | BEGIN {
4 | $d = $0;
5 | $d =~ s/\/[^\/][^\/]*$//g;
6 |
7 | if ($d eq $0) {
8 | unshift(@INC, "lib");
9 | }
10 | else {
11 | unshift(@INC, $d . "/lib");
12 | }
13 | }
14 |
15 | use strict;
16 | use CorScorer;
17 |
18 | if (@ARGV < 3) {
19 | print q|
20 | use: scorer.pl <metric> <keys_file> <response_file> [name]
21 |
22 | metric: the metric desired to score the results:
23 | muc: MUCScorer (Vilain et al, 1995)
24 | bcub: B-Cubed (Bagga and Baldwin, 1998)
25 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity
26 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity
27 | blanc: BLANC
28 | all: uses all the metrics to score
29 |
30 | keys_file: file with expected coreference chains in SemEval format
31 |
32 | response_file: file with output of coreference system (SemEval format)
33 |
34 | name: [optional] the name of the document to score. If name is not
35 | given, all the documents in the dataset will be scored. If given
36 | name is "none" then all the documents are scored but only total
37 | results are shown.
38 |
39 | |;
40 | exit;
41 | }
42 |
43 | my $metric = shift(@ARGV);
44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) {
45 | print "Invalid metric\n";
46 | exit;
47 | }
48 |
49 | if ($metric eq 'all') {
50 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') {
51 | print "\nMETRIC $m:\n";
52 | &CorScorer::Score($m, @ARGV);
53 | }
54 | }
55 | else {
56 | &CorScorer::Score($metric, @ARGV);
57 | }
58 |
59 |
--------------------------------------------------------------------------------
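The wrapper above just puts its lib/ directory on @INC and forwards the command-line arguments to CorScorer::Score. A sketch of the equivalent programmatic call, assuming it is run from inside reference-coreference-scorers-8.01/ and using one of the bundled test files as the key/response pair:

    #!/usr/bin/perl
    # Equivalent of: perl scorer.pl muc test/DataFiles/TC-A.key test/DataFiles/TC-A-1.response none
    use strict;
    use lib 'lib';
    use CorScorer;

    my $keys     = 'test/DataFiles/TC-A.key';          # expected coreference chains (SemEval format)
    my $response = 'test/DataFiles/TC-A-1.response';   # system output (SemEval format)

    # Same call the wrapper makes; 'none' scores every document but prints only the totals.
    CorScorer::Score('muc', $keys, $response, 'none');
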
/reference-coreference-scorers-8.01/test/CorefMetricTest.pm:
--------------------------------------------------------------------------------
1 | package CorefMetricTest;
2 | use strict;
3 | use warnings;
4 | use Exporter;
5 |
6 | our @ISA= qw(Exporter);
7 | our @EXPORT = qw(ComputeScoreFromCounts DiffExpectedAndActual);
8 |
9 | ################################################################################
10 | # Compute recall, precision and F1.
11 | #
12 | # Input: (numerator_counts_for_recall, denominator_counts_for_recall,
13 | # numerator_counts_for_precision, denominator_counts_for_precision)
14 | # Output: (recall, precision, F1)
15 | ################################################################################
16 | sub ComputeScoreFromCounts {
17 | # The first 4 are also coref link counts when using BLANC.
18 | my ($recall_numerator, $recall_denominator,
19 | $precision_numerator, $precision_denominator, @noncoref_counts) = @_;
20 | # The coref recall, precision, and F1 when using BLANC.
21 | my ($recall, $precision, $F1) =
22 | RPFFromCounts($recall_numerator, $recall_denominator,
23 | $precision_numerator, $precision_denominator);
24 |
25 | # BLANC: @noncoref_counts=
26 | # (noncoref_numerator_recall, noncoref_denominator_recall,
27 | # noncoref_numerator_precision, noncoref_denominator_precision)
28 | if (scalar(@noncoref_counts) == 4) {
29 | ($recall, $precision, $F1) = CorScorer::ComputeBLANCFromCounts(
30 | $recall_numerator, $recall_denominator, $precision_denominator,
31 | $noncoref_counts[0], $noncoref_counts[1], $noncoref_counts[3]);
32 | }
33 | $recall = ($recall < 0) ? 0 : $recall;
34 | $precision = ($precision < 0) ? 0 : $precision;
35 | $F1 = ($F1 < 0) ? 0 : $F1;
36 | return ($recall, $precision, $F1);
37 | }
38 |
39 | sub RPFFromCounts
40 | {
41 | my ($recall_numerator, $recall_denominator,
42 | $precision_numerator, $precision_denominator, @nonCorefCounts) = @_;
43 | my ($recall, $precision, $F1) = (-1, -1, 0);
44 | if ($recall_denominator > 0) {
45 | $recall = $recall_numerator / $recall_denominator;
46 | }
47 | if ($precision_denominator > 0) {
48 | $precision = $precision_numerator / $precision_denominator;
49 | }
50 |
51 | if (($recall + $precision) > 0) {
52 | $F1 = 2 * $recall * $precision / ($recall + $precision);
53 | }
54 |
55 | return ($recall, $precision, $F1);
56 | }
57 |
58 | # deprecated -- see CorScorer::ComputeBLANCFromCounts().
59 | sub ComputeBLANCRPF
60 | {
61 | my ($coref_recall, $coref_precision, $coref_F1,
62 | $noncoref_recall, $noncoref_precision, $noncoref_F1) = @_;
63 |
64 | my ($recall, $precision, $F1);
65 |
66 | if ($coref_recall < 0 && $noncoref_recall < 0) {
67 | # no key mention.
68 | $recall = $precision = $F1 = 0;
69 | } elsif ($coref_recall < 0) {
70 | # key: all links are non-coref (mentions are all singletons).
71 | $recall = $noncoref_recall;
72 | $precision = ($noncoref_precision < 0) ? 0 : $noncoref_precision;
73 | $F1 = $noncoref_F1;
74 | } elsif ($noncoref_recall < 0) {
75 | # key: all links are coref (all mentions are in one entity).
76 | $recall = $coref_recall;
77 | $precision = ($coref_precision < 0) ? 0 : $coref_precision;
78 | $F1 = $coref_F1;
79 | } else {
80 | #key contains both coref and non-coref links.
81 | if ($coref_precision < 0 && $noncoref_precision < 0) {
82 | # no response.
83 | $recall = $precision = $F1 = 0;
84 | } else {
85 | if ($coref_precision < 0) {
86 | # response: all links are non-coref, or response mentions are all
87 | # singletons.
88 | $coref_precision = 0;
89 | } elsif ($noncoref_precision < 0) {
90 | # response: all links are coref, or all mentions are in one entity.
91 | $noncoref_precision = 0;
92 | }
93 | $recall = ($coref_recall + $noncoref_recall)/2;
94 | $precision = ($coref_precision + $noncoref_precision)/2;
95 | $F1 = ($coref_F1 + $noncoref_F1)/2;
96 | }
97 | }
98 |
99 | return ($recall, $precision, $F1);
100 | }
101 |
102 | ##############################################################################
103 | # Compute the sum of the differences between the expected recall, precision,
104 | # F1 and the actual ones.
105 | ##############################################################################
106 | sub DiffExpectedAndActual {
107 | my ($expected, $actual) = @_;
108 | if (scalar(@$expected) != scalar(@$actual)) {
109 | print STDERR "Expected and actual have diff dimensions: \n";
110 | print STDERR " Expected: ", join(" ", @$expected), "\n";
111 | print STDERR " Actual: ", join(" ", @$actual), "\n";
112 | return 1.0e5;
113 | }
114 | my $sum = 0.0;
115 | my $i = 0;
116 | foreach my $e (@$expected) {
117 | $sum += abs($e - $actual->[$i]);
118 | ++$i;
119 | }
120 | return $sum;
121 | }
122 |
123 | 1;
124 |
125 |
--------------------------------------------------------------------------------
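A small sketch of the helper above with made-up counts: recall = 3/6, precision = 3/4, and F1 = 2RP/(R+P) = 0.6. The `use lib` path is an assumption (script run from the repository root).

    #!/usr/bin/perl
    use strict;
    use warnings;
    use lib 'reference-coreference-scorers-8.01/test';   # assumed repo-root relative path
    use CorefMetricTest qw(ComputeScoreFromCounts);

    # (recall numerator, recall denominator, precision numerator, precision denominator)
    my ($r, $p, $f1) = ComputeScoreFromCounts(3, 6, 3, 4);
    printf "R=%.4f P=%.4f F1=%.4f\n", $r, $p, $f1;        # R=0.5000 P=0.7500 F1=0.6000
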
/reference-coreference-scorers-8.01/test/CorefMetricTestConfig.pm:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # This is the test configuration file. Test cases are stored in an
3 | # array, each element consisting of:
4 | # (1) id: a unique identifier for the test case.
5 | # (2) key_file: the key file to be tested in the CoNLL format.
6 | # (3) response_file: the response file to be tested in the CoNLL format.
7 | # (4) expected_metrics: is a hash mapping from a metric name (identical to those
8 | # used in the scorer.{pl|bat}) to an array of expected
9 | # metric values. All metrics have 3 expected numbers:
10 | # (recall, precision, F-measure).
11 | ################################################################################
12 |
13 | package CorefMetricTestConfig;
14 | use strict;
15 | use warnings;
16 | use Exporter;
17 |
18 | our @ISA= qw( Exporter );
19 |
20 | # these are exported by default.
21 | our @EXPORT = qw(TestCases);
22 |
23 | #
24 | # Values following metric names are [recall, precision, F1]
25 | #
26 | our @TestCases = (
27 | { id => "A1",
28 | key_file => "DataFiles/TC-A.key",
29 | response_file => "DataFiles/TC-A-1.response",
30 | expected_metrics => { "muc" => [1, 1, 1],
31 | "bcub" => [6/6, 6/6, 1],
32 | "ceafm" => [1, 1, 1],
33 | "ceafe" => [1, 1, 1],
34 | "blanc" => [1, 1, 1] }
35 | },
36 | { id => "A2",
37 | key_file => "DataFiles/TC-A.key",
38 | response_file => "DataFiles/TC-A-2.response",
39 | expected_metrics => { "muc" => [1/3, 1/1, 0.5],
40 | "bcub" => [(7/3)/6, 3/3, 14/25],
41 | "ceafm" => [0.5, 1, 0.66667],
42 | "ceafe" => [0.6, 0.9, 0.72],
43 | "blanc" => [0.21591, 1, 0.35385] }
44 | },
45 | { id => "A3",
46 | key_file => "DataFiles/TC-A.key",
47 | response_file => "DataFiles/TC-A-3.response",
48 | expected_metrics => { "muc" => [3/3, 3/5, 0.75],
49 | "bcub" => [6/6, (4+7/12)/9, 110/163],
50 | "ceafm" => [1, 0.66667, 0.8],
51 | "ceafe" => [0.88571, 0.66429, 0.75918],
52 | "blanc" => [1, 0.42593, 0.59717] }
53 | },
54 | { id => "A4",
55 | key_file => "DataFiles/TC-A.key",
56 | response_file => "DataFiles/TC-A-4.response",
57 | expected_metrics => { "muc" => [1/3, 1/3, 1/3],
58 | "bcub" => [(3+1/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))],
59 | "ceafm" => [0.66667, 0.57143, 0.61538],
60 | "ceafe" => [0.73333, 0.55, 0.62857],
61 | "blanc" => [0.35227, 0.27206, 0.30357] }
62 | },
63 | { id => "A5",
64 | key_file => "DataFiles/TC-A.key",
65 | response_file => "DataFiles/TC-A-5.response",
66 | expected_metrics => { "muc" => [1/3, 1/4, 2/7],
67 | "bcub" => [(3+1/3)/6, 2.5/8, 2*(5/9)*(5/16)/((5/9)+(5/16))],
68 | "ceafm" => [0.66667, 0.5, 0.57143],
69 | "ceafe" => [0.68889, 0.51667, 0.59048],
70 | "blanc" => [0.35227, 0.19048, 0.24716] }
71 | },
72 | { id => "A6",
73 | key_file => "DataFiles/TC-A.key",
74 | response_file => "DataFiles/TC-A-6.response",
75 | expected_metrics => { "muc" => [1/3, 1/4, 2/7],
76 | "bcub" => [(10/3)/6, (1+4/3+1/2)/8, 2*(5/9)*(17/48)/((5/9)+(17/48))],
77 | "ceafm" => [0.66667, 0.5, 0.57143],
78 | "ceafe" => [0.73333, 0.55, 0.62857],
79 | "blanc" => [0.35227, 0.20870, 0.25817] }
80 | },
81 | { id => "A7",
82 | key_file => "DataFiles/TC-A.key",
83 | response_file => "DataFiles/TC-A-7.response",
84 | expected_metrics => { "muc" => [1/3, 1/3, 1/3],
85 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))],
86 | "ceafm" => [0.66667, 0.57143, 0.61538],
87 | "ceafe" => [0.73333, 0.55, 0.62857],
88 | "blanc" => [0.35227, 0.27206, 0.30357] }
89 | },
90 | { id => "A8",
91 | key_file => "DataFiles/TC-A.key",
92 | response_file => "DataFiles/TC-A-8.response",
93 | expected_metrics => { "muc" => [1/3, 1/3, 1/3],
94 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))],
95 | "ceafm" => [0.66667, 0.57143, 0.61538],
96 | "ceafe" => [0.73333, 0.55, 0.62857],
97 | "blanc" => [0.35227, 0.27206, 0.30357] }
98 | },
99 | { id => "A9",
100 | key_file => "DataFiles/TC-A.key",
101 | response_file => "DataFiles/TC-A-9.response",
102 | expected_metrics => { "muc" => [1/3, 1/3, 1/3],
103 | "bcub" => [(10/3)/6, (1+4/3+1/2)/7, 2*(5/9)*(17/42)/((5/9)+(17/42))],
104 | "ceafm" => [0.66667, 0.57143, 0.61538],
105 | "ceafe" => [0.73333, 0.55, 0.62857],
106 | "blanc" => [0.35227, 0.27206, 0.30357] }
107 | },
108 | { id => "A10",
109 | key_file => "DataFiles/TC-A.key",
110 | response_file => "DataFiles/TC-A-10.response",
111 | expected_metrics => { "muc" => [0, 0, 0],
112 | "bcub" => [3/6, 6/6, 2/3],
113 | #”ceafm" => [1, 1, 1],
114 | #”ceafe" => [1, 1, 1],
115 | "blanc" => [0.5, 0.36667, 0.42308] }
116 | },
117 | { id => "A11",
118 | key_file => "DataFiles/TC-A.key",
119 | response_file => "DataFiles/TC-A-11.response",
120 | expected_metrics => { "muc" => [3/3, 3/5, 6/8],
121 | "bcub" => [6/6, (1/6+2*2/6+3*3/6)/6, 14/25],
122 | #”ceafm" => [1, 1, 1],
123 | #”ceafe" => [1, 1, 1],
124 | "blanc" => [0.5, 0.13333, 0.21053] }
125 | },
126 | { id => "A12",
127 | key_file => "DataFiles/TC-A.key",
128 | response_file => "DataFiles/TC-A-12.response",
129 | expected_metrics => { "muc" => [0, 0, 0],
130 | "bcub" => [(1+1/2+2/3)/6, 4/7, 2*(13/36)*(4/7)/((13/36)+(4/7))],
131 | #”ceafm" => [1, 1, 1],
132 | #”ceafe" => [1, 1, 1],
133 | "blanc" => [0.22727, 0.11905, 0.15625] }
134 | },
135 | { id => "A13",
136 | key_file => "DataFiles/TC-A.key",
137 | response_file => "DataFiles/TC-A-13.response",
138 | expected_metrics => { "muc" => [1/3, 1/6, 2/9],
139 | "bcub" => [(1+1/2+2*2/3)/6, (1/7+1/7+2*2/7)/7, 2*(17/36)*(6/49)/((17/36)+(6/49))],
140 | #”ceafm" => [1, 1, 1],
141 | #”ceafe" => [1, 1, 1],
142 | "blanc" => [0.125, 0.02381, 0.04] }
143 | },
144 | { id => "B1",
145 | key_file => "DataFiles/TC-B.key",
146 | response_file => "DataFiles/TC-B-1.response",
147 | expected_metrics => { #"muc" => [1, 1, 1],
148 | #"bcub" => [1, 1, 1],
149 | #”ceafm" => [1, 1, 1],
150 | #”ceafe" => [1, 1, 1],
151 | "blanc" => [1/2 * (1/4 + 1/3), 1/2 * (1/4 + 1/3), 1/2 * (1/4 + 1/3)] }
152 | },
153 | { id => "C1",
154 | key_file => "DataFiles/TC-C.key",
155 | response_file => "DataFiles/TC-C-1.response",
156 | expected_metrics => { #"muc" => [1, 1, 1],
157 | #"bcub" => [1, 1, 1],
158 | #”ceafm" => [1, 1, 1],
159 | #”ceafe" => [1, 1, 1],
160 | "blanc" => [1/2 * (2/5 + 10/16), 1/2 * (2/5 + 10/16), 1/2 * (2/5 + 10/16)] }
161 | },
162 | { id => "D1",
163 | key_file => "DataFiles/TC-D.key",
164 | response_file => "DataFiles/TC-D-1.response",
165 | expected_metrics => { "muc" => [9/9, 9/10, 2*(9/9)*(9/10)/(9/9+9/10)],
166 | "bcub" => [12/12, 16/21, 2*(12/12)*(16/21)/(12/12+16/21)],
167 | #"ceafm" => [1, 1, 1],
168 | #"ceafe" => [1, 1, 1],
169 | #"blanc" => [1, 1, 1]
170 | }
171 | },
172 | { id => "E1",
173 | key_file => "DataFiles/TC-E.key",
174 | response_file => "DataFiles/TC-E-1.response",
175 | expected_metrics => { "muc" => [9/9, 9/10, 2*(9/9)*(9/10)/(9/9+9/10)],
176 | "bcub" => [1, 7/12, 2*1*(7/12)/(1+7/12)],
177 | #"ceafm" => [1, 1, 1],
178 | #"ceafe" => [1, 1, 1],
179 | #"blanc" => [1, 1, 1]
180 | }
181 | },
182 | { id => "F1",
183 | key_file => "DataFiles/TC-F.key",
184 | response_file => "DataFiles/TC-F-1.response",
185 | expected_metrics => { "muc" => [2/3, 2/2, 2*(2/3)*(2/2)/(2/3+2/2)] ,
186 | #"bcub" => ,
187 | #"ceafm" => ,
188 | #"ceafe" => ,
189 | #"blanc" =>
190 | }
191 | },
192 | { id => "G1",
193 | key_file => "DataFiles/TC-G.key",
194 | response_file => "DataFiles/TC-G-1.response",
195 | expected_metrics => { "muc" => [2/2, 2/3, 2*(2/2)*(2/3)/(2/2+2/3)],
196 | #"bcub" => ,
197 | #"ceafm" => ,
198 | #"ceafe" => ,
199 | #"blanc" =>
200 | }
201 | },
202 | { id => "H1",
203 | key_file => "DataFiles/TC-H.key",
204 | response_file => "DataFiles/TC-H-1.response",
205 | expected_metrics => { "muc" => [1, 1, 1],
206 | #"bcub" => ,
207 | #"ceafm" => ,
208 | #"ceafe" => ,
209 | #"blanc" =>
210 | }
211 | },
212 | { id => "I1",
213 | key_file => "DataFiles/TC-I.key",
214 | response_file => "DataFiles/TC-I-1.response",
215 | expected_metrics => { "muc" => [2/3, 2/2, 2*(2/3)*(2/2)/(2/3+2/2)],
216 | #"bcub" => ,
217 | #"ceafm" => ,
218 | #"ceafe" => ,
219 | #"blanc" =>
220 | }
221 | },
222 | { id => "J1",
223 | key_file => "DataFiles/TC-J.key",
224 | response_file => "DataFiles/TC-J-1.response",
225 | expected_metrics => { "muc" => [1/2, 1/1, 2*(1/2)*(1/1)/(1/2+1/1)],
226 | #"bcub" => ,
227 | #"ceafm" => ,
228 | #"ceafe" => ,
229 | #"blanc" =>
230 | }
231 | },
232 | { id => "K1",
233 | key_file => "DataFiles/TC-K.key",
234 | response_file => "DataFiles/TC-K-1.response",
235 | expected_metrics => { "muc" => [3/6, 3/6, 3/6],
236 | #"bcub" => ,
237 | #"ceafm" => ,
238 | #"ceafe" => ,
239 | #"blanc" =>
240 | }
241 | },
242 | { id => "L1",
243 | key_file => "DataFiles/TC-L.key",
244 | response_file => "DataFiles/TC-L-1.response",
245 | expected_metrics => { "muc" => [2/5, 2/4, 2*(2/5)*(2/4)/(2/5+2/4)],
246 | #"bcub" => ,
247 | #"ceafm" => ,
248 | #"ceafe" => ,
249 | #"blanc" =>
250 | }
251 | },
252 | { id => "M1",
253 | key_file => "DataFiles/TC-M.key",
254 | response_file => "DataFiles/TC-M-1.response",
255 | expected_metrics => { "muc" => [1, 1, 1],
256 | "bcub" => [1, 1, 1],
257 | "ceafm" => [1, 1, 1],
258 | "ceafe" => [1, 1, 1],
259 | "blanc" => [1, 1, 1] }
260 | },
261 | { id => "M2",
262 | key_file => "DataFiles/TC-M.key",
263 | response_file => "DataFiles/TC-M-2.response",
264 | expected_metrics => { "muc" => [0, 0, 0],
265 | #"bcub" => ,
266 | #"ceafm" => ,
267 | #"ceafe" => ,
268 | "blanc" => [0, 0, 0] }
269 | },
270 | { id => "M3",
271 | key_file => "DataFiles/TC-M.key",
272 | response_file => "DataFiles/TC-M-3.response",
273 | expected_metrics => { #"muc" => ,
274 | #"bcub" => ,
275 | #"ceafm" => ,
276 | #"ceafe" => ,
277 | "blanc" => [0.26667, 1, 0.42105] }
278 | },
279 | { id => "M4",
280 | key_file => "DataFiles/TC-M.key",
281 | response_file => "DataFiles/TC-M-4.response",
282 | expected_metrics => { #"muc" => ,
283 | #"bcub" => ,
284 | #"ceafm" => ,
285 | #"ceafe" => ,
286 | "blanc" => [0.2, 0.2, 0.2] }
287 | },
288 | { id => "M5",
289 | key_file => "DataFiles/TC-M.key",
290 | response_file => "DataFiles/TC-M-5.response",
291 | expected_metrics => { "muc" => [0, 0, 0],
292 | #"bcub" => ,
293 | #"ceafm" => ,
294 | #"ceafe" => ,
295 | "blanc" => [0, 0, 0] }
296 | },
297 | { id => "M6",
298 | key_file => "DataFiles/TC-M.key",
299 | response_file => "DataFiles/TC-M-6.response",
300 | expected_metrics => { #"muc" => ,
301 | #"bcub" => ,
302 | #"ceafm" => ,
303 | #"ceafe" => ,
304 | "blanc" => [0.06667, 0.25, 0.10526] }
305 | },
306 | { id => "N1",
307 | key_file => "DataFiles/TC-N.key",
308 | response_file => "DataFiles/TC-N-1.response",
309 | expected_metrics => { "muc" => [0, 0, 0],
310 | #"bcub" => [1, 1, 1],
311 | #"ceafm" => [1, 1, 1],
312 | #"ceafe" => [1, 1, 1],
313 | "blanc" => [1, 1, 1] }
314 | },
315 | { id => "N2",
316 | key_file => "DataFiles/TC-N.key",
317 | response_file => "DataFiles/TC-N-2.response",
318 | expected_metrics => { "muc" => [0, 0, 0],
319 | #"bcub" => ,
320 | #"ceafm" => ,
321 | #"ceafe" => ,
322 | "blanc" => [0, 0, 0] }
323 | },
324 | { id => "N3",
325 | key_file => "DataFiles/TC-N.key",
326 | response_file => "DataFiles/TC-N-3.response",
327 | expected_metrics => { #"muc" => ,
328 | #"bcub" => ,
329 | #"ceafm" => ,
330 | #"ceafe" => ,
331 | "blanc" => [0.73333, 1, 0.84615] }
332 | },
333 | { id => "N4",
334 | key_file => "DataFiles/TC-N.key",
335 | response_file => "DataFiles/TC-N-4.response",
336 | expected_metrics => { "muc" => [0, 0, 0],
337 | #"bcub" => ,
338 | #"ceafm" => ,
339 | #"ceafe" => ,
340 | "blanc" => [0.2, 0.2, 0.2] }
341 | },
342 | { id => "N5",
343 | key_file => "DataFiles/TC-N.key",
344 | response_file => "DataFiles/TC-N-5.response",
345 | expected_metrics => { #"muc" => ,
346 | #"bcub" => ,
347 | #"ceafm" => ,
348 | #"ceafe" => ,
349 | "blanc" => [0, 0, 0] }
350 | },
351 | { id => "N6",
352 | key_file => "DataFiles/TC-N.key",
353 | response_file => "DataFiles/TC-N-6.response",
354 | expected_metrics => { #"muc" => ,
355 | #"bcub" => ,
356 | #"ceafm" => ,
357 | #"ceafe" => ,
358 | "blanc" => [0.13333, 0.18182, 0.15385] }
359 | }
360 |
361 | );
362 |
363 | 1;
364 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-1.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 jnk -
17 | test2 0 5 e (2)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (2
20 | test2 0 8 f2 -
21 | test2 0 9 f3 2)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-10.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (2)
13 | test2 0 1 x -
14 | test2 0 2 d1 (3
15 | test2 0 3 d2 3)
16 | test2 0 4 z -
17 | test2 0 5 e (4)
18 | test2 0 6 y -
19 | test2 0 7 f1 (5
20 | test2 0 8 f2 -
21 | test2 0 9 f3 5)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-11.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (0)
13 | test2 0 1 x -
14 | test2 0 2 d1 (0
15 | test2 0 3 d2 0)
16 | test2 0 4 z -
17 | test2 0 5 e (0)
18 | test2 0 6 y -
19 | test2 0 7 f1 (0
20 | test2 0 8 f2 -
21 | test2 0 9 f3 0)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-12.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 1)
7 | test1 0 5 b3 -
8 | test1 0 6 b4 -
9 | test1 0 7 jnk (2)
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (3)
13 | test2 0 1 x -
14 | test2 0 2 d1 (4
15 | test2 0 3 d2 4)
16 | test2 0 4 z -
17 | test2 0 5 e (5)
18 | test2 0 6 y -
19 | test2 0 7 f1 (6)
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-13.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 0)
7 | test1 0 5 b3 -
8 | test1 0 6 b4 -
9 | test1 0 7 jnk (0)
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (0)
13 | test2 0 1 x -
14 | test2 0 2 d1 (0
15 | test2 0 3 d2 0)
16 | test2 0 4 z -
17 | test2 0 5 e (0)
18 | test2 0 6 y -
19 | test2 0 7 f1 (0)
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-2.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 -
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 -
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c -
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 jnk -
17 | test2 0 5 e (2)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-3.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 x (1)
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 y (2)
17 | test2 0 5 e (2)
18 | test2 0 6 z (3)
19 | test2 0 7 f1 (2
20 | test2 0 8 f2 -
21 | test2 0 9 f3 2)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-4.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 x (1)
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 x (3)
17 | test2 0 5 e -
18 | test2 0 6 y (2)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-5.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 (1
7 | test1 0 5 b3 1)
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 x (1)
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 z (3)
17 | test2 0 5 e -
18 | test2 0 6 y (2)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-6.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 (3
7 | test1 0 5 b3 3)
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 x (1)
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 z (3)
17 | test2 0 5 e -
18 | test2 0 6 y (2)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-7.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1(1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 x (1)
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 z (3)
17 | test2 0 5 e -
18 | test2 0 6 y (2)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-8.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1(3
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 3)1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 x (1)
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 z (3)
17 | test2 0 5 e -
18 | test2 0 6 y (2)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A-9.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1(3(3(3(3(3(3(3(3(3(3
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 3)3)3)3)3)3)3)3)3)3)1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 x (1)
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 z (3)
17 | test2 0 5 e -
18 | test2 0 6 y (2)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-A.key:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (2
15 | test2 0 3 d2 2)
16 | test2 0 4 jnk -
17 | test2 0 5 e (2)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (2
20 | test2 0 8 f2 -
21 | test2 0 9 f3 2)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-B-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 -
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 -
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 -
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10043
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10043)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 (10043
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 10043)
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 -
72 | nw/xinhua/00/chtb_0009 -
73 |
74 | #end document
75 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-B.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (10043
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 -
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 -
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 10043)
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10054
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10054)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 -
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 -
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 -
72 | nw/xinhua/00/chtb_0009 -
73 |
74 | #end document
75 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-C-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 -
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 -
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 -
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10043
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10043)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 (10043
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 10043)
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 (10060)
72 | nw/xinhua/00/chtb_0009 (10060)
73 |
74 | #end document
75 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-C.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (10043
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 -
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 -
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 10043)
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10054
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10054)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 -
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 -
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 (10060)
72 | nw/xinhua/00/chtb_0009 (10060)
73 |
74 | #end document
75 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-D-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-D.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-E-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (1)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (1)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (1)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (1)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (1)
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-E.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-F-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (2)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-F.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-G-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-G.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (2)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-H-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-H.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-I-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (2)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-I.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-J-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 -
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-J.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-K-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (2)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (2)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 (3)
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-K.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 -
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (1)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (1)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (1)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-L-1.response:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (2)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (3)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-L.key:
--------------------------------------------------------------------------------
1 | #begin document (nw/xinhua/00/chtb_0009); part 000
2 | nw/xinhua/00/chtb_0009 -
3 | nw/xinhua/00/chtb_0009 (1)
4 | nw/xinhua/00/chtb_0009 -
5 | nw/xinhua/00/chtb_0009 (1)
6 | nw/xinhua/00/chtb_0009 -
7 | nw/xinhua/00/chtb_0009 (1)
8 | nw/xinhua/00/chtb_0009 -
9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (2)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (2)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 |
31 | #end document
32 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-M-1.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (0)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (0
15 | test2 0 3 d2 0)
16 | test2 0 4 jnk -
17 | test2 0 5 e (0)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (0
20 | test2 0 8 f2 -
21 | test2 0 9 f3 0)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-M-2.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (2)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (3
15 | test2 0 3 d2 3)
16 | test2 0 4 jnk -
17 | test2 0 5 e (4)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (5
20 | test2 0 8 f2 -
21 | test2 0 9 f3 5)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-M-3.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (1
15 | test2 0 3 d2 1)
16 | test2 0 4 jnk -
17 | test2 0 5 e (1)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (2
20 | test2 0 8 f2 -
21 | test2 0 9 f3 2)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-M-4.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (0)
13 | test2 0 1 jnk (0)
14 | test2 0 2 d1 -
15 | test2 0 3 d2 -
16 | test2 0 4 jnk (0)
17 | test2 0 5 e -
18 | test2 0 6 jnk (0)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-M-5.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (2)
13 | test2 0 1 jnk (3)
14 | test2 0 2 d1 -
15 | test2 0 3 d2 -
16 | test2 0 4 jnk (4)
17 | test2 0 5 e -
18 | test2 0 6 jnk (5)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-M-6.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 jnk (1)
14 | test2 0 2 d1 -
15 | test2 0 3 d2 -
16 | test2 0 4 jnk (1)
17 | test2 0 5 e -
18 | test2 0 6 jnk (2)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-M.key:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (0)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (0
15 | test2 0 3 d2 0)
16 | test2 0 4 jnk -
17 | test2 0 5 e (0)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (0
20 | test2 0 8 f2 -
21 | test2 0 9 f3 0)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-N-1.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (2)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (3
15 | test2 0 3 d2 3)
16 | test2 0 4 jnk -
17 | test2 0 5 e (4)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (5
20 | test2 0 8 f2 -
21 | test2 0 9 f3 5)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-N-2.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (0)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (0
15 | test2 0 3 d2 0)
16 | test2 0 4 jnk -
17 | test2 0 5 e (0)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (0
20 | test2 0 8 f2 -
21 | test2 0 9 f3 0)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-N-3.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (1
15 | test2 0 3 d2 1)
16 | test2 0 4 jnk -
17 | test2 0 5 e (1)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (2
20 | test2 0 8 f2 -
21 | test2 0 9 f3 2)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-N-4.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (2)
13 | test2 0 1 jnk (3)
14 | test2 0 2 d1 -
15 | test2 0 3 d2 -
16 | test2 0 4 jnk (4)
17 | test2 0 5 e -
18 | test2 0 6 jnk (5)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-N-5.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (0)
13 | test2 0 1 jnk (0)
14 | test2 0 2 d1 -
15 | test2 0 3 d2 -
16 | test2 0 4 jnk (0)
17 | test2 0 5 e -
18 | test2 0 6 jnk (0)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-N-6.response:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (0
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 0)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (1)
13 | test2 0 1 jnk (1)
14 | test2 0 2 d1 -
15 | test2 0 3 d2 -
16 | test2 0 4 jnk (1)
17 | test2 0 5 e -
18 | test2 0 6 jnk (2)
19 | test2 0 7 f1 -
20 | test2 0 8 f2 -
21 | test2 0 9 f3 -
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/DataFiles/TC-N.key:
--------------------------------------------------------------------------------
1 | #begin document (LuoTestCase);
2 | test1 0 0 a1 (0
3 | test1 0 1 a2 0)
4 | test1 0 2 junk -
5 | test1 0 3 b1 (1
6 | test1 0 4 b2 -
7 | test1 0 5 b3 -
8 | test1 0 6 b4 1)
9 | test1 0 7 jnk -
10 | test1 0 8 . -
11 |
12 | test2 0 0 c (2)
13 | test2 0 1 jnk -
14 | test2 0 2 d1 (3
15 | test2 0 3 d2 3)
16 | test2 0 4 jnk -
17 | test2 0 5 e (4)
18 | test2 0 6 jnk -
19 | test2 0 7 f1 (5
20 | test2 0 8 f2 -
21 | test2 0 9 f3 5)
22 | test2 0 10 . -
23 | #end document
24 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/TestCases.README:
--------------------------------------------------------------------------------
1 | TC-A-1 - perfect:
2 | Key/Ref: {a} {bc} {def}
3 | Rsp/Sys: {a} {bc} {def}
4 | Expected: BCUB=1 [recall=6/6, prec=6/6]
5 | Expected: MUC=1 [recall=3/3=1, prec=3/3=1]
6 | Expected: CEAFm=1 [recall=6/6=1, prec=6/6=1]
7 | Expected: CEAFe=1 [recall=3/3=1, prec=3/3=1]
8 | Expected: BLANC=1 [recall_c=4/4=1, prec_c=4/4=1, recall_n=11/11=1, prec_n=11/11=1]
9 |
10 | TC-A-2 -- response with missing mentions/entities
11 | Key/Ref: {a} {bc} {def}
12 | Rsp/Sys: {a} {de}
13 | Expected: BCUB=.5599 [recall=7/18, prec=3/3]
14 | Expected: MUC=0.5 [recall=1/3, prec=1/1]
15 | Expected: CEAFm=6/9=0.67 [common=3, recall=3/6=0.5, Prec=3/3=1]
16 | Expected: CEAFe=3.6/5=0.72 [common=1+4/5=1.8, recall=1.8/3=0.6, Prec=1.8/2=0.9]
17 | Expected: BLANC=0.35 [recall_c=1/4, prec_c=1/1, recall_n=2/11, prec_n=2/2]
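
A minimal Python sketch that re-derives the MUC and BCUB fractions above from the Key/Rsp cluster sets (plain textbook definitions, not the Perl scorer itself):

    from fractions import Fraction

    key = [{"a"}, {"b", "c"}, {"d", "e", "f"}]   # Key/Ref: {a} {bc} {def}
    rsp = [{"a"}, {"d", "e"}]                    # Rsp/Sys: {a} {de}

    def f1(r, p):
        return 2 * r * p / (r + p) if (r + p) else Fraction(0)

    def b_cubed(gold, pred):
        # Per-mention scores; mentions missing from the other side contribute 0.
        g = {m: c for c in gold for m in c}
        p = {m: c for c in pred for m in c}
        rec = sum(Fraction(len(g[m] & p.get(m, set())), len(g[m])) for m in g) / len(g)
        pre = sum(Fraction(len(p[m] & g.get(m, set())), len(p[m])) for m in p) / len(p)
        return rec, pre

    def muc(gold, pred):
        # recall = sum(|K| - #parts of K induced by pred) / sum(|K| - 1); precision is symmetric
        def side(a, b):
            b_of = {m: i for i, c in enumerate(b) for m in c}
            num = sum(len(c) - len({b_of[m] for m in c if m in b_of})
                      - sum(1 for m in c if m not in b_of) for c in a)
            den = sum(len(c) - 1 for c in a)
            return Fraction(num, den) if den else Fraction(0)
        return side(gold, pred), side(pred, gold)

    r, p = b_cubed(key, rsp)
    print("BCUB:", r, p, float(f1(r, p)))   # 7/18, 1, 0.56
    r, p = muc(key, rsp)
    print("MUC :", r, p, float(f1(r, p)))   # 1/3, 1, 0.5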
18 |
19 | TC-A-3 -- response with false-alarm mentions/entities
20 | Key/Ref: {a} {bc} {def}
21 | Rsp/Sys: {a} {bcx} {defy} {z}
22 | Expected: BCUB=.6748 [recall=6/6, prec=55/108]
23 | Expected: MUC=0.75 [recall=3/3, prec=3/5]
24 | Expected: CEAFm=12/15=0.8 [common=6, recall=6/6=1, prec=6/9=.67]
25 | Expected: CEAFe=5.31/7=0.76 [common=1+4/5+6/7=2.66, recall=2.66/3=0.89, Prec=2.66/4=0.66]
26 | Expected: BLANC=0.60 [recall_c=4/4, prec_c=4/9, recall_n=11/11, prec_n=11/27]
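
The CEAF numbers above come from an optimal one-to-one entity alignment; a minimal sketch of that computation (assuming numpy/scipy are available; not the Perl implementation):

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    key = [{"a"}, {"b", "c"}, {"d", "e", "f"}]                   # Key/Ref: {a} {bc} {def}
    rsp = [{"a"}, {"b", "c", "x"}, {"d", "e", "f", "y"}, {"z"}]  # Rsp/Sys: {a} {bcx} {defy} {z}

    def ceaf(gold, pred, phi):
        # Best one-to-one alignment between gold and predicted entities under similarity phi.
        sim = np.array([[phi(k, r) for r in pred] for k in gold], dtype=float)
        rows, cols = linear_sum_assignment(sim, maximize=True)
        best = sim[rows, cols].sum()
        rec = best / sum(phi(k, k) for k in gold)
        pre = best / sum(phi(r, r) for r in pred)
        return rec, pre, 2 * rec * pre / (rec + pre)

    phi_m = lambda a, b: len(a & b)                          # CEAFm similarity
    phi_e = lambda a, b: 2 * len(a & b) / (len(a) + len(b))  # CEAFe similarity

    print("CEAFm:", ceaf(key, rsp, phi_m))   # recall=1.0, prec=6/9, F1=0.8
    print("CEAFe:", ceaf(key, rsp, phi_e))   # recall~0.886, prec~0.664, F1~0.76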
27 |
28 |
29 | TC-A-4 -- response with both missing and false-alarm mentions/entities
30 | Key/Ref: {a} {bc} {def}
31 | Rsp/Sys: {a} {bcx} {dy} {z}
32 | Expected: BCUB=.4683 [recall=5/9, prec=17/42]
33 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3]
34 | Expected: CEAFm=8/13=0.62 [common=4 recall=4/6=0.67 prec=4/7=.57]
35 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
36 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17]
37 |
38 | TC-A-5 -- response with both missing and false-alarm mentions/entities, and overlapping mentions (capitalized letter: b and B). Overlapping mention B in the aligned entity.
39 | Key/Ref: {a} {bc} {def}
40 | Rsp/Sys: {a} {bcxB} {dy} {z}
41 | Expected: BCUB=.4 [recall=5/9, prec=5/16]
42 | Expected: MUC=2/7=.28571 [recall=1/3, prec=1/4]
43 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5]
44 | Expected: CEAFe=4.14/7=0.59 [common=1+4/6+2/5=2.07, recall=2.07/3=0.69, Prec=2.07/4=0.52]
45 | Expected: BLANC=0.25 [recall_c=1/4, prec_c=1/7, recall_n=5/11, prec_n=5/21]
46 |
47 | TC-A-6 -- response with both missing and false-alarm mentions/entities, and overlapping mentions (capitalized letter: b and B). Overlapping mention B in an unaligned entity.
48 | Key/Ref: {a} {bc} {def}
49 | Rsp/Sys: {a} {bcx} {dy} {Bz}
50 | Expected: BCUB=.4325 [recall=5/9, prec=17/48]
51 | Expected: MUC=2/7=.28571 [recall=1/3, prec=1/4]
52 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5]
53 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
54 | Expected: BLANC=0.26 [recall_c=1/4, prec_c=1/5, recall_n=5/11, prec_n=5/23]
55 |
56 | TC-A-7 -- response with both missing and false-alarm mentions/entities, and duplicate mentions (capitalized letter: b and B). Duplicate mention B in the same entity as b (note: this differs from TC-A-5) -- this tests mention de-duplication.
57 | Key/Ref: {a} {bc} {def}
58 | Rsp/Sys: {a} {bcxB} {dy} {z}
59 | de-dup: {a} {bcx} {dy} {z}
60 |
61 | de-dup:
62 | Expected: BCUB=.4683 [recall=5/9, prec=17/42]
63 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3]
64 | Expected: CEAFm=8/13=0.61538 [common=4, recall=4/6=0.66667, Prec=4/7=0.57143]
65 | Expected: CEAFe=4.4/7=0.62857 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73333, Prec=2.2/4=0.55]
66 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17]
67 |
68 | if No de-dup:
69 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5]
70 | Expected: CEAFe=4.14/7=0.59 [common=1+4/6+2/5=2.07, recall=2.07/3=0.69, Prec=2.07/4=0.52]
71 |
72 |
73 | TC-A-8 -- response with both missing and false-alarm mentions/entities, and duplicate mentions (capitalized letter: b and B). Duplicate mention B in a different entity from b.
74 | Key/Ref: {a} {bc} {def}
75 | Rsp/Sys: {a} {bcx} {dy} {Bz}
76 |
77 | De-dup:
78 | Expected: BCUB=.4683 [recall=5/9, prec=17/42]
79 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3]
80 | Expected: CEAFm=8/13=0.61538 [common=4 recall=4/6=0.67 prec=4/7=.57143]
81 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
82 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17]
83 |
84 | If no de-dup:
85 | Expected: CEAFm=8/14=0.57 [common=4 recall=4/6=0.67 prec=4/8=.5]
86 | Expected: CEAFe=4.14/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
87 |
88 | TC-A-9 -- show B3 can be gamed: "b" is repeated 10 times so precision approaches 1
89 | Key/Ref: {a} {bc} {def}
90 | Rsp/Sys: {a} {bcx} {dy} {Bx10z}
91 | de-dup Rsp/Sys: {a} {bcx} {dy} {z}
92 |
93 | De-dup:
94 | Expected: BCUB=.4683 [recall=5/9, prec=17/42]
95 | Expected: MUC=1/3=.33333 [recall=1/3, prec=1/3]
96 | Expected: CEAFm=8/13=0.61538 [common=4 recall=4/6=0.67 prec=4/7=.57143]
97 | Expected: CEAFe=4.4/7=0.63 [common=1+4/5+2/5=2.2, recall=2.2/3=0.73, Prec=2.2/4=0.55]
98 | Expected: BLANC=0.30 [recall_c=1/4, prec_c=1/4, recall_n=5/11, prec_n=5/17]
99 |
100 |
101 | TC-A-10 - Gold mentions. Only singletons in the response.
102 | Key/Ref: {a} {bc} {def}
103 | Rsp/Sys: {a} {b} {c} {d} {e} {f}
104 | Expected: BCUB=.6667 [recall=3/6, prec=6/6]
105 | Expected: MUC=0 [recall=0, prec=0]
106 | Expected: BLANC=0.42 [recall_c=0/4, prec_c=0/0, f_c=0, recall_n=11/11, prec_n=11/15]
107 |
108 |
109 | TC-A-11 - Gold mentions. All mentions are coreferent in the response.
110 | Key/Ref: {a} {bc} {def}
111 | Rsp/Sys: {abcdef}
112 |
113 | Expected: BCUB=0.5599 [recall=6/6, prec=7/18]
114 | Expected: MUC=6/8=0.75 [recall=3/3, prec=3/5]
115 | Expected: BLANC=0.21 [recall_c=4/4, prec_c=4/15, recall_n=0/11, prec_n=0/0, f_n=0]
116 |
117 |
118 | TC-A-12 - System mentions. Only singletons in the response.
119 | Key/Ref: {a} {bc} {def}
120 | Rsp/Sys: {a} {x} {y} {c} {d} {e} {z}
121 |
122 | Expected: BCUB=0.4425 [recall=13/36, prec=4/7]
123 | Expected: MUC=0 [recall=0, prec=0]
124 | Expected: BLANC=0.16 [recall_c=0/4, prec_c=0/0, f_c=0, recall_n=5/11, prec_n=5/21]
125 |
126 |
127 | TC-A-13 - System mentions. All mentions are coreferent in the response.
128 | Key/Ref: {a} {bc} {def}
129 | Rsp/Sys: {axycdez}
130 |
131 | Expected: BCUB=0.19447 [recall=17/36, prec=6/49]
132 | Expected: MUC=2/9 [recall=1/3, prec=1/6]
133 | Expected: BLANC=0.04 [recall_c=1/4, prec_c=1/21, recall_n=0/11, prec_n=0/0, f_n=0]
134 |
135 |
136 | TC-B-1 -- spurious mention (x) and missing mention (a) in response; link (bc) is a key non-coref link and is an incorrect response coref link.
137 |
138 | Keys: {ab} {cde}
139 | Response: {bcx} {de}
140 |
141 | key coref links: C_k = {(ab), (cd), (de), (ce)}
142 | key non-coref links: N_k = {(ac), (ad), (ae), (bc), (bd), (be)}
143 |
144 | response coref links: C_r = {(bc), (bx), (cx), (de)}
145 | response non-coref links: N_r = {(bd), (be), (cd), (ce), (xd), (xe)}
146 |
147 | (I'll use ^ for set intersection)
148 | C_k ^ C_r = {(de)} => R_c = |C_k^C_r| / |C_k| = 1/4, P_c = 1/|C_r| = 1/4, F_c = 1/4
149 | N_k ^ N_r = {(bd), (be)} => R_n = |N_k^N_r|/|N_k| = 2/6, P_n = 2/|N_r| = 2/6, F_n = 1/3
150 |
151 | BLANC = 1/2 (F_c + F_n) = 7/24.
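The same computation as a minimal Python sketch (it mirrors the hand derivation above for gold-mention BLANC; scorer.pl's BLANC additionally handles system mentions, which this sketch does not):

    from itertools import combinations

    def links(clusters):
        # Coref links = within-cluster mention pairs; non-coref links = all other
        # pairs over that side's own mentions.
        coref = {frozenset(p) for c in clusters for p in combinations(sorted(c), 2)}
        mentions = sorted({m for c in clusters for m in c})
        return coref, {frozenset(p) for p in combinations(mentions, 2)} - coref

    def f1(common, n_key, n_rsp):
        r = common / n_key if n_key else 0.0
        p = common / n_rsp if n_rsp else 0.0
        return 2 * p * r / (p + r) if p + r else 0.0

    def blanc(key, rsp):
        c_k, n_k = links(key)
        c_r, n_r = links(rsp)
        f_c = f1(len(c_k & c_r), len(c_k), len(c_r))
        f_n = f1(len(n_k & n_r), len(n_k), len(n_r))
        return (f_c + f_n) / 2

    key = [set('ab'), set('cde')]
    rsp = [set('bcx'), set('de')]
    print(blanc(key, rsp))   # (1/4 + 1/3) / 2 = 7/24 ~ 0.2917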
152 |
153 |
154 |
155 |
156 | TC-C-1 -- same as TC-B-1 plus a new entity and its correct prediction. This tests the case with more than two entities.
157 |
158 | Keys: {ab} {cde} {fg}
159 | Response: {bcx} {de} {fg}
160 |
161 | key coref links: C_k = {(ab), (cd), (de), (ce), (fg)}
162 | key non-coref links: N_k = {(ac), (ad), (ae), (bc), (bd), (be), (af), (ag), (bf), (bg), (cf), (cg), (df), (dg), (ef), (eg)}
163 |
164 | response coref links: C_r = {(bc), (bx), (cx), (de), (fg)}
165 | response non-coref links: N_r = {(bd), (be), (cd), (ce), (xd), (xe), (bf), (bg), (cf), (cg), (xf), (xg), (df), (dg), (ef), (eg)}
166 |
167 | (I'll use ^ for set intersection)
168 | C_k ^ C_r = {(de), (fg)} => R_c = |C_k^C_r| / |C_k| = 2/5, P_c = 2/|C_r| = 2/5, F_c = 2/5 = 0.40
169 | N_k ^ N_r = {(bd), (be), (bf), (bg), (cf), (cg), (df), (dg), (ef), (eg)} => R_n = |N_k^N_r|/|N_k| = 10/16, P_n = 10/|N_r| = 10/16, F_n = 10/16 = 0.625
170 |
171 | BLANC = 1/2 (F_c + F_n) = 0.5125
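Reusing the blanc() helper sketched under TC-B-1 on these clusters gives the same value:

    key = [set('ab'), set('cde'), set('fg')]
    rsp = [set('bcx'), set('de'), set('fg')]
    print(blanc(key, rsp))   # (2/5 + 10/16) / 2 = 0.5125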
172 |
173 |
174 |
175 | # ------------ examples from the B-CUBED paper
176 |
177 | TC-D-1 -- merging one small cluster with a big cluster
178 |
179 | key: {12345} {67} {89ABC}
180 | ---
181 |
182 | 1-2-3-4-5
183 |
184 | 6-7
185 |
186 | 8-9-A-B-C
187 |
188 |
189 |
190 | response: {12345} {6789ABC}
191 | ---------
192 |
193 | 1-2-3-4-5
194 |
195 | 6-7
196 | |
197 | 8-9-A-B-C
198 |
199 |
200 | Expected: BCUB [r=12/12, p=16/21, f=0.864864865]
201 | Expected: MUC [r=9/9, p=9/10, f=0.947368421]
202 |
203 |
204 |
205 | TC-E-1 -- merging two big clusters
206 |
207 |
208 | key: {12345} {67} {89ABC}
209 | ---
210 |
211 | 1-2-3-4-5
212 |
213 | 6-7
214 |
215 | 8-9-A-B-C
216 |
217 |
218 |
219 | response: {123456789ABC} {67}
220 | ---------
221 |
222 | 1-2-3-4-5
223 | |
224 | 6-7 |
225 | |
226 | 8-9-A-B-C
227 |
228 |
229 | Expected: BCUB [r=1, p=7/12, f=0.736842105]
230 | Expected: MUC [r=9/9, p=9/10, f=0.947368421]
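A minimal mention-weighted B-CUBED sketch (not scorer.pl; it assumes every mention occurs in exactly one cluster per side, so it checks TC-D-1 but not the duplicated 6/7 in the TC-E-1 response):

    def b_cubed(key, rsp):
        def side(a, b):
            # Mean over a's mentions of |own cluster & aligned b cluster| / |own cluster|.
            lookup = {m: c for c in b for m in c}
            scores = [len(c & lookup.get(m, set())) / len(c) for c in a for m in c]
            return sum(scores) / len(scores)
        recall, prec = side(key, rsp), side(rsp, key)
        return recall, prec, 2 * recall * prec / (recall + prec)

    key = [set('12345'), set('67'), set('89ABC')]
    rsp = [set('12345'), set('6789ABC')]
    print(b_cubed(key, rsp))   # recall=12/12, prec=16/21, F=0.864864865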
231 |
232 |
233 | # ---------- examples from the MUC paper
234 |
235 | TC-F-1 --
236 |
237 | key: {ABCD} ---- Links: A-B; B-C; C-D
238 | response: {AB} {CD} ---- Links: A-B; C-D
239 |
240 | Expected: MUC [r=2/3, p=2/2, f=2*(2/3)*(2/2)/(2/3+2/2)]
241 |
242 |
243 |
244 | TC-G-1 --
245 |
246 | key: {AB} {CD} ---- Links: A-B; C-D
247 | response: {ABCD} ---- Links: A-B; B-C; C-D
248 |
249 | Expected: MUC [r=2/2, p=2/3, f=2*(2/2)*(2/3)/(2/2+2/3)]
250 |
251 |
252 |
253 | TC-H-1 --
254 |
255 | key: {ABCD} ---- Links: A-B; B-C; B-D
256 | response: {ABCD} ---- Links: A-B; B-C; C-D
257 |
258 | Expected: MUC [r=1, p=1, f=1]
259 |
260 |
261 |
262 | TC-I-1 --
263 |
264 | key: {ABCD} ---- Links: A-B; B-C; B-D
265 | response: {AB} {CD} ---- Links: A-B; C-D
266 |
267 | Expected: MUC [r=2/3, p=2/2, f=2*(2/3)*(2/2)/(2/3+2/2)]
268 |
269 |
270 |
271 | TC-J-1 --
272 |
273 | key: {ABC} ---- Links: A-B; B-C
274 | response: {AC} ---- Links: A-C
275 |
276 | Expected: MUC [r=1/2, p=1/1, f=2*(1/2)*(1/1)/(1/2+1/1)]
277 |
278 |
279 |
280 | TC-K-1 --
281 |
282 | key: {BCDEGHJ} ---- Links: B-C; C-D; D-E; E-G; G-H; H-J
283 | response: {ABC} {DEF} {GHI} ---- Links: A-B; B-C; D-E; E-F; G-H; H-I
284 |
285 | Expected: MUC [r=3/6, p=3/6, f=3/6]
286 |
287 |
288 |
289 | TC-L-1 --
290 |
291 | key: {ABC} {DEFG} ---- Links: A-B; B-C; D-E; E-F; F-G
292 | response: {AB} {CD} {FGH} ---- Links: A-B; C-D; F-G; G-H
293 |
294 | Expected: MUC [r=2/5, p=2/4, f=2*(2/5)*(2/4)/(2/5+2/4)]
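These MUC values follow the partition formula from the MUC paper: recall sums |S| - |p(S)| over key entities, where p(S) partitions S by the response clusters, and precision swaps the roles. A minimal sketch (not scorer.pl), checked against TC-F-1 and TC-K-1:

    def muc(key, rsp):
        def side(gold, sys):
            # Sum over gold entities of |S| - |partition of S by sys clusters|;
            # mentions absent from sys each form their own singleton part.
            num = den = 0
            for entity in gold:
                parts = set()
                for m in entity:
                    owner = next((i for i, c in enumerate(sys) if m in c), None)
                    parts.add(('cluster', owner) if owner is not None else ('singleton', m))
                num += len(entity) - len(parts)
                den += len(entity) - 1
            return num / den if den else 0.0
        return side(key, rsp), side(rsp, key)

    print(muc([set('ABCD')], [set('AB'), set('CD')]))                    # (2/3, 1.0)  TC-F-1
    print(muc([set('BCDEGHJ')], [set('ABC'), set('DEF'), set('GHI')]))   # (0.5, 0.5)  TC-K-1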
295 |
296 |
297 | TC-M-1 - Only coreferent mentions in the key. Gold mentions. Matching response. Since the key contains no non-coreference link, BLANC equals recall_c, prec_c, F_c.
298 | Key/Ref: {abcdef}
299 | Rsp/Sys: {abcdef}
300 |
301 | Expected: BCUB=1
302 | Expected: MUC=1
303 | Expected: CEAFm=1
304 | Expected: CEAFe=1
305 | Expected: BLANC=1 [recall_c=15/15=1, prec_c=15/15=1]
306 |
307 |
308 | TC-M-2 - Only coreferent mentions in the key. Gold mentions. Response contains only non-coreference links.
309 | Key/Ref: {abcdef}
310 | Rsp/Sys: {a} {b} {c} {d} {e} {f}
311 |
312 | Expected: MUC=0
313 | Expected: BLANC=0 [recall_c=0/15=0, prec_c=0/0=0]
314 |
315 |
316 | TC-M-3 - Only coreferent mentions in the key. Gold mentions. Response contains coreference and non-coreference links.
317 | Key/Ref: {abcdef}
318 | Rsp/Sys: {ab} {cde} {f}
319 |
320 | Expected: BLANC=0.42 [recall_c=4/15, prec_c=4/4=1]
321 |
322 |
323 | TC-M-4 - Only coreferent mentions in the key. System mentions: only coreferent mentions. Since the key contains no non-coreference link, BLANC equals recall_c, prec_c, F_c.
324 | Key/Ref: {abcdef}
325 | Rsp/Sys: {abcxyz}
326 |
327 | Expected: BLANC=0.20 [recall_c=3/15, prec_c=3/15]
328 |
329 |
330 | TC-M-5 - Only coreferent mentions in the key. System mentions: only singletons.
331 | Key/Ref: {abcdef}
332 | Rsp/Sys: {a} {b} {c} {x} {y} {z}
333 |
334 | Expected: MUC=0
335 | Expected: BLANC=0 [recall_c=0/15=0, prec_c=0/0=0]
336 |
337 |
338 | TC-M-6 - Only coreferent mentions in the key. System mentions: coreference and non-coreference links.
339 | Key/Ref: {abcdef}
340 | Rsp/Sys: {ab} {cxy} {z}
341 |
342 | Expected: BLANC=0.11 [recall_c=1/15, prec_c=1/4]
343 |
344 |
345 | TC-N-1 - Only singletons in the key. Gold mentions. Matching response. Since the key contains no coreference link, BLANC equals recall_n, prec_n, F_n.
346 | Key/Ref: {a} {b} {c} {d} {e} {f}
347 | Rsp/Sys: {a} {b} {c} {d} {e} {f}
348 |
349 | Expected: BCUB=1
350 | Expected: MUC=0
351 | Expected: CEAFm=1
352 | Expected: CEAFe=1
353 | Expected: BLANC=1 [recall_n=15/15=1, prec_n=15/15=1]
354 |
355 |
356 | TC-N-2 - Only singletons in the key. Gold mentions. Response contains only coreference links.
357 | Key/Ref: {a} {b} {c} {d} {e} {f}
358 | Rsp/Sys: {abcdef}
359 |
360 | Expected: BLANC=0 [recall_n=0/15=0, prec_n=0/0=0]
361 |
362 |
363 | TC-N-3 - Only singletons in the key. Gold mentions. Response contains coreference and non-coreference links.
364 | Key/Ref: {a} {b} {c} {d} {e} {f}
365 | Rsp/Sys: {ab} {cde} {f}
366 |
367 | Expected: BLANC=0.85 [recall_n=11/15, prec_n=11/11=1]
368 |
369 |
370 | TC-N-4 - Only singletons in the key. System mentions: only singletons. Since the key contains no coreference link, BLANC equals recall_n, prec_n, F_n.
371 | Key/Ref: {a} {b} {c} {d} {e} {f}
372 | Rsp/Sys: {a} {b} {c} {x} {y} {z}
373 |
374 | Expected: MUC=0
375 | Expected: BLANC=0.20 [recall_n=3/15, prec_n=3/15]
376 |
377 |
378 | TC-N-5 - Only singletons in the key. System mentions: only coreference links.
379 | Key/Ref: {a} {b} {c} {d} {e} {f}
380 | Rsp/Sys: {abcxyz}
381 |
382 | Expected: BLANC=0 [recall_n=0/15=0, prec_n=0/0=0]
383 |
384 |
385 | TC-N-6 - Only singletons in the key. System mentions: coreference and non-coreference links.
386 | Key/Ref: {a} {b} {c} {d} {e} {f}
387 | Rsp/Sys: {ab} {cxy} {z}
388 |
389 | Expected: BLANC=0.15 [recall_n=2/15, prec_n=2/11]
390 |
391 |
--------------------------------------------------------------------------------
/reference-coreference-scorers-8.01/test/test.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | BEGIN {
4 | $d = $0;
5 | $d =~ s/\/[^\/][^\/]*$//g;
6 | push(@INC, $d);
7 | push(@INC, $d . "/../lib");
8 | }
9 |
10 | use strict;
11 | use CorScorer;
12 | use CorefMetricTest;
13 | use CorefMetricTestConfig;
14 |
15 | my $error_tolerance = 1.e-4;
16 | my $script_dir = $0;
17 | $script_dir =~ s/\/[^\/][^\/]*$//g;
18 |
19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) {
20 | my $id = $test_case->{'id'};
21 | my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'},
22 | $script_dir . "/" . $test_case->{'response_file'});
23 | print "\nTesting case ($id): keyFile=", $key_response_files[0],
24 | " responseFile=", $key_response_files[1], "\n";
25 | my $expected_metrics = $test_case->{'expected_metrics'};
26 | foreach my $metric_name (sort keys %$expected_metrics) {
27 | my $expected_values = $expected_metrics->{$metric_name};
28 | *::SAVED_STDOUT = *STDOUT;
29 | *STDOUT = *::SUPRRES_STDOUT;
30 | my @actual_counts = &CorScorer::Score($metric_name, @key_response_files);
31 | # Compute R,P,and F1 from raw counts.
32 | my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts);
33 | *STDOUT = *::SAVED_STDOUT;
34 | my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values);
35 | printf " metric: %+10s", $metric_name;
36 | if ($diff < $error_tolerance) {
37 | print " => PASS\n";
38 | } else {
39 | print " => FAIL\n";
40 | print " Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n";
41 | print " Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n";
42 | #exit(1);
43 | }
44 | }
45 | }
46 |
47 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==3.0.2
2 | pyhocon
3 | boltons
4 | scikit-learn==0.22.2
5 | stanza==1.1.1
6 |
--------------------------------------------------------------------------------
/runner.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import torch
4 | import tqdm
5 | import random
6 | import time
7 |
8 | from transformers import *
9 | from models import EventCorefModel
10 | from utils import RunningAverage, prepare_configs, get_n_params
11 | from scorer import evaluate
12 | from data import load_oneie_dataset
13 | from argparse import ArgumentParser
14 |
15 | def train(config_name):
16 | # Prepare tokenizer, dataset, and model
17 | configs = prepare_configs(config_name)
18 | tokenizer = AutoTokenizer.from_pretrained(configs['transformer'])
19 | predictions_path = None if configs['use_groundtruth'] else configs['predictions_path']
20 | train_set, dev_set, test_set = \
21 | load_oneie_dataset(configs['base_dataset_path'], tokenizer, predictions_path,
22 | increase_ace_dev_set=configs['increase_ace_dev_set'])
23 | model = EventCorefModel(configs, train_set.event_types)
24 |     print('Initialized tokenizer, dataset, and model')
25 | print('Number of parameters is {}'.format(get_n_params(model)))
26 |
27 | # Initialize the optimizer
28 | num_train_docs = len(train_set)
29 | epoch_steps = int(math.ceil(num_train_docs / configs['batch_size']))
30 | num_train_steps = int(epoch_steps * configs['epochs'])
31 | num_warmup_steps = int(num_train_steps * 0.1)
32 | optimizer = model.get_optimizer(num_warmup_steps, num_train_steps)
33 | print('Initialized optimizer')
34 |
35 | # Main training loop
36 | best_dev_score, iters, batch_loss = 0.0, 0, 0
37 | for epoch in range(configs['epochs']):
38 | #print('Epoch: {}'.format(epoch))
39 | print('\n')
40 | progress = tqdm.tqdm(total=epoch_steps, ncols=80,
41 | desc='Train {}'.format(epoch))
42 | accumulated_loss = RunningAverage()
43 |
44 | train_indices = list(range(num_train_docs))
45 | random.shuffle(train_indices)
46 | start_train = time.time()
47 | for train_idx in train_indices:
48 | iters += 1
49 | inst = train_set[train_idx]
50 | iter_loss = model(inst, is_training=True)[0]
51 | iter_loss /= configs['batch_size']
52 | iter_loss.backward()
53 | batch_loss += iter_loss.data.item()
54 | if iters % configs['batch_size'] == 0:
55 | accumulated_loss.update(batch_loss)
56 | torch.nn.utils.clip_grad_norm_(model.parameters(), configs['max_grad_norm'])
57 | optimizer.step()
58 | optimizer.zero_grad()
59 | batch_loss = 0
60 | # Update progress bar
61 | progress.update(1)
62 | progress.set_postfix_str('Average Train Loss: {}'.format(accumulated_loss()))
63 | progress.close()
64 | print('One epoch training took {} seconds'.format(time.time() - start_train))
65 |
66 | # Evaluation after each epoch
67 | print('Evaluation on the dev set', flush=True)
68 | start_dev = time.time()
69 | dev_score = evaluate(model, dev_set, configs)['avg']
70 | print('Evaluation on dev set took {} seconds'.format(time.time() - start_dev))
71 |
72 | # Save model if it has better dev score
73 | if dev_score > best_dev_score:
74 | best_dev_score = dev_score
75 | # Save the model
76 | save_path = os.path.join(configs['saved_path'], 'model.pt')
77 | torch.save({'model_state_dict': model.state_dict()}, save_path)
78 | print('Saved the model', flush=True)
79 | # Evaluation on the test set
80 | print('Evaluation on the test set', flush=True)
81 | start_test = time.time()
82 | evaluate(model, test_set, configs)
83 | print('Evaluation on test set took {} seconds'.format(time.time() - start_test))
84 |
85 | if __name__ == "__main__":
86 | # Parse argument
87 | parser = ArgumentParser()
88 | parser.add_argument('-c', '--config_name', default='basic')
89 | args = parser.parse_args()
90 |
91 | # Start training
92 | train(args.config_name)
93 |
--------------------------------------------------------------------------------
/scorer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import numpy as np
4 | import tempfile
5 | import subprocess
6 | import torch
7 | import re
8 |
9 | from boltons.iterutils import pairwise, windowed
10 | from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
11 |
12 | COREF_RESULTS_REGEX = re.compile(r".*Coreference: Recall: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tPrecision: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tF1: ([0-9.]+)%.*", re.DOTALL)
13 | BLANC_RESULTS_REGEX = re.compile(r".*BLANC: Recall: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tPrecision: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tF1: ([0-9.]+)%.*", re.DOTALL)
14 |
15 | def evaluate(model, eval_set, configs, verbose=True):
16 | return evaluate_coref(model, eval_set, configs, verbose)
17 |
18 | def evaluate_coref(model, eval_set, configs, verbose=True):
19 | predictions = []
20 | for inst in eval_set:
21 | # Apply the model for prediction
22 | with torch.no_grad():
23 | loss, preds = model(inst, is_training=False)
24 | preds = [x.cpu().data.numpy() for x in preds]
25 | top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores = preds
26 | predicted_antecedents = get_predicted_antecedents(top_antecedents, top_antecedent_scores)
27 |
28 | predicted_clusters, m2cluster = [], {}
29 | for ix, (s, e) in enumerate(zip(top_span_starts, top_span_ends)):
30 | if predicted_antecedents[ix] < 0:
31 | cluster_id = len(predicted_clusters)
32 | predicted_clusters.append([(s, e)])
33 | else:
34 | antecedent_idx = predicted_antecedents[ix]
35 | p_s, p_e = top_span_starts[antecedent_idx], top_span_ends[antecedent_idx]
36 | cluster_id = m2cluster[(p_s, p_e)]
37 | predicted_clusters[cluster_id].append((s,e))
38 | m2cluster[(s,e)] = cluster_id
39 | predictions.append(m2cluster)
40 |
41 |
42 | with tempfile.NamedTemporaryFile(delete=False, mode='w') as gold_file:
43 | output_gold_conll(gold_file, eval_set.data)
44 | with tempfile.NamedTemporaryFile(delete=False, mode='w') as prediction_file:
45 | for ix, inst in enumerate(eval_set.data):
46 | doc_id = inst.doc_id
47 | m2cluster = predictions[ix]
48 | cluster_labels = ['-'] * inst.num_words
49 | for (start, end) in m2cluster.keys():
50 | c_label = m2cluster[(start, end)]
51 | end = end - 1
52 | if start == end:
53 | cluster_labels[start] = '({})'.format(c_label)
54 | else:
55 | cluster_labels[start] = '({}'.format(c_label)
56 | cluster_labels[end] = '{})'.format(c_label)
57 |
58 | # Write the doc info to output file
59 | prediction_file.write('#begin document ({}); part 000\n'.format(doc_id))
60 | for i in range(inst.num_words):
61 | prediction_file.write('{} {}\n'.format(doc_id, cluster_labels[i]))
62 | prediction_file.write('\n')
63 | prediction_file.write('#end document\n')
64 |
65 | gold_file.flush()
66 | prediction_file.flush()
67 | print("Gold conll file: {}".format(gold_file.name))
68 | print("Prediction conll file: {}".format(prediction_file.name))
69 | metrics = ("muc", "bcub", "ceafe", "blanc", "ceafm")
70 | summary = { m: official_conll_eval(gold_file.name, prediction_file.name, m) for m in metrics}
71 | os.remove(gold_file.name)
72 | os.remove(prediction_file.name)
73 |
74 | avg = 0.0
75 | for metric in metrics[:-1]: avg += summary[metric]['f'] # Excluding ceafm when calculating avg
76 | avg /= len(metrics[:-1])
77 | summary['avg'] = avg
78 |
79 | summary_text = ''
80 | for metric in metrics:
81 | summary_text += '[{}] F1 = {} | '.format(metric, summary[metric]['f'])
82 | summary_text += 'AVG = {}'.format(avg)
83 | print(summary_text)
84 |
85 | return summary
86 |
87 |
88 | def official_conll_eval(gold_path, predicted_path, metric, official_stdout=False):
89 | cmd = ["reference-coreference-scorers-8.01/scorer.pl", metric, gold_path, predicted_path, "none"]
90 | process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
91 | stdout, stderr = process.communicate()
92 | process.wait()
93 |
94 | stdout = stdout.decode("utf-8")
95 | if stderr is not None:
96 | print(stderr)
97 |
98 | if official_stdout:
99 | print("Official result for {}".format(metric))
100 | print(stdout)
101 |
102 | regexp = COREF_RESULTS_REGEX if metric != 'blanc' else BLANC_RESULTS_REGEX
103 | coref_results_match = re.match(regexp, stdout)
104 | recall = float(coref_results_match.group(1))
105 | precision = float(coref_results_match.group(2))
106 | f1 = float(coref_results_match.group(3))
107 | return { "r": recall, "p": precision, "f": f1 }
108 |
109 | def get_predicted_antecedents(antecedents, antecedent_scores):
110 | predicted_antecedents = []
111 | for i, index in enumerate(np.argmax(antecedent_scores, axis=1) - 1):
112 | if index < 0: predicted_antecedents.append(-1)
113 | else: predicted_antecedents.append(antecedents[i, index])
114 | return predicted_antecedents
115 |
116 | def output_gold_conll(gold_file, documents):
117 | for doc in documents:
118 | doc_id = doc.doc_id
119 |
120 | # Build cluster_labels
121 | eventid2label = {}
122 | cluster_labels = ['-'] * doc.num_words
123 | for e in doc.event_mentions:
124 | mention_id = e['id']
125 | event_id = mention_id[:mention_id.rfind('-')]
126 | if not event_id in eventid2label:
127 | eventid2label[event_id] = 1 + len(eventid2label)
128 | start_idx, end_idx = e['trigger']['start'], e['trigger']['end']-1
129 | if start_idx == end_idx:
130 | cluster_labels[start_idx] = '({})'.format(eventid2label[event_id])
131 | else:
132 | cluster_labels[start_idx] = '({}'.format(eventid2label[event_id])
133 | cluster_labels[end_idx] = '{})'.format(eventid2label[event_id])
134 |
135 | # Write the doc info to output file
136 | gold_file.write('#begin document ({}); part 000\n'.format(doc_id))
137 | for i in range(doc.num_words):
138 | gold_file.write('{} {}\n'.format(doc_id, cluster_labels[i]))
139 | gold_file.write('\n')
140 | gold_file.write('#end document\n')
141 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import torch
4 | import pyhocon
5 | import numpy as np
6 | import tempfile
7 | from boltons.iterutils import pairwise, windowed
8 |
9 | def prepare_configs(config_name, verbose=True):
10 | configs = pyhocon.ConfigFactory.parse_file('configs/basic.conf')[config_name]
11 | configs['saved_path'] = 'trained'
12 | if not os.path.exists(configs['saved_path']):
13 | os.makedirs(configs['saved_path'])
14 | if verbose: print(configs)
15 | return configs
16 |
17 | def flatten(l):
18 | return [item for sublist in l for item in sublist]
19 |
20 | def listRightIndex(alist, value):
21 | return len(alist) - alist[-1::-1].index(value) -1
22 |
23 | def bucket_distance(distances, nb_buckets=15):
24 | """
25 | Places the given values (designed for distances) into semi-logscale buckets.
26 | For example if nb_buckets = 15 then:
27 | [0, 1, 2, 3, 4, 5-7, 8-15, 16-31, 32-63, 64-127, 128-255, 256-511, 512-1023, 1024-2047, 2048+].
28 | """
29 | logspace_idx = torch.floor(torch.log2(distances.float())).long() + 3
30 | use_identity = (distances <= 4).long()
31 | combined_idx = use_identity * distances + (1 - use_identity) * logspace_idx
32 | return torch.clamp(combined_idx, 0, nb_buckets-1)
33 |
34 | def extract_input_masks_from_mask_windows(mask_windows):
35 | input_masks = []
36 | for mask_window in mask_windows:
37 | subtoken_count = listRightIndex(mask_window, -3) + 1
38 | input_masks.append([1] * subtoken_count + [0] * (len(mask_window) - subtoken_count))
39 | input_masks = np.array(input_masks)
40 | return input_masks
41 |
42 | def convert_to_sliding_window(expanded_tokens, sliding_window_size, tokenizer):
43 | """
44 | construct sliding windows, allocate tokens and masks into each window
45 | :param expanded_tokens:
46 | :param sliding_window_size:
47 | :return:
48 | """
49 | CLS = tokenizer.convert_tokens_to_ids(['[CLS]'])
50 | SEP = tokenizer.convert_tokens_to_ids(['[SEP]'])
51 | PAD = tokenizer.convert_tokens_to_ids(['[PAD]'])
52 | expanded_masks = [1] * len(expanded_tokens)
53 | sliding_windows = construct_sliding_windows(len(expanded_tokens), sliding_window_size - 2)
54 | token_windows = [] # expanded tokens to sliding window
55 | mask_windows = [] # expanded masks to sliding window
56 | for window_start, window_end, window_mask in sliding_windows:
57 | original_tokens = expanded_tokens[window_start: window_end]
58 | original_masks = expanded_masks[window_start: window_end]
59 | window_masks = [-2 if w == 0 else o for w, o in zip(window_mask, original_masks)]
60 | one_window_token = CLS + original_tokens + SEP + PAD * (sliding_window_size - 2 - len(original_tokens))
61 | one_window_mask = [-3] + window_masks + [-3] + [-4] * (sliding_window_size - 2 - len(original_tokens))
62 | assert len(one_window_token) == sliding_window_size
63 | assert len(one_window_mask) == sliding_window_size
64 | token_windows.append(one_window_token)
65 | mask_windows.append(one_window_mask)
66 | return token_windows, mask_windows
67 |
68 | def construct_sliding_windows(sequence_length: int, sliding_window_size: int):
69 | """
70 | construct sliding windows for BERT processing
71 | :param sequence_length: e.g. 9
72 | :param sliding_window_size: e.g. 4
73 | :return: [(0, 4, [1, 1, 1, 0]), (2, 6, [0, 1, 1, 0]), (4, 8, [0, 1, 1, 0]), (6, 9, [0, 1, 1])]
74 | """
75 | sliding_windows = []
76 | stride = int(sliding_window_size / 2)
77 | start_index = 0
78 | end_index = 0
79 | while end_index < sequence_length:
80 | end_index = min(start_index + sliding_window_size, sequence_length)
81 | left_value = 1 if start_index == 0 else 0
82 | right_value = 1 if end_index == sequence_length else 0
83 | mask = [left_value] * int(sliding_window_size / 4) + [1] * int(sliding_window_size / 2) \
84 | + [right_value] * (sliding_window_size - int(sliding_window_size / 2) - int(sliding_window_size / 4))
85 | mask = mask[: end_index - start_index]
86 | sliding_windows.append((start_index, end_index, mask))
87 | start_index += stride
88 | assert sum([sum(window[2]) for window in sliding_windows]) == sequence_length
89 | return sliding_windows
90 |
91 | # Get total number of parameters in a model
92 | def get_n_params(model):
93 | pp=0
94 | for p in list(model.parameters()):
95 | nn=1
96 | for s in list(p.size()):
97 | nn = nn*s
98 | pp += nn
99 | return pp
100 |
101 | class RunningAverage():
102 | """A simple class that maintains the running average of a quantity
103 | Example:
104 | ```
105 | loss_avg = RunningAverage()
106 | loss_avg.update(2)
107 | loss_avg.update(4)
108 | loss_avg() = 3
109 | ```
110 | """
111 | def __init__(self):
112 | self.steps = 0
113 | self.total = 0
114 |
115 | def update(self, val):
116 | self.total += val
117 | self.steps += 1
118 |
119 | def __call__(self):
120 | return self.total/float(self.steps)
121 |
--------------------------------------------------------------------------------